Merge "Generate an hprof file to test ahat."
diff --git a/benchmark/Android.mk b/benchmark/Android.mk
index 09aca98..a4a603a 100644
--- a/benchmark/Android.mk
+++ b/benchmark/Android.mk
@@ -19,6 +19,7 @@
 include art/build/Android.common_build.mk
 
 LIBARTBENCHMARK_COMMON_SRC_FILES := \
+  jobject-benchmark/jobject_benchmark.cc \
   jni-perf/perf_jni.cc \
   scoped-primitive-array/scoped_primitive_array.cc
 
diff --git a/benchmark/jobject-benchmark/info.txt b/benchmark/jobject-benchmark/info.txt
new file mode 100644
index 0000000..f2a256a
--- /dev/null
+++ b/benchmark/jobject-benchmark/info.txt
@@ -0,0 +1,7 @@
+Benchmark for jobject functions
+
+Measures performance of:
+Add/RemoveLocalRef
+Add/RemoveGlobalRef
+Add/RemoveWeakGlobalRef
+Decoding local, global, weak global, and handle scope jobjects.
diff --git a/benchmark/jobject-benchmark/jobject_benchmark.cc b/benchmark/jobject-benchmark/jobject_benchmark.cc
new file mode 100644
index 0000000..e7ca9eb
--- /dev/null
+++ b/benchmark/jobject-benchmark/jobject_benchmark.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni.h"
+
+#include "mirror/class-inl.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+namespace {
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeAddRemoveLocal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  for (jint i = 0; i < reps; ++i) {
+    jobject ref = soa.Env()->AddLocalReference<jobject>(obj);
+    soa.Env()->DeleteLocalRef(ref);
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeDecodeLocal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  jobject ref = soa.Env()->AddLocalReference<jobject>(obj);
+  for (jint i = 0; i < reps; ++i) {
+    CHECK_EQ(soa.Decode<mirror::Object*>(ref), obj);
+  }
+  soa.Env()->DeleteLocalRef(ref);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeAddRemoveGlobal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  for (jint i = 0; i < reps; ++i) {
+    jobject ref = soa.Vm()->AddGlobalRef(soa.Self(), obj);
+    soa.Vm()->DeleteGlobalRef(soa.Self(), ref);
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeDecodeGlobal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  jobject ref = soa.Vm()->AddGlobalRef(soa.Self(), obj);
+  for (jint i = 0; i < reps; ++i) {
+    CHECK_EQ(soa.Decode<mirror::Object*>(ref), obj);
+  }
+  soa.Vm()->DeleteGlobalRef(soa.Self(), ref);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeAddRemoveWeakGlobal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  for (jint i = 0; i < reps; ++i) {
+    jobject ref = soa.Vm()->AddWeakGlobalRef(soa.Self(), obj);
+    soa.Vm()->DeleteWeakGlobalRef(soa.Self(), ref);
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeDecodeWeakGlobal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  jobject ref = soa.Vm()->AddWeakGlobalRef(soa.Self(), obj);
+  for (jint i = 0; i < reps; ++i) {
+    CHECK_EQ(soa.Decode<mirror::Object*>(ref), obj);
+  }
+  soa.Vm()->DeleteWeakGlobalRef(soa.Self(), ref);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeDecodeHandleScopeRef(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  for (jint i = 0; i < reps; ++i) {
+    soa.Decode<mirror::Object*>(jobj);
+  }
+}
+
+}  // namespace
+}  // namespace art
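The functions above exercise ART's reference tables directly through ScopedObjectAccess and the internal JavaVMExt/JNIEnvExt entry points. For orientation, the same round trips expressed through the public JNI interface look roughly like the following standalone sketch; the native method name is hypothetical and exists only for this illustration.

#include <jni.h>

// Standalone sketch: local, global, and weak-global round trips via the
// public JNI API. Java_JObjectBenchmark_referenceRoundTrip is a hypothetical
// method name, not part of the benchmark above.
extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_referenceRoundTrip(
    JNIEnv* env, jobject jobj) {
  jobject local = env->NewLocalRef(jobj);    // local reference table
  env->DeleteLocalRef(local);

  jobject global = env->NewGlobalRef(jobj);  // global reference table
  env->DeleteGlobalRef(global);

  jweak weak = env->NewWeakGlobalRef(jobj);  // weak global reference table
  env->DeleteWeakGlobalRef(weak);
}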
diff --git a/benchmark/jobject-benchmark/src/JObjectBenchmark.java b/benchmark/jobject-benchmark/src/JObjectBenchmark.java
new file mode 100644
index 0000000..f4c059c
--- /dev/null
+++ b/benchmark/jobject-benchmark/src/JObjectBenchmark.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.google.caliper.SimpleBenchmark;
+
+public class JObjectBenchmark extends SimpleBenchmark {
+  public JObjectBenchmark() {
+    // Make sure to link methods before benchmark starts.
+    System.loadLibrary("artbenchmark");
+    timeAddRemoveLocal(1);
+    timeDecodeLocal(1);
+    timeAddRemoveGlobal(1);
+    timeDecodeGlobal(1);
+    timeAddRemoveWeakGlobal(1);
+    timeDecodeWeakGlobal(1);
+    timeDecodeHandleScopeRef(1);
+  }
+
+  public native void timeAddRemoveLocal(int reps);
+  public native void timeDecodeLocal(int reps);
+  public native void timeAddRemoveGlobal(int reps);
+  public native void timeDecodeGlobal(int reps);
+  public native void timeAddRemoveWeakGlobal(int reps);
+  public native void timeDecodeWeakGlobal(int reps);
+  public native void timeDecodeHandleScopeRef(int reps);
+}
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index a443487..288bddd 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -295,10 +295,6 @@
   art_cflags += -DIMT_SIZE=64
 endif
 
-ifeq ($(ART_USE_OPTIMIZING_COMPILER),true)
-  art_cflags += -DART_USE_OPTIMIZING_COMPILER=1
-endif
-
 ifeq ($(ART_HEAP_POISONING),true)
   art_cflags += -DART_HEAP_POISONING=1
   art_asflags += -DART_HEAP_POISONING=1
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index 72cf978..3b2d1cc 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -101,7 +101,10 @@
       # TODO: Having this is not ideal as it might obscure errors. Try to get rid of it.
       LOCAL_LDFLAGS += -z muldefs
       ifeq ($$(HOST_OS),linux)
-        LOCAL_LDLIBS += -lrt
+        LOCAL_LDLIBS += -lrt -lncurses -ltinfo
+      endif
+      ifeq ($$(HOST_OS),darwin)
+        LOCAL_LDLIBS += -lncurses -ltinfo
       endif
     endif
 
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 3a3cb99..d71ae29 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -52,18 +52,12 @@
   core_pic_infix :=
   core_dex2oat_dependency := $(DEX2OAT_DEPENDENCY)
 
-  # With the optimizing compiler, we want to rerun dex2oat whenever there is
-  # a dex2oat change to catch regressions early.
-  ifeq ($(ART_USE_OPTIMIZING_COMPILER), true)
-    core_dex2oat_dependency := $(DEX2OAT)
-  endif
-
   ifeq ($(1),default)
     core_compile_options += --compiler-backend=Quick
   endif
   ifeq ($(1),optimizing)
     core_compile_options += --compiler-backend=Optimizing
-    core_dex2oat_dependency := $(DEX2OAT)
+    core_dex2oat_dependency += $(DEX2OAT)
     core_infix := -optimizing
   endif
   ifeq ($(1),interpreter)
@@ -74,6 +68,10 @@
     core_compile_options += --compiler-filter=verify-at-runtime --runtime-arg -Xverify:softfail
     core_infix := -interp-ac
   endif
+  ifeq ($(1),jit)
+    core_compile_options += --compiler-filter=verify-at-runtime
+    core_infix := -jit
+  endif
   ifeq ($(1),default)
     # Default has no infix, no compile options.
   endif
@@ -154,6 +152,7 @@
 $(eval $(call create-core-oat-host-rule-combination,optimizing,,))
 $(eval $(call create-core-oat-host-rule-combination,interpreter,,))
 $(eval $(call create-core-oat-host-rule-combination,interp-ac,,))
+$(eval $(call create-core-oat-host-rule-combination,jit,,))
 
 valgrindHOST_CORE_IMG_OUTS :=
 valgrindHOST_CORE_OAT_OUTS :=
@@ -161,6 +160,7 @@
 $(eval $(call create-core-oat-host-rule-combination,optimizing,valgrind,32))
 $(eval $(call create-core-oat-host-rule-combination,interpreter,valgrind,32))
 $(eval $(call create-core-oat-host-rule-combination,interp-ac,valgrind,32))
+$(eval $(call create-core-oat-host-rule-combination,jit,valgrind,32))
 
 valgrind-test-art-host-dex2oat-host: $(valgrindHOST_CORE_IMG_OUTS)
 
@@ -172,18 +172,12 @@
   core_pic_infix :=
   core_dex2oat_dependency := $(DEX2OAT_DEPENDENCY)
 
-  # With the optimizing compiler, we want to rerun dex2oat whenever there is
-  # a dex2oat change to catch regressions early.
-  ifeq ($(ART_USE_OPTIMIZING_COMPILER), true)
-    core_dex2oat_dependency := $(DEX2OAT)
-  endif
-
   ifeq ($(1),default)
     core_compile_options += --compiler-backend=Quick
   endif
   ifeq ($(1),optimizing)
     core_compile_options += --compiler-backend=Optimizing
-    core_dex2oat_dependency := $(DEX2OAT)
+    core_dex2oat_dependency += $(DEX2OAT)
     core_infix := -optimizing
   endif
   ifeq ($(1),interpreter)
@@ -194,6 +188,10 @@
     core_compile_options += --compiler-filter=verify-at-runtime --runtime-arg -Xverify:softfail
     core_infix := -interp-ac
   endif
+  ifeq ($(1),jit)
+    core_compile_options += --compiler-filter=verify-at-runtime
+    core_infix := -jit
+  endif
   ifeq ($(1),default)
     # Default has no infix, no compile options.
   endif
@@ -279,6 +277,7 @@
 $(eval $(call create-core-oat-target-rule-combination,optimizing,,))
 $(eval $(call create-core-oat-target-rule-combination,interpreter,,))
 $(eval $(call create-core-oat-target-rule-combination,interp-ac,,))
+$(eval $(call create-core-oat-target-rule-combination,jit,,))
 
 valgrindTARGET_CORE_IMG_OUTS :=
 valgrindTARGET_CORE_OAT_OUTS :=
@@ -286,6 +285,7 @@
 $(eval $(call create-core-oat-target-rule-combination,optimizing,valgrind,32))
 $(eval $(call create-core-oat-target-rule-combination,interpreter,valgrind,32))
 $(eval $(call create-core-oat-target-rule-combination,interp-ac,valgrind,32))
+$(eval $(call create-core-oat-target-rule-combination,jit,valgrind,32))
 
 valgrind-test-art-host-dex2oat-target: $(valgrindTARGET_CORE_IMG_OUTS)
 
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 41e9744..96e13ac 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -158,6 +158,7 @@
     $(LIBART_COMPILER_SRC_FILES_mips) \
 	jni/quick/mips64/calling_convention_mips64.cc \
 	optimizing/code_generator_mips64.cc \
+	optimizing/intrinsics_mips64.cc \
 	utils/mips64/assembler_mips64.cc \
 	utils/mips64/managed_register_mips64.cc \
 
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index dc2bc5c..67b4428 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -92,7 +92,7 @@
 
   void UnreserveImageSpace();
 
-  Compiler::Kind compiler_kind_ = kUseOptimizingCompiler ? Compiler::kOptimizing : Compiler::kQuick;
+  Compiler::Kind compiler_kind_ = Compiler::kOptimizing;
   std::unique_ptr<CompilerOptions> compiler_options_;
   std::unique_ptr<VerificationResults> verification_results_;
   std::unique_ptr<DexFileToMethodInlinerMap> method_inliner_map_;
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 7082bed..d5ac341 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1126,7 +1126,7 @@
     for (size_t i = 0 ; i < core_vmap_table_.size(); ++i) {
       // Copy, stripping out the phys register sort key.
       vmap_encoder.PushBackUnsigned(
-          ~(-1 << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment));
+          ~(~0u << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment));
     }
     // Push a marker to take place of lr.
     vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
@@ -1141,7 +1141,7 @@
       for (size_t i = 0 ; i < fp_vmap_table_.size(); ++i) {
         // Copy, stripping out the phys register sort key.
         vmap_encoder.PushBackUnsigned(
-            ~(-1 << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment));
+            ~(~0u << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment));
       }
     }
   } else {
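The codegen_util.cc hunks above replace ~(-1 << VREG_NUM_WIDTH) with ~(~0u << VREG_NUM_WIDTH): left-shifting a negative signed value is undefined behavior in C++, while the unsigned form produces the same low-bit mask portably. A minimal sketch of the idiom (the width values are illustrative):

#include <cassert>
#include <cstdint>

// Mask of the low `width` bits, built without shifting a negative signed
// value. Assumes 0 < width < 32.
constexpr uint32_t LowBitsMask(int width) {
  return ~(~0u << width);
}

int main() {
  assert(LowBitsMask(16) == 0xFFFFu);  // e.g. a 16-bit vreg number field
  assert(LowBitsMask(8) == 0xFFu);
  return 0;
}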
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 1f114cf..3c5c2fe 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -148,7 +148,7 @@
   if (arg1.wide == 0) {
     LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1));
   } else {
-    RegStorage r_tmp = TargetReg(cu_->instruction_set == kMips ? kArg2 : kArg1, kWide);
+    RegStorage r_tmp = TargetReg(kArg2, kWide);
     LoadValueDirectWideFixed(arg1, r_tmp);
   }
   LoadConstant(TargetReg(kArg0, kNotWide), arg0);
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index b5ecf9c..1cd742a 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -391,9 +391,9 @@
     Instruction::IGET_SHORT_QUICK,
     Instruction::INVOKE_LAMBDA,
     Instruction::UNUSED_F4,
-    Instruction::UNUSED_F5,
+    Instruction::CAPTURE_VARIABLE,
     Instruction::CREATE_LAMBDA,
-    Instruction::UNUSED_F7,
+    Instruction::LIBERATE_VARIABLE,
     Instruction::BOX_LAMBDA,
     Instruction::UNBOX_LAMBDA,
     Instruction::UNUSED_FA,
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index 939bf40..6ed666b 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -535,37 +535,76 @@
   if (block->visited || block->hidden) {
     return;
   }
-  block->visited = true;
 
-  /* Process this block */
-  DoSSAConversion(block);
+  typedef struct {
+    BasicBlock* bb;
+    int32_t* ssa_map;
+  } BasicBlockInfo;
+  BasicBlockInfo temp;
 
-  /* Save SSA map snapshot */
   ScopedArenaAllocator allocator(&cu_->arena_stack);
-  uint32_t num_vregs = GetNumOfCodeAndTempVRs();
-  int32_t* saved_ssa_map = allocator.AllocArray<int32_t>(num_vregs, kArenaAllocDalvikToSSAMap);
-  size_t map_size = sizeof(saved_ssa_map[0]) * num_vregs;
-  memcpy(saved_ssa_map, vreg_to_ssa_map_, map_size);
+  ScopedArenaVector<BasicBlockInfo> bi_stack(allocator.Adapter());
+  ScopedArenaVector<BasicBlock*> succ_stack(allocator.Adapter());
 
-  if (block->fall_through != NullBasicBlockId) {
-    DoDFSPreOrderSSARename(GetBasicBlock(block->fall_through));
-    /* Restore SSA map snapshot */
-    memcpy(vreg_to_ssa_map_, saved_ssa_map, map_size);
-  }
-  if (block->taken != NullBasicBlockId) {
-    DoDFSPreOrderSSARename(GetBasicBlock(block->taken));
-    /* Restore SSA map snapshot */
-    memcpy(vreg_to_ssa_map_, saved_ssa_map, map_size);
-  }
-  if (block->successor_block_list_type != kNotUsed) {
-    for (SuccessorBlockInfo* successor_block_info : block->successor_blocks) {
-      BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block);
-      DoDFSPreOrderSSARename(succ_bb);
-      /* Restore SSA map snapshot */
-      memcpy(vreg_to_ssa_map_, saved_ssa_map, map_size);
+  uint32_t num_vregs = GetNumOfCodeAndTempVRs();
+  size_t map_size = sizeof(int32_t) * num_vregs;
+  temp.bb = block;
+  temp.ssa_map = vreg_to_ssa_map_;
+  bi_stack.push_back(temp);
+
+  while (!bi_stack.empty()) {
+    temp = bi_stack.back();
+    bi_stack.pop_back();
+    BasicBlock* b = temp.bb;
+
+    if (b->visited || b->hidden) {
+      continue;
+    }
+    b->visited = true;
+
+    /* Restore SSA map snapshot, except for the first block */
+    if (b != block) {
+      memcpy(vreg_to_ssa_map_, temp.ssa_map, map_size);
+    }
+
+    /* Process this block */
+    DoSSAConversion(b);
+
+    /* If there are no successor, taken, or fall-through blocks, continue */
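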
+    if (b->successor_block_list_type == kNotUsed &&
+        b->taken == NullBasicBlockId &&
+        b->fall_through == NullBasicBlockId) {
+      continue;
+    }
+
+    /* Save SSA map snapshot */
+    int32_t* saved_ssa_map =
+      allocator.AllocArray<int32_t>(num_vregs, kArenaAllocDalvikToSSAMap);
+    memcpy(saved_ssa_map, vreg_to_ssa_map_, map_size);
+
+    if (b->successor_block_list_type != kNotUsed) {
+      for (SuccessorBlockInfo* successor_block_info : b->successor_blocks) {
+        BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block);
+        succ_stack.push_back(succ_bb);
+      }
+      while (!succ_stack.empty()) {
+        temp.bb = succ_stack.back();
+        succ_stack.pop_back();
+        temp.ssa_map = saved_ssa_map;
+        bi_stack.push_back(temp);
+      }
+    }
+    if (b->taken != NullBasicBlockId) {
+      temp.bb = GetBasicBlock(b->taken);
+      temp.ssa_map = saved_ssa_map;
+      bi_stack.push_back(temp);
+    }
+    if (b->fall_through != NullBasicBlockId) {
+      temp.bb = GetBasicBlock(b->fall_through);
+      temp.ssa_map = saved_ssa_map;
+      bi_stack.push_back(temp);
     }
   }
-  return;
 }
 
 }  // namespace art
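The rewrite above turns the recursive pre-order SSA rename into a worklist loop, so deep control-flow graphs cannot overflow the native stack; each work item carries the SSA map snapshot that the recursive version kept in its caller's frame. The same pattern on a generic graph, as a minimal sketch (hypothetical Node type, not ART code):

#include <utility>
#include <vector>

struct Node {
  std::vector<Node*> successors;
  bool visited = false;
  int state = 0;  // stands in for the per-block SSA map snapshot
};

// Pre-order DFS without recursion: each work item pairs a node with the state
// that must be restored before processing it, mirroring BasicBlockInfo above.
void PreOrderWithState(Node* root, int initial_state) {
  std::vector<std::pair<Node*, int>> work;
  work.push_back({root, initial_state});
  while (!work.empty()) {
    auto [node, state] = work.back();
    work.pop_back();
    if (node == nullptr || node->visited) {
      continue;
    }
    node->visited = true;
    node->state = state;         // "restore the snapshot", then process the node
    int snapshot = node->state;  // snapshot handed to every successor
    for (Node* succ : node->successors) {
      work.push_back({succ, snapshot});
    }
  }
}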
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 955c575..d9f8fcb 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1362,6 +1362,10 @@
         // If src is a ClassLoader, set the class table to null so that it gets recreated by the
         // ClassLoader.
         down_cast<mirror::ClassLoader*>(copy)->SetClassTable(nullptr);
+        // Also set allocator to null to be safe. The allocator is created when we create the class
+        // table. We also never expect to unload things in the image since they are held live as
+        // roots.
+        down_cast<mirror::ClassLoader*>(copy)->SetAllocator(nullptr);
       }
     }
     FixupVisitor visitor(this, copy);
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index c96b171..4ddd457 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -1123,11 +1123,7 @@
     } while (false)
 
   VISIT(InitCodeMethodVisitor);
-  // InitImageMethodVisitor visits all methods, resolves them (failing if one cannot be resolved,
-  // which is a bad sign for a working boot image), and then install entrypoints.
-  // In case of VerifyAtRuntime, there won't be code, and we do not want to resolve the methods
-  // (this has been skipped in the driver for performance).
-  if (compiler_driver_->IsImage() && !compiler_driver_->GetCompilerOptions().VerifyAtRuntime()) {
+  if (compiler_driver_->IsImage()) {
     VISIT(InitImageMethodVisitor);
   }
 
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 62f5b9a..960f4d9 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -14,8 +14,11 @@
  * limitations under the License.
  */
 
-#include "base/arena_containers.h"
 #include "bounds_check_elimination.h"
+
+#include <limits>
+
+#include "base/arena_containers.h"
 #include "induction_var_range.h"
 #include "nodes.h"
 
@@ -48,11 +51,11 @@
     if (right == 0) {
       return false;
     }
-    if ((right > 0) && (left <= INT_MAX - right)) {
+    if ((right > 0) && (left <= (std::numeric_limits<int32_t>::max() - right))) {
       // No overflow.
       return false;
     }
-    if ((right < 0) && (left >= INT_MIN - right)) {
+    if ((right < 0) && (left >= (std::numeric_limits<int32_t>::min() - right))) {
       // No underflow.
       return false;
     }
@@ -120,8 +123,8 @@
     return instruction_ == nullptr;
   }
 
-  static ValueBound Min() { return ValueBound(nullptr, INT_MIN); }
-  static ValueBound Max() { return ValueBound(nullptr, INT_MAX); }
+  static ValueBound Min() { return ValueBound(nullptr, std::numeric_limits<int32_t>::min()); }
+  static ValueBound Max() { return ValueBound(nullptr, std::numeric_limits<int32_t>::max()); }
 
   bool Equals(ValueBound bound) const {
     return instruction_ == bound.instruction_ && constant_ == bound.constant_;
@@ -213,7 +216,7 @@
 
     int32_t new_constant;
     if (c > 0) {
-      if (constant_ > INT_MAX - c) {
+      if (constant_ > (std::numeric_limits<int32_t>::max() - c)) {
         *overflow = true;
         return Max();
       }
@@ -227,7 +230,7 @@
       *overflow = true;
       return Max();
     } else {
-      if (constant_ < INT_MIN - c) {
+      if (constant_ < (std::numeric_limits<int32_t>::min() - c)) {
         *underflow = true;
         return Min();
       }
@@ -256,8 +259,8 @@
   explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable)
       : induction_variable_(induction_variable),
         found_array_length_(nullptr),
-        offset_low_(INT_MAX),
-        offset_high_(INT_MIN) {
+        offset_low_(std::numeric_limits<int32_t>::max()),
+        offset_high_(std::numeric_limits<int32_t>::min()) {
     Run();
   }
 
@@ -407,7 +410,7 @@
  * of an existing value range, NewArray or a loop phi corresponding to an
  * incrementing/decrementing array index (MonotonicValueRange).
  */
-class ValueRange : public ArenaObject<kArenaAllocMisc> {
+class ValueRange : public ArenaObject<kArenaAllocBoundsCheckElimination> {
  public:
   ValueRange(ArenaAllocator* allocator, ValueBound lower, ValueBound upper)
       : allocator_(allocator), lower_(lower), upper_(upper) {}
@@ -492,7 +495,7 @@
                       HInstruction* initial,
                       int32_t increment,
                       ValueBound bound)
-      // To be conservative, give it full range [INT_MIN, INT_MAX] in case it's
+      // To be conservative, give it full range [Min(), Max()] in case it's
       // used as a regular value range, due to possible overflow/underflow.
       : ValueRange(allocator, ValueBound::Min(), ValueBound::Max()),
         induction_variable_(induction_variable),
@@ -554,19 +557,19 @@
     if (increment_ > 0) {
       // Monotonically increasing.
       ValueBound lower = ValueBound::NarrowLowerBound(bound_, range->GetLower());
-      if (!lower.IsConstant() || lower.GetConstant() == INT_MIN) {
+      if (!lower.IsConstant() || lower.GetConstant() == std::numeric_limits<int32_t>::min()) {
         // Lower bound isn't useful. Leave it to deoptimization.
         return this;
       }
 
-      // We currently conservatively assume max array length is INT_MAX. If we can
-      // make assumptions about the max array length, e.g. due to the max heap size,
+      // We currently conservatively assume max array length is Max().
+      // If we can make assumptions about the max array length, e.g. due to the max heap size,
       // divided by the element size (such as 4 bytes for each integer array), we can
       // lower this number and rule out some possible overflows.
-      int32_t max_array_len = INT_MAX;
+      int32_t max_array_len = std::numeric_limits<int32_t>::max();
 
       // max possible integer value of range's upper value.
-      int32_t upper = INT_MAX;
+      int32_t upper = std::numeric_limits<int32_t>::max();
       // Try to lower upper.
       ValueBound upper_bound = range->GetUpper();
       if (upper_bound.IsConstant()) {
@@ -593,7 +596,7 @@
               ((int64_t)upper - (int64_t)initial_constant) / increment_ * increment_;
         }
       }
-      if (last_num_in_sequence <= INT_MAX - increment_) {
+      if (last_num_in_sequence <= (std::numeric_limits<int32_t>::max() - increment_)) {
         // No overflow. The sequence will be stopped by the upper bound test as expected.
         return new (GetAllocator()) ValueRange(GetAllocator(), lower, range->GetUpper());
       }
@@ -604,7 +607,7 @@
       DCHECK_NE(increment_, 0);
       // Monotonically decreasing.
       ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper());
-      if ((!upper.IsConstant() || upper.GetConstant() == INT_MAX) &&
+      if ((!upper.IsConstant() || upper.GetConstant() == std::numeric_limits<int32_t>::max()) &&
           !upper.IsRelatedToArrayLength()) {
         // Upper bound isn't useful. Leave it to deoptimization.
         return this;
@@ -614,7 +617,7 @@
       // for common cases.
       if (range->GetLower().IsConstant()) {
         int32_t constant = range->GetLower().GetConstant();
-        if (constant >= INT_MIN - increment_) {
+        if (constant >= (std::numeric_limits<int32_t>::min() - increment_)) {
           return new (GetAllocator()) ValueRange(GetAllocator(), range->GetLower(), upper);
         }
       }
@@ -1099,7 +1102,8 @@
   // Very large constant index is considered as an anomaly. This is a threshold
   // beyond which we don't bother to apply the deoptimization technique since
   // it's likely some AIOOBE will be thrown.
-  static constexpr int32_t kMaxConstantForAddingDeoptimize = INT_MAX - 1024 * 1024;
+  static constexpr int32_t kMaxConstantForAddingDeoptimize =
+      std::numeric_limits<int32_t>::max() - 1024 * 1024;
 
   // Added blocks for loop body entry test.
   bool IsAddedBlock(HBasicBlock* block) const {
@@ -1108,7 +1112,14 @@
 
   BCEVisitor(HGraph* graph, HInductionVarAnalysis* induction_analysis)
       : HGraphVisitor(graph),
-        maps_(graph->GetBlocks().size()),
+        maps_(graph->GetBlocks().size(),
+              ArenaSafeMap<int, ValueRange*>(
+                  std::less<int>(),
+                  graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+              graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        first_constant_index_bounds_check_map_(
+            std::less<int>(),
+            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
         need_to_revisit_block_(false),
         initial_block_size_(graph->GetBlocks().size()),
         induction_range_(induction_analysis) {}
@@ -1133,14 +1144,9 @@
       // Added blocks don't keep value ranges.
       return nullptr;
     }
-    int block_id = basic_block->GetBlockId();
-    if (maps_.at(block_id) == nullptr) {
-      std::unique_ptr<ArenaSafeMap<int, ValueRange*>> map(
-          new ArenaSafeMap<int, ValueRange*>(
-              std::less<int>(), GetGraph()->GetArena()->Adapter()));
-      maps_.at(block_id) = std::move(map);
-    }
-    return maps_.at(block_id).get();
+    uint32_t block_id = basic_block->GetBlockId();
+    DCHECK_LT(block_id, maps_.size());
+    return &maps_[block_id];
   }
 
   // Traverse up the dominator tree to look for value range info.
@@ -1165,8 +1171,8 @@
   ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) {
     InductionVarRange::Value v1 = induction_range_.GetMinInduction(context, instruction);
     InductionVarRange::Value v2 = induction_range_.GetMaxInduction(context, instruction);
-    if ((v1.a_constant == 0 || v1.a_constant == 1) && v1.b_constant != INT_MIN &&
-        (v2.a_constant == 0 || v2.a_constant == 1) && v2.b_constant != INT_MAX) {
+    if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
+        v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
       DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
       DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
       ValueBound low = ValueBound(v1.instruction, v1.b_constant);
@@ -1467,8 +1473,8 @@
       // Once we have an array access like 'array[5] = 1', we record array.length >= 6.
       // We currently don't do it for non-constant index since a valid array[i] can't prove
       // a valid array[i-1] yet due to the lower bound side.
-      if (constant == INT_MAX) {
-        // INT_MAX as an index will definitely throw AIOOBE.
+      if (constant == std::numeric_limits<int32_t>::max()) {
+        // Max() as an index will definitely throw AIOOBE.
         return;
       }
       ValueBound lower = ValueBound(nullptr, constant + 1);
@@ -1690,8 +1696,8 @@
     // The value of left input of instruction equals (left + c).
 
     // (array_length + 1) or smaller divided by two or more
-    // always generate a value in [INT_MIN, array_length].
-    // This is true even if array_length is INT_MAX.
+    // always generate a value in [Min(), array_length].
+    // This is true even if array_length is Max().
     if (left->IsArrayLength() && c <= 1) {
       if (instruction->IsUShr() && c < 0) {
         // Make sure for unsigned shift, left side is not negative.
@@ -1701,7 +1707,7 @@
       }
       ValueRange* range = new (GetGraph()->GetArena()) ValueRange(
           GetGraph()->GetArena(),
-          ValueBound(nullptr, INT_MIN),
+          ValueBound(nullptr, std::numeric_limits<int32_t>::min()),
           ValueBound(left, 0));
       GetValueRangeMap(instruction->GetBlock())->Overwrite(instruction->GetId(), range);
     }
@@ -1811,7 +1817,7 @@
         continue;
       }
       HIntConstant* lower_bound_const_instr = nullptr;
-      int32_t lower_bound_const = INT_MIN;
+      int32_t lower_bound_const = std::numeric_limits<int32_t>::min();
       size_t counter = 0;
       // Count the constant indexing for which bounds checks haven't
       // been removed yet.
@@ -1838,11 +1844,11 @@
     }
   }
 
-  std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_;
+  ArenaVector<ArenaSafeMap<int, ValueRange*>> maps_;
 
   // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in
   // a block that checks a constant index against that HArrayLength.
-  SafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_;
+  ArenaSafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_;
 
   // For the block, there is at least one HArrayLength instruction for which there
   // is more than one bounds check instruction with constant indexing. And it's
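The bounds-check-elimination hunks above replace the INT_MIN/INT_MAX macros with std::numeric_limits<int32_t> and keep the rearranged comparisons that detect wrap-around before it can happen (the subtraction on the limit side cannot overflow once the sign of the addend is known). The core check, as a standalone sketch:

#include <cassert>
#include <cstdint>
#include <limits>

// True if left + right would overflow or underflow int32_t. Mirrors the
// overflow guard above: for a positive right, only left > max - right can
// overflow; for a negative right, only left < min - right can underflow.
bool AddWouldWrap(int32_t left, int32_t right) {
  if (right == 0) {
    return false;
  }
  if (right > 0) {
    return left > std::numeric_limits<int32_t>::max() - right;
  }
  return left < std::numeric_limits<int32_t>::min() - right;
}

int main() {
  assert(!AddWouldWrap(1, 2));
  assert(AddWouldWrap(std::numeric_limits<int32_t>::max(), 1));
  assert(AddWouldWrap(std::numeric_limits<int32_t>::min(), -1));
  return 0;
}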
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 274a2a6..ebbfb14 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -140,11 +140,11 @@
 
 void HGraphBuilder::InitializeLocals(uint16_t count) {
   graph_->SetNumberOfVRegs(count);
-  locals_.SetSize(count);
+  locals_.resize(count);
   for (int i = 0; i < count; i++) {
     HLocal* local = new (arena_) HLocal(i);
     entry_block_->AddInstruction(local);
-    locals_.Put(i, local);
+    locals_[i] = local;
   }
 }
 
@@ -156,7 +156,7 @@
 
   graph_->SetNumberOfInVRegs(number_of_parameters);
   const char* shorty = dex_compilation_unit_->GetShorty();
-  int locals_index = locals_.Size() - number_of_parameters;
+  int locals_index = locals_.size() - number_of_parameters;
   int parameter_index = 0;
 
   if (!dex_compilation_unit_->IsStatic()) {
@@ -262,22 +262,6 @@
   return false;
 }
 
-static const DexFile::TryItem* GetTryItem(HBasicBlock* block,
-                                          const DexFile::CodeItem& code_item,
-                                          const ArenaBitVector& can_block_throw) {
-  DCHECK(!block->IsSingleTryBoundary());
-
-  // Block does not contain throwing instructions. Even if it is covered by
-  // a TryItem, we will consider it not in a try block.
-  if (!can_block_throw.IsBitSet(block->GetBlockId())) {
-    return nullptr;
-  }
-
-  // Instructions in the block may throw. Find a TryItem covering this block.
-  int32_t try_item_idx = DexFile::FindTryItem(code_item, block->GetDexPc());
-  return (try_item_idx == -1) ? nullptr : DexFile::GetTryItems(code_item, try_item_idx);
-}
-
 void HGraphBuilder::CreateBlocksForTryCatch(const DexFile::CodeItem& code_item) {
   if (code_item.tries_size_ == 0) {
     return;
@@ -316,18 +300,18 @@
   }
 }
 
-void HGraphBuilder::SplitTryBoundaryEdge(HBasicBlock* predecessor,
-                                         HBasicBlock* successor,
-                                         HTryBoundary::BoundaryKind kind,
-                                         const DexFile::CodeItem& code_item,
-                                         const DexFile::TryItem& try_item) {
-  // Split the edge with a single TryBoundary instruction.
-  HTryBoundary* try_boundary = new (arena_) HTryBoundary(kind, successor->GetDexPc());
-  HBasicBlock* try_entry_block = graph_->SplitEdge(predecessor, successor);
-  try_entry_block->AddInstruction(try_boundary);
+// Returns the TryItem stored for `block` or nullptr if there is no info for it.
+static const DexFile::TryItem* GetTryItem(
+    HBasicBlock* block,
+    const ArenaSafeMap<uint32_t, const DexFile::TryItem*>& try_block_info) {
+  auto iterator = try_block_info.find(block->GetBlockId());
+  return (iterator == try_block_info.end()) ? nullptr : iterator->second;
+}
 
-  // Link the TryBoundary to the handlers of `try_item`.
-  for (CatchHandlerIterator it(code_item, try_item); it.HasNext(); it.Next()) {
+void HGraphBuilder::LinkToCatchBlocks(HTryBoundary* try_boundary,
+                                      const DexFile::CodeItem& code_item,
+                                      const DexFile::TryItem* try_item) {
+  for (CatchHandlerIterator it(code_item, *try_item); it.HasNext(); it.Next()) {
     try_boundary->AddExceptionHandler(FindBlockStartingAt(it.GetHandlerAddress()));
   }
 }
@@ -337,132 +321,103 @@
     return;
   }
 
-  // Bit vector stores information on which blocks contain throwing instructions.
-  // Must be expandable because catch blocks may be split into two.
-  ArenaBitVector can_block_throw(arena_, graph_->GetBlocks().size(), /* expandable */ true);
+  // Keep a map of all try blocks and their respective TryItems. We do not use
+  // the block's pointer but rather its id to ensure deterministic iteration.
+  ArenaSafeMap<uint32_t, const DexFile::TryItem*> try_block_info(
+      std::less<uint32_t>(), arena_->Adapter(kArenaAllocGraphBuilder));
 
-  // Scan blocks and mark those which contain throwing instructions.
-  // NOTE: We're appending new blocks inside the loop, so we need to use index because iterators
-  // can be invalidated. We remember the initial size to avoid iterating over the new blocks.
-  for (size_t block_id = 0u, end = graph_->GetBlocks().size(); block_id != end; ++block_id) {
-    HBasicBlock* block = graph_->GetBlocks()[block_id];
-    bool can_throw = false;
-    for (HInstructionIterator insn(block->GetInstructions()); !insn.Done(); insn.Advance()) {
-      if (insn.Current()->CanThrow()) {
-        can_throw = true;
-        break;
-      }
-    }
+  // Obtain TryItem information for blocks with throwing instructions, and split
+  // blocks which are both try & catch to simplify the graph.
+  // NOTE: We are appending new blocks inside the loop, so we need to use index
+  // because iterators can be invalidated. We remember the initial size to avoid
+  // iterating over the new blocks which cannot throw.
+  for (size_t i = 0, e = graph_->GetBlocks().size(); i < e; ++i) {
+    HBasicBlock* block = graph_->GetBlocks()[i];
 
-    if (can_throw) {
-      if (block->IsCatchBlock()) {
-        // Catch blocks are always considered an entry point into the TryItem in
-        // order to avoid splitting exceptional edges. We split the block after
-        // the move-exception (if present) and mark the first part non-throwing.
-        // Later on, a TryBoundary will be inserted between the two blocks.
-        HInstruction* first_insn = block->GetFirstInstruction();
-        if (first_insn->IsLoadException()) {
-          // Catch block starts with a LoadException. Split the block after the
-          // StoreLocal and ClearException which must come after the load.
-          DCHECK(first_insn->GetNext()->IsStoreLocal());
-          DCHECK(first_insn->GetNext()->GetNext()->IsClearException());
-          block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext());
-        } else {
-          // Catch block does not load the exception. Split at the beginning to
-          // create an empty catch block.
-          block = block->SplitBefore(first_insn);
+    // Do not bother creating exceptional edges for try blocks which have no
+    // throwing instructions. In that case we simply assume that the block is
+    // not covered by a TryItem. This prevents us from creating a throw-catch
+    // loop for synchronized blocks.
+    if (block->HasThrowingInstructions()) {
+      // Try to find a TryItem covering the block.
+      DCHECK_NE(block->GetDexPc(), kNoDexPc) << "Block must have a dex_pc to find its TryItem.";
+      const int32_t try_item_idx = DexFile::FindTryItem(code_item, block->GetDexPc());
+      if (try_item_idx != -1) {
+        // Block throwing and in a TryItem. Store the try block information.
+        HBasicBlock* throwing_block = block;
+        if (block->IsCatchBlock()) {
+          // Simplify blocks which are both try and catch, otherwise we would
+          // need a strategy for splitting exceptional edges. We split the block
+          // after the move-exception (if present) and mark the first part not
+          // throwing. The normal-flow edge between them will be split later.
+          HInstruction* first_insn = block->GetFirstInstruction();
+          if (first_insn->IsLoadException()) {
+            // Catch block starts with a LoadException. Split the block after
+            // the StoreLocal and ClearException which must come after the load.
+            DCHECK(first_insn->GetNext()->IsStoreLocal());
+            DCHECK(first_insn->GetNext()->GetNext()->IsClearException());
+            throwing_block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext());
+          } else {
+            // Catch block does not load the exception. Split at the beginning
+            // to create an empty catch block.
+            throwing_block = block->SplitBefore(first_insn);
+          }
         }
+
+        try_block_info.Put(throwing_block->GetBlockId(),
+                           DexFile::GetTryItems(code_item, try_item_idx));
       }
-      can_block_throw.SetBit(block->GetBlockId());
     }
   }
 
-  // Iterate over all blocks, find those covered by some TryItem and:
-  //   (a) split edges which enter/exit the try range,
-  //   (b) create TryBoundary instructions in the new blocks,
-  //   (c) link the new blocks to corresponding exception handlers.
-  // We cannot iterate only over blocks in `branch_targets_` because switch-case
-  // blocks share the same dex_pc.
-  // NOTE: We're appending new blocks inside the loop, so we need to use index because iterators
-  // can be invalidated. We remember the initial size to avoid iterating over the new blocks.
-  for (size_t block_id = 0u, end = graph_->GetBlocks().size(); block_id != end; ++block_id) {
-    HBasicBlock* try_block = graph_->GetBlocks()[block_id];
-    // TryBoundary blocks are added at the end of the list and not iterated over.
-    DCHECK(!try_block->IsSingleTryBoundary());
-
-    // Find the TryItem for this block.
-    const DexFile::TryItem* try_item = GetTryItem(try_block, code_item, can_block_throw);
-    if (try_item == nullptr) {
-      continue;
+  // Do a pass over the try blocks and insert entering TryBoundaries where at
+  // least one predecessor is not covered by the same TryItem as the try block.
+  // We do not split each edge separately, but rather create one boundary block
+  // that all predecessors are relinked to. This preserves loop headers (b/23895756).
+  for (auto entry : try_block_info) {
+    HBasicBlock* try_block = graph_->GetBlock(entry.first);
+    for (HBasicBlock* predecessor : try_block->GetPredecessors()) {
+      if (GetTryItem(predecessor, try_block_info) != entry.second) {
+        // Found a predecessor not covered by the same TryItem. Insert entering
+        // boundary block.
+        HTryBoundary* try_entry =
+            new (arena_) HTryBoundary(HTryBoundary::kEntry, try_block->GetDexPc());
+        try_block->CreateImmediateDominator()->AddInstruction(try_entry);
+        LinkToCatchBlocks(try_entry, code_item, entry.second);
+        break;
+      }
     }
+  }
 
-    // Catch blocks were split earlier and cannot throw.
-    DCHECK(!try_block->IsCatchBlock());
+  // Do a second pass over the try blocks and insert exit TryBoundaries where
+  // the successor is not in the same TryItem.
+  for (auto entry : try_block_info) {
+    HBasicBlock* try_block = graph_->GetBlock(entry.first);
+    // NOTE: Do not use iterators because SplitEdge would invalidate them.
+    for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) {
+      HBasicBlock* successor = try_block->GetSuccessor(i);
 
-    // Find predecessors which are not covered by the same TryItem range. Such
-    // edges enter the try block and will have a TryBoundary inserted.
-    for (size_t i = 0; i < try_block->GetPredecessors().size(); ++i) {
-      HBasicBlock* predecessor = try_block->GetPredecessor(i);
-      if (predecessor->IsSingleTryBoundary()) {
-        // The edge was already split because of an exit from a neighbouring
-        // TryItem. We split it again and insert an entry point.
-        if (kIsDebugBuild) {
-          HTryBoundary* last_insn = predecessor->GetLastInstruction()->AsTryBoundary();
-          const DexFile::TryItem* predecessor_try_item =
-              GetTryItem(predecessor->GetSinglePredecessor(), code_item, can_block_throw);
-          DCHECK(!last_insn->IsEntry());
-          DCHECK_EQ(last_insn->GetNormalFlowSuccessor(), try_block);
-          DCHECK(try_block->IsFirstIndexOfPredecessor(predecessor, i));
-          DCHECK_NE(try_item, predecessor_try_item);
-        }
-      } else if (GetTryItem(predecessor, code_item, can_block_throw) != try_item) {
-        // This is an entry point into the TryItem and the edge has not been
-        // split yet. That means that `predecessor` is not in a TryItem, or
-        // it is in a different TryItem and we happened to iterate over this
-        // block first. We split the edge and insert an entry point.
-      } else {
-        // Not an edge on the boundary of the try block.
+      // If the successor is a try block, all of its predecessors must be
+      // covered by the same TryItem. Otherwise the previous pass would have
+      // created a non-throwing boundary block.
+      if (GetTryItem(successor, try_block_info) != nullptr) {
+        DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info));
         continue;
       }
-      SplitTryBoundaryEdge(predecessor, try_block, HTryBoundary::kEntry, code_item, *try_item);
-    }
 
-    // Find successors which are not covered by the same TryItem range. Such
-    // edges exit the try block and will have a TryBoundary inserted.
-    for (HBasicBlock* successor : try_block->GetSuccessors()) {
-      if (successor->IsCatchBlock()) {
-        // A catch block is always considered an entry point into its TryItem.
-        // We therefore assume this is an exit point, regardless of whether
-        // the catch block is in a different TryItem or not.
-      } else if (successor->IsSingleTryBoundary()) {
-        // The edge was already split because of an entry into a neighbouring
-        // TryItem. We split it again and insert an exit.
-        if (kIsDebugBuild) {
-          HTryBoundary* last_insn = successor->GetLastInstruction()->AsTryBoundary();
-          const DexFile::TryItem* successor_try_item =
-              GetTryItem(last_insn->GetNormalFlowSuccessor(), code_item, can_block_throw);
-          DCHECK_EQ(try_block, successor->GetSinglePredecessor());
-          DCHECK(last_insn->IsEntry());
-          DCHECK_NE(try_item, successor_try_item);
-        }
-      } else if (GetTryItem(successor, code_item, can_block_throw) != try_item) {
-        // This is an exit out of the TryItem and the edge has not been split
-        // yet. That means that either `successor` is not in a TryItem, or it
-        // is in a different TryItem and we happened to iterate over this
-        // block first. We split the edge and insert an exit.
-        HInstruction* last_instruction = try_block->GetLastInstruction();
-        if (last_instruction->IsReturn() || last_instruction->IsReturnVoid()) {
-          DCHECK_EQ(successor, exit_block_);
-          // Control flow exits the try block with a Return(Void). Because
-          // splitting the edge would invalidate the invariant that Return
-          // always jumps to Exit, we move the Return outside the try block.
-          successor = try_block->SplitBefore(last_instruction);
-        }
-      } else {
-        // Not an edge on the boundary of the try block.
-        continue;
+      // Preserve the invariant that Return(Void) always jumps to Exit by moving
+      // it outside the try block if necessary.
+      HInstruction* last_instruction = try_block->GetLastInstruction();
+      if (last_instruction->IsReturn() || last_instruction->IsReturnVoid()) {
+        DCHECK_EQ(successor, exit_block_);
+        successor = try_block->SplitBefore(last_instruction);
       }
-      SplitTryBoundaryEdge(try_block, successor, HTryBoundary::kExit, code_item, *try_item);
+
+      // Insert TryBoundary and link to catch blocks.
+      HTryBoundary* try_exit =
+          new (arena_) HTryBoundary(HTryBoundary::kExit, successor->GetDexPc());
+      graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit);
+      LinkToCatchBlocks(try_exit, code_item, entry.second);
     }
   }
 }
@@ -554,11 +509,11 @@
 bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
                                          const uint16_t* code_end,
                                          size_t* number_of_branches) {
-  branch_targets_.SetSize(code_end - code_ptr);
+  branch_targets_.resize(code_end - code_ptr, nullptr);
 
   // Create the first block for the dex instructions, single successor of the entry block.
   HBasicBlock* block = new (arena_) HBasicBlock(graph_, 0);
-  branch_targets_.Put(0, block);
+  branch_targets_[0] = block;
   entry_block_->AddSuccessor(block);
 
   // Iterate over all instructions and find branching instructions. Create blocks for
@@ -602,7 +557,7 @@
         // Create a block for the switch-case logic. The block gets the dex_pc
         // of the SWITCH instruction because it is part of its semantics.
         block = new (arena_) HBasicBlock(graph_, dex_pc);
-        branch_targets_.Put(table.GetDexPcForIndex(i), block);
+        branch_targets_[table.GetDexPcForIndex(i)] = block;
       }
 
       // Fall-through. Add a block if there is more code afterwards.
@@ -626,15 +581,15 @@
 
 HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t dex_pc) const {
   DCHECK_GE(dex_pc, 0);
-  DCHECK_LT(static_cast<size_t>(dex_pc), branch_targets_.Size());
-  return branch_targets_.Get(dex_pc);
+  DCHECK_LT(static_cast<size_t>(dex_pc), branch_targets_.size());
+  return branch_targets_[dex_pc];
 }
 
 HBasicBlock* HGraphBuilder::FindOrCreateBlockStartingAt(int32_t dex_pc) {
   HBasicBlock* block = FindBlockStartingAt(dex_pc);
   if (block == nullptr) {
     block = new (arena_) HBasicBlock(graph_, dex_pc);
-    branch_targets_.Put(dex_pc, block);
+    branch_targets_[dex_pc] = block;
   }
   return block;
 }
@@ -1231,6 +1186,12 @@
   }
 }
 
+static Primitive::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) {
+  const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
+  const char* type = dex_file.GetFieldTypeDescriptor(field_id);
+  return Primitive::GetType(type[0]);
+}
+
 bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
                                              uint32_t dex_pc,
                                              bool is_put) {
@@ -1250,44 +1211,61 @@
   ArtField* resolved_field =
       compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
 
-  if (resolved_field == nullptr) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedField);
-    return false;
-  }
-
-  Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
 
   HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot, dex_pc);
-  current_block_->AddInstruction(new (arena_) HNullCheck(object, dex_pc));
+  HInstruction* null_check = new (arena_) HNullCheck(object, dex_pc);
+  current_block_->AddInstruction(null_check);
+
+  Primitive::Type field_type = (resolved_field == nullptr)
+      ? GetFieldAccessType(*dex_file_, field_index)
+      : resolved_field->GetTypeAsPrimitiveType();
   if (is_put) {
     Temporaries temps(graph_);
-    HInstruction* null_check = current_block_->GetLastInstruction();
     // We need one temporary for the null check.
     temps.Add(null_check);
     HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
-    current_block_->AddInstruction(new (arena_) HInstanceFieldSet(
-        null_check,
-        value,
-        field_type,
-        resolved_field->GetOffset(),
-        resolved_field->IsVolatile(),
-        field_index,
-        *dex_file_,
-        dex_compilation_unit_->GetDexCache(),
-        dex_pc));
+    HInstruction* field_set = nullptr;
+    if (resolved_field == nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      field_set = new (arena_) HUnresolvedInstanceFieldSet(null_check,
+                                                           value,
+                                                           field_type,
+                                                           field_index,
+                                                           dex_pc);
+    } else {
+      field_set = new (arena_) HInstanceFieldSet(null_check,
+                                                 value,
+                                                 field_type,
+                                                 resolved_field->GetOffset(),
+                                                 resolved_field->IsVolatile(),
+                                                 field_index,
+                                                 *dex_file_,
+                                                 dex_compilation_unit_->GetDexCache(),
+                                                 dex_pc);
+    }
+    current_block_->AddInstruction(field_set);
   } else {
-    current_block_->AddInstruction(new (arena_) HInstanceFieldGet(
-        current_block_->GetLastInstruction(),
-        field_type,
-        resolved_field->GetOffset(),
-        resolved_field->IsVolatile(),
-        field_index,
-        *dex_file_,
-        dex_compilation_unit_->GetDexCache(),
-        dex_pc));
-
-    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc);
+    HInstruction* field_get = nullptr;
+    if (resolved_field == nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      field_get = new (arena_) HUnresolvedInstanceFieldGet(null_check,
+                                                           field_type,
+                                                           field_index,
+                                                           dex_pc);
+    } else {
+      field_get = new (arena_) HInstanceFieldGet(null_check,
+                                                 field_type,
+                                                 resolved_field->GetOffset(),
+                                                 resolved_field->IsVolatile(),
+                                                 field_index,
+                                                 *dex_file_,
+                                                 dex_compilation_unit_->GetDexCache(),
+                                                 dex_pc);
+    }
+    current_block_->AddInstruction(field_get);
+    UpdateLocal(source_or_dest_reg, field_get, dex_pc);
   }
+
   return true;
 }
 
@@ -1327,6 +1305,23 @@
   return outer_class.Get() == cls.Get();
 }
 
+void HGraphBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
+                                                     uint32_t dex_pc,
+                                                     bool is_put,
+                                                     Primitive::Type field_type) {
+  uint32_t source_or_dest_reg = instruction.VRegA_21c();
+  uint16_t field_index = instruction.VRegB_21c();
+
+  if (is_put) {
+    HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
+    current_block_->AddInstruction(
+        new (arena_) HUnresolvedStaticFieldSet(value, field_type, field_index, dex_pc));
+  } else {
+    current_block_->AddInstruction(
+        new (arena_) HUnresolvedStaticFieldGet(field_type, field_index, dex_pc));
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc);
+  }
+}
 bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
                                            uint32_t dex_pc,
                                            bool is_put) {
@@ -1344,10 +1339,13 @@
       soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true);
 
   if (resolved_field == nullptr) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedField);
-    return false;
+    MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+    Primitive::Type field_type = GetFieldAccessType(*dex_file_, field_index);
+    BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
+    return true;
   }
 
+  Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
   const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
   Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
       outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
@@ -1362,6 +1360,7 @@
     // The compiler driver cannot currently understand multiple dex caches involved. Just bailout.
     return false;
   } else {
+    // TODO: This is rather expensive. Perf it and cache the results if needed.
     std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField(
         outer_dex_cache.Get(),
         GetCompilingClass(),
@@ -1370,7 +1369,9 @@
         &storage_index);
     bool can_easily_access = is_put ? pair.second : pair.first;
     if (!can_easily_access) {
-      return false;
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
+      BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
+      return true;
     }
   }
 
@@ -1391,8 +1392,6 @@
     cls = new (arena_) HClinitCheck(constant, dex_pc);
     current_block_->AddInstruction(cls);
   }
-
-  Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
   if (is_put) {
     // We need to keep the class alive before loading the value.
     Temporaries temps(graph_);
@@ -1685,6 +1684,34 @@
       dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index);
 }
 
+void HGraphBuilder::BuildSwitchJumpTable(const SwitchTable& table,
+                                         const Instruction& instruction,
+                                         HInstruction* value,
+                                         uint32_t dex_pc) {
+  // Add the successor blocks to the current block.
+  uint16_t num_entries = table.GetNumEntries();
+  for (size_t i = 1; i <= num_entries; i++) {
+    int32_t target_offset = table.GetEntryAt(i);
+    HBasicBlock* case_target = FindBlockStartingAt(dex_pc + target_offset);
+    DCHECK(case_target != nullptr);
+
+    // Add the target block as a successor.
+    current_block_->AddSuccessor(case_target);
+  }
+
+  // Add the default target block as the last successor.
+  HBasicBlock* default_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
+  DCHECK(default_target != nullptr);
+  current_block_->AddSuccessor(default_target);
+
+  // Now add the Switch instruction.
+  int32_t starting_key = table.GetEntryAt(0);
+  current_block_->AddInstruction(
+      new (arena_) HPackedSwitch(starting_key, num_entries, value, dex_pc));
+  // This block ends with control flow.
+  current_block_ = nullptr;
+}
+
 void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc) {
   // Verifier guarantees that the payload for PackedSwitch contains:
   //   (a) number of entries (may be zero)
@@ -1695,18 +1722,30 @@
   // Value to test against.
   HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
 
+  // Starting key value.
+  int32_t starting_key = table.GetEntryAt(0);
+
   // Retrieve number of entries.
   uint16_t num_entries = table.GetNumEntries();
   if (num_entries == 0) {
     return;
   }
 
-  // Chained cmp-and-branch, starting from starting_key.
-  int32_t starting_key = table.GetEntryAt(0);
-
-  for (size_t i = 1; i <= num_entries; i++) {
-    BuildSwitchCaseHelper(instruction, i, i == num_entries, table, value, starting_key + i - 1,
-                          table.GetEntryAt(i), dex_pc);
+  // Don't use a packed switch if there are very few entries.
+  if (num_entries > kSmallSwitchThreshold) {
+    BuildSwitchJumpTable(table, instruction, value, dex_pc);
+  } else {
+    // Chained cmp-and-branch, starting from starting_key.
+    for (size_t i = 1; i <= num_entries; i++) {
+      BuildSwitchCaseHelper(instruction,
+                            i,
+                            i == num_entries,
+                            table,
+                            value,
+                            starting_key + i - 1,
+                            table.GetEntryAt(i),
+                            dex_pc);
+    }
   }
 }
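A minimal sketch of the selection logic above, assuming only the kSmallSwitchThreshold value from builder.h; the helper name and the driver in main are illustrative only:

#include <cstdint>
#include <iostream>

// Illustrative copy of kSmallSwitchThreshold from builder.h.
static constexpr uint16_t kSmallSwitchThreshold = 5;

// Returns true when a packed switch with num_entries cases is lowered through
// an HPackedSwitch (jump table) rather than a chain of compare-and-branch.
bool UsesJumpTable(uint16_t num_entries) {
  return num_entries > kSmallSwitchThreshold;
}

int main() {
  for (int n : {0, 3, 5, 6, 40}) {
    std::cout << n << " entries -> "
              << (UsesJumpTable(static_cast<uint16_t>(n)) ? "jump table"
                                                          : "compare/branch chain")
              << "\n";
  }
  return 0;
}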
 
@@ -2840,18 +2879,19 @@
   return true;
 }  // NOLINT(readability/fn_size)
 
-HLocal* HGraphBuilder::GetLocalAt(int register_index) const {
-  return locals_.Get(register_index);
+HLocal* HGraphBuilder::GetLocalAt(uint32_t register_index) const {
+  DCHECK_LT(register_index, locals_.size());
+  return locals_[register_index];
 }
 
-void HGraphBuilder::UpdateLocal(int register_index,
+void HGraphBuilder::UpdateLocal(uint32_t register_index,
                                 HInstruction* instruction,
                                 uint32_t dex_pc) const {
   HLocal* local = GetLocalAt(register_index);
   current_block_->AddInstruction(new (arena_) HStoreLocal(local, instruction, dex_pc));
 }
 
-HInstruction* HGraphBuilder::LoadLocal(int register_index,
+HInstruction* HGraphBuilder::LoadLocal(uint32_t register_index,
                                        Primitive::Type type,
                                        uint32_t dex_pc) const {
   HLocal* local = GetLocalAt(register_index);
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index ae452f2..b2dc241 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_BUILDER_H_
 #define ART_COMPILER_OPTIMIZING_BUILDER_H_
 
+#include "base/arena_containers.h"
 #include "base/arena_object.h"
 #include "dex_file.h"
 #include "dex_file-inl.h"
@@ -24,7 +25,6 @@
 #include "driver/dex_compilation_unit.h"
 #include "optimizing_compiler_stats.h"
 #include "primitive.h"
-#include "utils/growable_array.h"
 #include "nodes.h"
 
 namespace art {
@@ -43,8 +43,8 @@
                 const uint8_t* interpreter_metadata,
                 Handle<mirror::DexCache> dex_cache)
       : arena_(graph->GetArena()),
-        branch_targets_(graph->GetArena(), 0),
-        locals_(graph->GetArena(), 0),
+        branch_targets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
+        locals_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
         entry_block_(nullptr),
         exit_block_(nullptr),
         current_block_(nullptr),
@@ -64,8 +64,8 @@
   // Only for unit testing.
   HGraphBuilder(HGraph* graph, Primitive::Type return_type = Primitive::kPrimInt)
       : arena_(graph->GetArena()),
-        branch_targets_(graph->GetArena(), 0),
-        locals_(graph->GetArena(), 0),
+        branch_targets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
+        locals_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
         entry_block_(nullptr),
         exit_block_(nullptr),
         current_block_(nullptr),
@@ -90,6 +90,9 @@
 
   static constexpr const char* kBuilderPassName = "builder";
 
+  // The number of entries in a packed switch before we use a jump table.
+  static constexpr uint16_t kSmallSwitchThreshold = 5;
+
  private:
   // Analyzes the dex instruction and adds HInstruction to the graph
   // to execute that instruction. Returns whether the instruction can
@@ -118,21 +121,21 @@
   // instructions and links them to the corresponding catch blocks.
   void InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item);
 
-  // Splits a single edge, inserting a TryBoundary of given `kind` and linking
-  // it to exception handlers of `try_item`.
-  void SplitTryBoundaryEdge(HBasicBlock* predecessor,
-                            HBasicBlock* successor,
-                            HTryBoundary::BoundaryKind kind,
-                            const DexFile::CodeItem& code_item,
-                            const DexFile::TryItem& try_item);
+  // Iterates over the exception handlers of `try_item`, finds the corresponding
+  // catch blocks and makes them successors of `try_boundary`. The order of
+  // successors matches the order in which runtime exception delivery searches
+  // for a handler.
+  void LinkToCatchBlocks(HTryBoundary* try_boundary,
+                         const DexFile::CodeItem& code_item,
+                         const DexFile::TryItem* try_item);
 
   bool CanDecodeQuickenedInfo() const;
   uint16_t LookupQuickenedInfo(uint32_t dex_pc);
 
   void InitializeLocals(uint16_t count);
-  HLocal* GetLocalAt(int register_index) const;
-  void UpdateLocal(int register_index, HInstruction* instruction, uint32_t dex_pc) const;
-  HInstruction* LoadLocal(int register_index, Primitive::Type type, uint32_t dex_pc) const;
+  HLocal* GetLocalAt(uint32_t register_index) const;
+  void UpdateLocal(uint32_t register_index, HInstruction* instruction, uint32_t dex_pc) const;
+  HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const;
   void PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex_pc);
   void InitializeParameters(uint16_t number_of_parameters);
   bool NeedsAccessCheck(uint32_t type_index) const;
@@ -184,6 +187,10 @@
   // Builds an instance field access node and returns whether the instruction is supported.
   bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
 
+  void BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
+                                        uint32_t dex_pc,
+                                        bool is_put,
+                                        Primitive::Type field_type);
   // Builds a static field access node and returns whether the instruction is supported.
   bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
 
@@ -239,6 +246,12 @@
   // Builds an instruction sequence for a packed switch statement.
   void BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc);
 
+  // Builds a switch instruction from a packed switch statement.
+  void BuildSwitchJumpTable(const SwitchTable& table,
+                            const Instruction& instruction,
+                            HInstruction* value,
+                            uint32_t dex_pc);
+
   // Builds an instruction sequence for a sparse switch statement.
   void BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc);
 
@@ -304,9 +317,9 @@
   // A list of the size of the dex code holding block information for
   // the method. If an entry contains a block, then the dex instruction
   // starting at that entry is the first instruction of a new block.
-  GrowableArray<HBasicBlock*> branch_targets_;
+  ArenaVector<HBasicBlock*> branch_targets_;
 
-  GrowableArray<HLocal*> locals_;
+  ArenaVector<HLocal*> locals_;
 
   HBasicBlock* entry_block_;
   HBasicBlock* exit_block_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 3c6a41d..8254277 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -203,13 +203,13 @@
 
 void CodeGenerator::GenerateSlowPaths() {
   size_t code_start = 0;
-  for (size_t i = 0, e = slow_paths_.Size(); i < e; ++i) {
+  for (SlowPathCode* slow_path : slow_paths_) {
     if (disasm_info_ != nullptr) {
       code_start = GetAssembler()->CodeSize();
     }
-    slow_paths_.Get(i)->EmitNativeCode(this);
+    slow_path->EmitNativeCode(this);
     if (disasm_info_ != nullptr) {
-      disasm_info_->AddSlowPathInterval(slow_paths_.Get(i), code_start, GetAssembler()->CodeSize());
+      disasm_info_->AddSlowPathInterval(slow_path, code_start, GetAssembler()->CodeSize());
     }
   }
 }
@@ -413,6 +413,130 @@
   InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr);
 }
 
+void CodeGenerator::CreateUnresolvedFieldLocationSummary(
+    HInstruction* field_access,
+    Primitive::Type field_type,
+    const FieldAccessCallingConvention& calling_convention) {
+  bool is_instance = field_access->IsUnresolvedInstanceFieldGet()
+      || field_access->IsUnresolvedInstanceFieldSet();
+  bool is_get = field_access->IsUnresolvedInstanceFieldGet()
+      || field_access->IsUnresolvedStaticFieldGet();
+
+  ArenaAllocator* allocator = field_access->GetBlock()->GetGraph()->GetArena();
+  LocationSummary* locations =
+      new (allocator) LocationSummary(field_access, LocationSummary::kCall);
+
+  locations->AddTemp(calling_convention.GetFieldIndexLocation());
+
+  if (is_instance) {
+    // Add the `this` object for instance field accesses.
+    locations->SetInAt(0, calling_convention.GetObjectLocation());
+  }
+
+  // Note that pSetXXStatic/pGetXXStatic always take/return an int or int64
+  // regardless of the type. Because of that we are forced to special-case
+  // accesses to floating point values.
+  if (is_get) {
+    if (Primitive::IsFloatingPointType(field_type)) {
+      // The return value will be stored in regular registers while register
+      // allocator expects it in a floating point register.
+      // Note: we don't need to request additional temps because the return
+      // register(s) are already blocked due to the call and they may overlap
+      // with the input or the field index.
+      // The transfer between the two will be done at codegen level.
+      locations->SetOut(calling_convention.GetFpuLocation(field_type));
+    } else {
+      locations->SetOut(calling_convention.GetReturnLocation(field_type));
+    }
+  } else {
+    size_t set_index = is_instance ? 1 : 0;
+    if (Primitive::IsFloatingPointType(field_type)) {
+      // The set value comes from a float location while the calling convention
+      // expects it in a regular register location. Allocate a temp for it and
+      // make the transfer at codegen.
+      AddLocationAsTemp(calling_convention.GetSetValueLocation(field_type, is_instance), locations);
+      locations->SetInAt(set_index, calling_convention.GetFpuLocation(field_type));
+    } else {
+      locations->SetInAt(set_index,
+          calling_convention.GetSetValueLocation(field_type, is_instance));
+    }
+  }
+}
+
+void CodeGenerator::GenerateUnresolvedFieldAccess(
+    HInstruction* field_access,
+    Primitive::Type field_type,
+    uint32_t field_index,
+    uint32_t dex_pc,
+    const FieldAccessCallingConvention& calling_convention) {
+  LocationSummary* locations = field_access->GetLocations();
+
+  MoveConstant(locations->GetTemp(0), field_index);
+
+  bool is_instance = field_access->IsUnresolvedInstanceFieldGet()
+      || field_access->IsUnresolvedInstanceFieldSet();
+  bool is_get = field_access->IsUnresolvedInstanceFieldGet()
+      || field_access->IsUnresolvedStaticFieldGet();
+
+  if (!is_get && Primitive::IsFloatingPointType(field_type)) {
+    // Copy the float value to be set into the calling convention register.
+    // Note that using directly the temp location is problematic as we don't
+    // support temp register pairs. To avoid boilerplate conversion code, use
+    // the location from the calling convention.
+    MoveLocation(calling_convention.GetSetValueLocation(field_type, is_instance),
+                 locations->InAt(is_instance ? 1 : 0),
+                 (Primitive::Is64BitType(field_type) ? Primitive::kPrimLong : Primitive::kPrimInt));
+  }
+
+  QuickEntrypointEnum entrypoint = kQuickSet8Static;  // Initialize to anything to avoid warnings.
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      entrypoint = is_instance
+          ? (is_get ? kQuickGetBooleanInstance : kQuickSet8Instance)
+          : (is_get ? kQuickGetBooleanStatic : kQuickSet8Static);
+      break;
+    case Primitive::kPrimByte:
+      entrypoint = is_instance
+          ? (is_get ? kQuickGetByteInstance : kQuickSet8Instance)
+          : (is_get ? kQuickGetByteStatic : kQuickSet8Static);
+      break;
+    case Primitive::kPrimShort:
+      entrypoint = is_instance
+          ? (is_get ? kQuickGetShortInstance : kQuickSet16Instance)
+          : (is_get ? kQuickGetShortStatic : kQuickSet16Static);
+      break;
+    case Primitive::kPrimChar:
+      entrypoint = is_instance
+          ? (is_get ? kQuickGetCharInstance : kQuickSet16Instance)
+          : (is_get ? kQuickGetCharStatic : kQuickSet16Static);
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      entrypoint = is_instance
+          ? (is_get ? kQuickGet32Instance : kQuickSet32Instance)
+          : (is_get ? kQuickGet32Static : kQuickSet32Static);
+      break;
+    case Primitive::kPrimNot:
+      entrypoint = is_instance
+          ? (is_get ? kQuickGetObjInstance : kQuickSetObjInstance)
+          : (is_get ? kQuickGetObjStatic : kQuickSetObjStatic);
+      break;
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      entrypoint = is_instance
+          ? (is_get ? kQuickGet64Instance : kQuickSet64Instance)
+          : (is_get ? kQuickGet64Static : kQuickSet64Static);
+      break;
+    default:
+      LOG(FATAL) << "Invalid type " << field_type;
+  }
+  InvokeRuntime(entrypoint, field_access, dex_pc, nullptr);
+
+  if (is_get && Primitive::IsFloatingPointType(field_type)) {
+    MoveLocation(locations->Out(), calling_convention.GetReturnLocation(field_type), field_type);
+  }
+}
+
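A minimal sketch of the entrypoint dispatch above, assuming simplified stand-ins for Primitive::Type and the kQuick* enumerators; it only illustrates that the choice is keyed on storage width and the get/set/instance flags, with float and double sharing the 32/64-bit integer entrypoints:

#include <iostream>
#include <string>

// Simplified stand-in for the subset of Primitive::Type handled above.
enum class FieldType { kBoolean, kByte, kShort, kChar, kInt, kFloat, kNot, kLong, kDouble };

// Mirrors the switch in GenerateUnresolvedFieldAccess: the label returned
// here corresponds to one of the kQuick{Get,Set}* entrypoints.
std::string SelectEntrypoint(FieldType type, bool is_instance, bool is_get) {
  std::string kind;
  switch (type) {
    case FieldType::kBoolean: kind = is_get ? "GetBoolean" : "Set8"; break;
    case FieldType::kByte:    kind = is_get ? "GetByte" : "Set8"; break;
    case FieldType::kShort:   kind = is_get ? "GetShort" : "Set16"; break;
    case FieldType::kChar:    kind = is_get ? "GetChar" : "Set16"; break;
    case FieldType::kInt:
    case FieldType::kFloat:   kind = is_get ? "Get32" : "Set32"; break;
    case FieldType::kNot:     kind = is_get ? "GetObj" : "SetObj"; break;
    case FieldType::kLong:
    case FieldType::kDouble:  kind = is_get ? "Get64" : "Set64"; break;
  }
  return kind + (is_instance ? "Instance" : "Static");
}

int main() {
  // A static float get uses the plain 32-bit entrypoint; the FP transfer is
  // handled separately, as explained in CreateUnresolvedFieldLocationSummary.
  std::cout << SelectEntrypoint(FieldType::kFloat, false, true) << "\n";  // Get32Static
  return 0;
}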
 void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const {
   // The DCHECKS below check that a register is not specified twice in
   // the summary. The out location can overlap with an input, so we need
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index a1c6db0..a3ebc43 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -143,6 +143,22 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
 };
 
+class FieldAccessCallingConvention {
+ public:
+  virtual Location GetObjectLocation() const = 0;
+  virtual Location GetFieldIndexLocation() const = 0;
+  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
+  virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
+  virtual Location GetFpuLocation(Primitive::Type type) const = 0;
+  virtual ~FieldAccessCallingConvention() {}
+
+ protected:
+  FieldAccessCallingConvention() {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
+};
+
 class CodeGenerator {
  public:
   // Compiles the graph to executable instructions. Returns whether the compilation
@@ -177,6 +193,9 @@
   virtual void Bind(HBasicBlock* block) = 0;
   virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
   virtual void MoveConstant(Location destination, int32_t value) = 0;
+  virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
+  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;
+
   virtual Assembler* GetAssembler() = 0;
   virtual const Assembler& GetAssembler() const = 0;
   virtual size_t GetWordSize() const = 0;
@@ -261,7 +280,7 @@
   bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;
 
   void AddSlowPath(SlowPathCode* slow_path) {
-    slow_paths_.Add(slow_path);
+    slow_paths_.push_back(slow_path);
   }
 
   void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; }
@@ -385,6 +404,18 @@
 
   void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);
 
+  void CreateUnresolvedFieldLocationSummary(
+      HInstruction* field_access,
+      Primitive::Type field_type,
+      const FieldAccessCallingConvention& calling_convention);
+
+  void GenerateUnresolvedFieldAccess(
+      HInstruction* field_access,
+      Primitive::Type field_type,
+      uint32_t field_index,
+      uint32_t dex_pc,
+      const FieldAccessCallingConvention& calling_convention);
+
   void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
   DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }
 
@@ -425,9 +456,12 @@
         core_spill_mask_(0),
         fpu_spill_mask_(0),
         first_register_slot_in_slow_path_(0),
-        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
-        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
-        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
+        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
+                                                                    kArenaAllocCodeGenerator)),
+        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
+                                                                   kArenaAllocCodeGenerator)),
+        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs,
+                                                                    kArenaAllocCodeGenerator)),
         number_of_core_registers_(number_of_core_registers),
         number_of_fpu_registers_(number_of_fpu_registers),
         number_of_register_pairs_(number_of_register_pairs),
@@ -441,10 +475,12 @@
         graph_(graph),
         compiler_options_(compiler_options),
         src_map_(nullptr),
-        slow_paths_(graph->GetArena(), 8),
+        slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         current_block_index_(0),
         is_leaf_(true),
-        requires_current_method_(false) {}
+        requires_current_method_(false) {
+    slow_paths_.reserve(8);
+  }
 
   // Register allocation logic.
   void AllocateRegistersLocally(HInstruction* instruction) const;
@@ -485,8 +521,20 @@
     return instruction_set == kX86 || instruction_set == kX86_64;
   }
 
-  // Arm64 has its own type for a label, so we need to templatize this method
+  // Arm64 has its own type for a label, so we need to templatize these methods
   // to share the logic.
+
+  template <typename LabelType>
+  LabelType* CommonInitializeLabels() {
+    size_t size = GetGraph()->GetBlocks().size();
+    LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
+                                                                      kArenaAllocCodeGenerator);
+    for (size_t i = 0; i != size; ++i) {
+      new(labels + i) LabelType();
+    }
+    return labels;
+  }
+
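A minimal sketch of the allocation pattern used by CommonInitializeLabels, assuming malloc stands in for the arena and a trivial Label type; the point is that AllocArray hands back raw memory, so every label must be constructed in place:

#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <new>

// Trivial stand-in for the assembler's Label type.
struct Label {
  bool bound = false;
};

// The arena returns raw, uninitialized storage (modelled here with malloc),
// so each element is placement-new constructed, with no per-element heap
// allocation.
Label* InitializeLabels(void* raw_storage, size_t num_blocks) {
  Label* labels = static_cast<Label*>(raw_storage);
  for (size_t i = 0; i != num_blocks; ++i) {
    new (labels + i) Label();
  }
  return labels;
}

int main() {
  const size_t num_blocks = 8;
  void* storage = std::malloc(num_blocks * sizeof(Label));
  Label* labels = InitializeLabels(storage, num_blocks);
  std::cout << std::boolalpha << labels[3].bound << "\n";  // false
  std::free(storage);
  return 0;
}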
   template <typename LabelType>
   LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
     block = FirstNonEmptyBlock(block);
@@ -539,7 +587,7 @@
 
   // Native to dex_pc map used for native debugging/profiling tools.
   DefaultSrcMap* src_map_;
-  GrowableArray<SlowPathCode*> slow_paths_;
+  ArenaVector<SlowPathCode*> slow_paths_;
 
   // The current block index in `block_order_` of the block
   // we are generating code for.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d431acf..cf7f5f4 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -361,6 +361,51 @@
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM);
 };
 
+class ArraySetSlowPathARM : public SlowPathCode {
+ public:
+  explicit ArraySetSlowPathARM(HInstruction* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    RestoreLiveRegisters(codegen, locations);
+    __ b(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM"; }
+
+ private:
+  HInstruction* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
+};
+
 #undef __
 #define __ down_cast<ArmAssembler*>(GetAssembler())->
 
@@ -428,19 +473,25 @@
                     kNumberOfRegisterPairs,
                     ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
                                         arraysize(kCoreCalleeSaves)),
-                    ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
-                                        arraysize(kFpuCalleeSaves)),
+                    graph->IsDebuggable()
+                        // If the graph is debuggable, we need to save the fpu registers ourselves,
+                        // as the stubs do not do it.
+                        ? 0
+                        : ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
+                                              arraysize(kFpuCalleeSaves)),
                     compiler_options,
                     stats),
-      block_labels_(graph->GetArena(), 0),
+      block_labels_(nullptr),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
       assembler_(),
       isa_features_(isa_features),
-      method_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()),
-      call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()),
-      relative_call_patches_(graph->GetArena()->Adapter()) {
+      method_patches_(MethodReferenceComparator(),
+                      graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      call_patches_(MethodReferenceComparator(),
+                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
 }
@@ -459,8 +510,8 @@
   for (HBasicBlock* block : *block_order_) {
     // Get the label directly from block_labels_ rather than through GetLabelOf() to avoid
     // FirstNonEmptyBlock() which could lead to adjusting a label more than once.
-    DCHECK_LT(static_cast<size_t>(block->GetBlockId()), block_labels_.Size());
-    Label* block_label = &block_labels_.GetRawStorage()[block->GetBlockId()];
+    DCHECK_LT(block->GetBlockId(), GetGraph()->GetBlocks().size());
+    Label* block_label = &block_labels_[block->GetBlockId()];
     DCHECK_EQ(block_label->IsBound(), !block->IsSingleJump());
     if (block_label->IsBound()) {
       __ AdjustLabelPosition(block_label);
@@ -855,6 +906,10 @@
           Primitive::kPrimInt);
     } else if (source.IsFpuRegister()) {
       UNIMPLEMENTED(FATAL);
+    } else if (source.IsFpuRegisterPair()) {
+      __ vmovrrd(destination.AsRegisterPairLow<Register>(),
+                 destination.AsRegisterPairHigh<Register>(),
+                 FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()));
     } else {
       DCHECK(source.IsDoubleStackSlot());
       DCHECK(ExpectedPairLayout(destination));
@@ -866,6 +921,10 @@
       __ LoadDFromOffset(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
                          SP,
                          source.GetStackIndex());
+    } else if (source.IsRegisterPair()) {
+      __ vmovdrr(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
+                 source.AsRegisterPairLow<Register>(),
+                 source.AsRegisterPairHigh<Register>());
     } else {
       UNIMPLEMENTED(FATAL);
     }
@@ -987,6 +1046,25 @@
   __ LoadImmediate(location.AsRegister<Register>(), value);
 }
 
+void CodeGeneratorARM::MoveLocation(Location dst, Location src, Primitive::Type dst_type) {
+  if (Primitive::Is64BitType(dst_type)) {
+    Move64(dst, src);
+  } else {
+    Move32(dst, src);
+  }
+}
+
+void CodeGeneratorARM::AddLocationAsTemp(Location location, LocationSummary* locations) {
+  if (location.IsRegister()) {
+    locations->AddTemp(location);
+  } else if (location.IsRegisterPair()) {
+    locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
+    locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
+  } else {
+    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
+  }
+}
+
 void CodeGeneratorARM::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                      HInstruction* instruction,
                                      uint32_t dex_pc,
@@ -3554,6 +3632,74 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
+void LocationsBuilderARM::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionARM calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARM::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionARM calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderARM::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionARM calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARM::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionARM calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderARM::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionARM calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARM::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionARM calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderARM::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionARM calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARM::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionARM calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
 void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) {
   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
       ? LocationSummary::kCallOnSlowPath
@@ -3744,38 +3890,32 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool needs_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call = instruction->NeedsTypeCheck();
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
-  if (needs_runtime_call) {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+      instruction,
+      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(value_type)) {
+    locations->SetInAt(2, Location::RequiresFpuRegister());
   } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    if (Primitive::IsFloatingPointType(value_type)) {
-      locations->SetInAt(2, Location::RequiresFpuRegister());
-    } else {
-      locations->SetInAt(2, Location::RequiresRegister());
-    }
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
 
-    if (needs_write_barrier) {
-      // Temporary registers for the write barrier.
-      locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
-      locations->AddTemp(Location::RequiresRegister());
-    }
+  if (needs_write_barrier) {
+    // Temporary registers for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register array = locations->InAt(0).AsRegister<Register>();
   Location index = locations->InAt(1);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool needs_runtime_call = locations->WillCall();
+  bool may_need_runtime_call = locations->CanCall();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
@@ -3787,9 +3927,9 @@
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ StoreToOffset(kStoreByte, value, obj, offset);
+        __ StoreToOffset(kStoreByte, value, array, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
+        __ add(IP, array, ShifterOperand(index.AsRegister<Register>()));
         __ StoreToOffset(kStoreByte, value, IP, data_offset);
       }
       break;
@@ -3802,55 +3942,133 @@
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ StoreToOffset(kStoreHalfword, value, obj, offset);
+        __ StoreToOffset(kStoreHalfword, value, array, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
+        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
         __ StoreToOffset(kStoreHalfword, value, IP, data_offset);
       }
       break;
     }
 
-    case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      if (!needs_runtime_call) {
-        uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-        Register value = locations->InAt(2).AsRegister<Register>();
-        Register source = value;
-        if (kPoisonHeapReferences && needs_write_barrier) {
-          // Note that in the case where `value` is a null reference,
-          // we do not enter this block, as a null reference does not
-          // need poisoning.
-          DCHECK_EQ(value_type, Primitive::kPrimNot);
-          Register temp = locations->GetTemp(0).AsRegister<Register>();
-          __ Mov(temp, value);
-          __ PoisonHeapReference(temp);
-          source = temp;
-        }
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Register value = locations->InAt(2).AsRegister<Register>();
+      Register source = value;
+
+      if (instruction->InputAt(2)->IsNullConstant()) {
+        // Just setting null.
         if (index.IsConstant()) {
           size_t offset =
               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-          __ StoreToOffset(kStoreWord, source, obj, offset);
+          __ StoreToOffset(kStoreWord, source, array, offset);
         } else {
           DCHECK(index.IsRegister()) << index;
-          __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+          __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
           __ StoreToOffset(kStoreWord, source, IP, data_offset);
         }
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        if (needs_write_barrier) {
-          DCHECK_EQ(value_type, Primitive::kPrimNot);
-          Register temp = locations->GetTemp(0).AsRegister<Register>();
-          Register card = locations->GetTemp(1).AsRegister<Register>();
-          codegen_->MarkGCCard(temp, card, obj, value, instruction->GetValueCanBeNull());
-        }
-      } else {
-        DCHECK_EQ(value_type, Primitive::kPrimNot);
-        // Note: if heap poisoning is enabled, pAputObject takes cares
-        // of poisoning the reference.
-        codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
-                                instruction,
-                                instruction->GetDexPc(),
-                                nullptr);
+        break;
       }
+
+      DCHECK(needs_write_barrier);
+      Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+      Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+      Label done;
+      SlowPathCode* slow_path = nullptr;
+
+      if (may_need_runtime_call) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          Label non_zero;
+          __ CompareAndBranchIfNonZero(value, &non_zero);
+          if (index.IsConstant()) {
+            size_t offset =
+               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+            __ StoreToOffset(kStoreWord, value, array, offset);
+          } else {
+            DCHECK(index.IsRegister()) << index;
+            __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+            __ StoreToOffset(kStoreWord, value, IP, data_offset);
+          }
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ b(&done);
+          __ Bind(&non_zero);
+        }
+
+        __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ MaybeUnpoisonHeapReference(temp1);
+        __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+        __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+        // No need to poison/unpoison, we're comparing two poisoned references.
+        __ cmp(temp1, ShifterOperand(temp2));
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          Label do_put;
+          __ b(&do_put, EQ);
+          __ MaybeUnpoisonHeapReference(temp1);
+          __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+          // No need to poison/unpoison, we're comparing against null.
+          __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ b(slow_path->GetEntryLabel(), NE);
+        }
+      }
+
+      if (kPoisonHeapReferences) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        DCHECK_EQ(value_type, Primitive::kPrimNot);
+        __ Mov(temp1, value);
+        __ PoisonHeapReference(temp1);
+        source = temp1;
+      }
+
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        __ StoreToOffset(kStoreWord, source, array, offset);
+      } else {
+        DCHECK(index.IsRegister()) << index;
+        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+        __ StoreToOffset(kStoreWord, source, IP, data_offset);
+      }
+
+      if (!may_need_runtime_call) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+
+      codegen_->MarkGCCard(temp1, temp2, array, value, instruction->GetValueCanBeNull());
+
+      if (done.IsLinked()) {
+        __ Bind(&done);
+      }
+
+      if (slow_path != nullptr) {
+        __ Bind(slow_path->GetExitLabel());
+      }
+
+      break;
+    }
+
+    case Primitive::kPrimInt: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Register value = locations->InAt(2).AsRegister<Register>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        __ StoreToOffset(kStoreWord, value, array, offset);
+      } else {
+        DCHECK(index.IsRegister()) << index;
+        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+        __ StoreToOffset(kStoreWord, value, IP, data_offset);
+      }
+
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
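A minimal sketch of the fast-path check encoded by the reference ArraySet path above, assuming a toy class/object model; a false return corresponds to branching to ArraySetSlowPathARM, which calls pAputObject for the full assignability check:

#include <iostream>

// Toy model: super_class is null only for java.lang.Object, and
// component_type is non-null only for array classes.
struct Class {
  const Class* super_class;
  const Class* component_type;
};

struct Object {
  const Class* klass;
};

// Returns true when the store can be done inline without calling the runtime.
bool CanStoreInline(const Object* array, const Object* value,
                    bool static_type_is_object_array) {
  if (value == nullptr) {
    return true;  // Storing null never needs a type check.
  }
  const Class* component = array->klass->component_type;
  if (value->klass == component) {
    return true;  // Exact match between the value's class and the component type.
  }
  if (static_type_is_object_array) {
    // If the component type has no superclass it is java.lang.Object, so any
    // reference may be stored.
    return component->super_class == nullptr;
  }
  return false;  // Let the slow path decide (and possibly throw).
}

int main() {
  Class object_class = {nullptr, nullptr};                    // java.lang.Object
  Class string_class = {&object_class, nullptr};              // some subclass
  Class object_array_class = {&object_class, &object_class};  // Object[]
  Class string_array_class = {&object_class, &string_class};  // String[]

  Object object_array = {&object_array_class};
  Object string_array = {&string_array_class};
  Object a_string = {&string_class};

  std::cout << std::boolalpha
            << CanStoreInline(&string_array, &a_string, false) << " "   // true
            << CanStoreInline(&object_array, &a_string, true) << "\n";  // true
  return 0;
}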
 
@@ -3860,9 +4078,9 @@
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), obj, offset);
+        __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), array, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
+        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
         __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), IP, data_offset);
       }
       break;
@@ -3874,9 +4092,9 @@
       DCHECK(value.IsFpuRegister());
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ StoreSToOffset(value.AsFpuRegister<SRegister>(), obj, offset);
+        __ StoreSToOffset(value.AsFpuRegister<SRegister>(), array, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
         __ StoreSToOffset(value.AsFpuRegister<SRegister>(), IP, data_offset);
       }
       break;
@@ -3888,9 +4106,9 @@
       DCHECK(value.IsFpuRegisterPair());
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), obj, offset);
+        __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), array, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
+        __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
         __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), IP, data_offset);
       }
 
@@ -4034,7 +4252,8 @@
 }
 
 void ParallelMoveResolverARM::EmitMove(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
 
@@ -4166,7 +4385,8 @@
 }
 
 void ParallelMoveResolverARM::EmitSwap(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
 
@@ -4477,7 +4697,11 @@
       break;
     }
     case TypeCheckKind::kArrayObjectCheck: {
-      // Just need to check that the object's class is a non primitive array.
+      // Do an exact check.
+      Label exact_check;
+      __ cmp(out, ShifterOperand(cls));
+      __ b(&exact_check, EQ);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
       __ LoadFromOffset(kLoadWord, out, out, component_offset);
       __ MaybeUnpoisonHeapReference(out);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -4485,6 +4709,7 @@
       __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
       __ CompareAndBranchIfNonZero(out, &zero);
+      __ Bind(&exact_check);
       __ LoadImmediate(out, 1);
       __ b(&done);
       break;
@@ -4623,20 +4848,22 @@
     }
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
-      Label loop, success;
+      Label loop;
       __ Bind(&loop);
       __ cmp(temp, ShifterOperand(cls));
-      __ b(&success, EQ);
+      __ b(&done, EQ);
       __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
       __ MaybeUnpoisonHeapReference(temp);
       __ CompareAndBranchIfNonZero(temp, &loop);
       // Jump to the slow path to throw the exception.
       __ b(slow_path->GetEntryLabel());
-      __ Bind(&success);
       break;
     }
     case TypeCheckKind::kArrayObjectCheck: {
-      // Just need to check that the object's class is a non primitive array.
+      // Do an exact check.
+      __ cmp(temp, ShifterOperand(cls));
+      __ b(&done, EQ);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
       __ LoadFromOffset(kLoadWord, temp, temp, component_offset);
       __ MaybeUnpoisonHeapReference(temp);
       __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel());
@@ -4946,6 +5173,33 @@
   // Will be generated at use site.
 }
 
+// Simple implementation of packed switch - generate cascaded compare/jumps.
+void LocationsBuilderARM::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  int32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  Register value_reg = locations->InAt(0).AsRegister<Register>();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  // Create a series of compare/jumps.
+  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+  for (int32_t i = 0; i < num_entries; i++) {
+    GenerateCompareWithImmediate(value_reg, lower_bound + i);
+    __ b(codegen_->GetLabelOf(successors.at(i)), EQ);
+  }
+
+  // And the default for any other value.
+  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+    __ b(codegen_->GetLabelOf(default_block));
+  }
+}
+
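A minimal sketch of what the emitted compare/jump cascade computes, assuming plain integers stand in for block ids; the generated code branches to block labels instead of returning a value:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Compares the switch value against lower_bound + i for each case in order,
// falling through to the default block when nothing matches.
int32_t SelectSuccessor(int32_t value, int32_t lower_bound,
                        const std::vector<int32_t>& case_blocks,
                        int32_t default_block) {
  for (size_t i = 0; i < case_blocks.size(); ++i) {
    if (value == lower_bound + static_cast<int32_t>(i)) {
      return case_blocks[i];
    }
  }
  return default_block;
}

int main() {
  std::vector<int32_t> case_blocks = {10, 11, 12};  // Hypothetical block ids.
  std::cout << SelectSuccessor(7, 5, case_blocks, 99) << " "    // 12
            << SelectSuccessor(4, 5, case_blocks, 99) << "\n";  // 99 (default)
  return 0;
}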
 void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
     DCHECK(type == Primitive::kPrimVoid);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 1d98789..16d1d38 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -96,6 +96,38 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM);
 };
 
+class FieldAccessCallingConventionARM : public FieldAccessCallingConvention {
+ public:
+  FieldAccessCallingConventionARM() {}
+
+  Location GetObjectLocation() const OVERRIDE {
+    return Location::RegisterLocation(R1);
+  }
+  Location GetFieldIndexLocation() const OVERRIDE {
+    return Location::RegisterLocation(R0);
+  }
+  Location GetReturnLocation(Primitive::Type type) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? Location::RegisterPairLocation(R0, R1)
+        : Location::RegisterLocation(R0);
+  }
+  Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? Location::RegisterPairLocation(R2, R3)
+        : (is_instance
+            ? Location::RegisterLocation(R2)
+            : Location::RegisterLocation(R1));
+  }
+  Location GetFpuLocation(Primitive::Type type) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? Location::FpuRegisterPairLocation(S0, S1)
+        : Location::FpuRegisterLocation(S0);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM);
+};
+
 class ParallelMoveResolverARM : public ParallelMoveResolverWithSwap {
  public:
   ParallelMoveResolverARM(ArenaAllocator* allocator, CodeGeneratorARM* codegen)
@@ -225,6 +257,9 @@
   void Bind(HBasicBlock* block) OVERRIDE;
   void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
+  void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
+  void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
+
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
@@ -299,11 +334,11 @@
   void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null);
 
   Label* GetLabelOf(HBasicBlock* block) const {
-    return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
+    return CommonGetLabelOf<Label>(block_labels_, block);
   }
 
   void Initialize() OVERRIDE {
-    block_labels_.SetSize(GetGraph()->GetBlocks().size());
+    block_labels_ = CommonInitializeLabels<Label>();
   }
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -335,7 +370,7 @@
   Literal* DeduplicateMethodCodeLiteral(MethodReference target_method);
 
   // Labels for each block that will be compiled.
-  GrowableArray<Label> block_labels_;
+  Label* block_labels_;  // Indexed by block id.
   Label frame_entry_label_;
   LocationsBuilderARM location_builder_;
   InstructionCodeGeneratorARM instruction_visitor_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 580e93e..af5bbaa 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -19,7 +19,6 @@
 #include "arch/arm64/instruction_set_features_arm64.h"
 #include "art_method.h"
 #include "code_generator_utils.h"
-#include "common_arm64.h"
 #include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
@@ -119,8 +118,11 @@
 
   CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize,
       register_set->GetCoreRegisters() & (~callee_saved_core_registers.list()));
-  CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize,
-      register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.list()));
+  CPURegList fp_list = CPURegList(
+      CPURegister::kFPRegister,
+      kDRegSize,
+      register_set->GetFloatingPointRegisters()
+          & (~(codegen->GetGraph()->IsDebuggable() ? 0 : callee_saved_fp_registers.list())));
 
   MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
@@ -477,7 +479,7 @@
 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
  public:
   explicit DeoptimizationSlowPathARM64(HInstruction* instruction)
-    : instruction_(instruction) {}
+      : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
@@ -496,6 +498,52 @@
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
 };
 
+class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  explicit ArraySetSlowPathARM64(HInstruction* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        LocationFrom(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        LocationFrom(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        LocationFrom(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; }
+
+ private:
+  HInstruction* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
+};
+
 #undef __
 
 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
@@ -534,7 +582,9 @@
                     kNumberOfAllocatableFPRegisters,
                     kNumberOfAllocatableRegisterPairs,
                     callee_saved_core_registers.list(),
-                    callee_saved_fp_registers.list(),
+                    // If the graph is debuggable, we need to save the fpu registers ourselves,
+                    // as the stubs do not do it.
+                    graph->IsDebuggable() ? 0 : callee_saved_fp_registers.list(),
                     compiler_options,
                     stats),
       block_labels_(nullptr),
@@ -542,11 +592,14 @@
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
       isa_features_(isa_features),
-      uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter()),
-      method_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()),
-      call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()),
-      relative_call_patches_(graph->GetArena()->Adapter()),
-      pc_rel_dex_cache_patches_(graph->GetArena()->Adapter()) {
+      uint64_literals_(std::less<uint64_t>(),
+                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_patches_(MethodReferenceComparator(),
+                      graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      call_patches_(MethodReferenceComparator(),
+                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Save the link register (containing the return address) to mimic Quick.
   AddAllocatedRegister(LocationFrom(lr));
 }
@@ -610,8 +663,9 @@
 }
 
 void ParallelMoveResolverARM64::EmitMove(size_t index) {
-  MoveOperands* move = moves_.Get(index);
-  codegen_->MoveLocation(move->GetDestination(), move->GetSource());
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
+  codegen_->MoveLocation(move->GetDestination(), move->GetSource(), Primitive::kPrimVoid);
 }
 
 void CodeGeneratorARM64::GenerateFrameEntry() {
@@ -695,7 +749,9 @@
   }
 
   if (instruction->IsCurrentMethod()) {
-    MoveLocation(location, Location::DoubleStackSlot(kCurrentMethodStackOffset));
+    MoveLocation(location,
+                 Location::DoubleStackSlot(kCurrentMethodStackOffset),
+                 Primitive::kPrimVoid);
   } else if (locations != nullptr && locations->Out().Equals(location)) {
     return;
   } else if (instruction->IsIntConstant()
@@ -738,6 +794,14 @@
   __ Mov(RegisterFrom(location, Primitive::kPrimInt), value);
 }
 
+void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
+  if (location.IsRegister()) {
+    locations->AddTemp(location);
+  } else {
+    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
+  }
+}
+
 Location CodeGeneratorARM64::GetStackLocation(HLoadLocal* load) const {
   Primitive::Type type = load->GetType();
 
@@ -888,7 +952,9 @@
          (cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
 }
 
-void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Primitive::Type type) {
+void CodeGeneratorARM64::MoveLocation(Location destination,
+                                      Location source,
+                                      Primitive::Type dst_type) {
   if (source.Equals(destination)) {
     return;
   }
@@ -897,7 +963,7 @@
   // locations. When moving from and to a register, the argument type can be
   // used to generate 32bit instead of 64bit moves. In debug mode we also
   // checks the coherency of the locations and the type.
-  bool unspecified_type = (type == Primitive::kPrimVoid);
+  bool unspecified_type = (dst_type == Primitive::kPrimVoid);
 
   if (destination.IsRegister() || destination.IsFpuRegister()) {
     if (unspecified_type) {
@@ -907,30 +973,44 @@
                                   || src_cst->IsFloatConstant()
                                   || src_cst->IsNullConstant()))) {
         // For stack slots and 32bit constants, a 64bit type is appropriate.
-        type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
+        dst_type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
       } else {
         // If the source is a double stack slot or a 64bit constant, a 64bit
         // type is appropriate. Else the source is a register, and since the
         // type has not been specified, we chose a 64bit type to force a 64bit
         // move.
-        type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble;
+        dst_type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble;
       }
     }
-    DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(type)) ||
-           (destination.IsRegister() && !Primitive::IsFloatingPointType(type)));
-    CPURegister dst = CPURegisterFrom(destination, type);
+    DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(dst_type)) ||
+           (destination.IsRegister() && !Primitive::IsFloatingPointType(dst_type)));
+    CPURegister dst = CPURegisterFrom(destination, dst_type);
     if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
       DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
       __ Ldr(dst, StackOperandFrom(source));
     } else if (source.IsConstant()) {
-      DCHECK(CoherentConstantAndType(source, type));
+      DCHECK(CoherentConstantAndType(source, dst_type));
       MoveConstant(dst, source.GetConstant());
-    } else {
+    } else if (source.IsRegister()) {
       if (destination.IsRegister()) {
-        __ Mov(Register(dst), RegisterFrom(source, type));
+        __ Mov(Register(dst), RegisterFrom(source, dst_type));
       } else {
         DCHECK(destination.IsFpuRegister());
-        __ Fmov(FPRegister(dst), FPRegisterFrom(source, type));
+        Primitive::Type source_type = Primitive::Is64BitType(dst_type)
+            ? Primitive::kPrimLong
+            : Primitive::kPrimInt;
+        __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
+      }
+    } else {
+      DCHECK(source.IsFpuRegister());
+      if (destination.IsRegister()) {
+        Primitive::Type source_type = Primitive::Is64BitType(dst_type)
+            ? Primitive::kPrimDouble
+            : Primitive::kPrimFloat;
+        __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
+      } else {
+        DCHECK(destination.IsFpuRegister());
+        __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type));
       }
     }
   } else {  // The destination is not a register. It must be a stack slot.
@@ -938,16 +1018,17 @@
     if (source.IsRegister() || source.IsFpuRegister()) {
       if (unspecified_type) {
         if (source.IsRegister()) {
-          type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong;
+          dst_type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong;
         } else {
-          type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble;
+          dst_type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble;
         }
       }
-      DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(type)) &&
-             (source.IsFpuRegister() == Primitive::IsFloatingPointType(type)));
-      __ Str(CPURegisterFrom(source, type), StackOperandFrom(destination));
+      DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(dst_type)) &&
+             (source.IsFpuRegister() == Primitive::IsFloatingPointType(dst_type)));
+      __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
     } else if (source.IsConstant()) {
-      DCHECK(unspecified_type || CoherentConstantAndType(source, type)) << source << " " << type;
+      DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
+          << source << " " << dst_type;
       UseScratchRegisterScope temps(GetVIXLAssembler());
       HConstant* src_cst = source.GetConstant();
       CPURegister temp;
@@ -1551,76 +1632,140 @@
 }
 
 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
-  if (instruction->NeedsTypeCheck()) {
-    LocationSummary* locations =
-        new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
-    locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction,
+      instruction->NeedsTypeCheck() ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+    locations->SetInAt(2, Location::RequiresFpuRegister());
   } else {
-    LocationSummary* locations =
-        new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
-      locations->SetInAt(2, Location::RequiresFpuRegister());
-    } else {
-      locations->SetInAt(2, Location::RequiresRegister());
-    }
+    locations->SetInAt(2, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
   Primitive::Type value_type = instruction->GetComponentType();
   LocationSummary* locations = instruction->GetLocations();
-  bool needs_runtime_call = locations->WillCall();
+  bool may_need_runtime_call = locations->CanCall();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
-  if (needs_runtime_call) {
-    // Note: if heap poisoning is enabled, pAputObject takes cares
-    // of poisoning the reference.
-    codegen_->InvokeRuntime(
-        QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc(), nullptr);
-    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+  Register array = InputRegisterAt(instruction, 0);
+  CPURegister value = InputCPURegisterAt(instruction, 2);
+  CPURegister source = value;
+  Location index = locations->InAt(1);
+  size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
+  MemOperand destination = HeapOperand(array);
+  MacroAssembler* masm = GetVIXLAssembler();
+  BlockPoolsScope block_pools(masm);
+
+  if (!needs_write_barrier) {
+    DCHECK(!may_need_runtime_call);
+    if (index.IsConstant()) {
+      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
+      destination = HeapOperand(array, offset);
+    } else {
+      UseScratchRegisterScope temps(masm);
+      Register temp = temps.AcquireSameSizeAs(array);
+      __ Add(temp, array, offset);
+      destination = HeapOperand(temp,
+                                XRegisterFrom(index),
+                                LSL,
+                                Primitive::ComponentSizeShift(value_type));
+    }
+    codegen_->Store(value_type, value, destination);
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   } else {
-    Register obj = InputRegisterAt(instruction, 0);
-    CPURegister value = InputCPURegisterAt(instruction, 2);
-    CPURegister source = value;
-    Location index = locations->InAt(1);
-    size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
-    MemOperand destination = HeapOperand(obj);
-    MacroAssembler* masm = GetVIXLAssembler();
-    BlockPoolsScope block_pools(masm);
+    DCHECK(needs_write_barrier);
+    vixl::Label done;
+    SlowPathCodeARM64* slow_path = nullptr;
     {
       // We use a block to end the scratch scope before the write barrier, thus
       // freeing the temporary registers so they can be used in `MarkGCCard`.
       UseScratchRegisterScope temps(masm);
-
-      if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
-        DCHECK(value.IsW());
-        Register temp = temps.AcquireW();
-        __ Mov(temp, value.W());
-        GetAssembler()->PoisonHeapReference(temp.W());
-        source = temp;
-      }
-
+      Register temp = temps.AcquireSameSizeAs(array);
       if (index.IsConstant()) {
         offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
-        destination = HeapOperand(obj, offset);
+        destination = HeapOperand(array, offset);
       } else {
-        Register temp = temps.AcquireSameSizeAs(obj);
-        __ Add(temp, obj, offset);
         destination = HeapOperand(temp,
                                   XRegisterFrom(index),
                                   LSL,
                                   Primitive::ComponentSizeShift(value_type));
       }
 
-      codegen_->Store(value_type, source, destination);
-      codegen_->MaybeRecordImplicitNullCheck(instruction);
+      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+
+      if (may_need_runtime_call) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          vixl::Label non_zero;
+          __ Cbnz(Register(value), &non_zero);
+          if (!index.IsConstant()) {
+            __ Add(temp, array, offset);
+          }
+          __ Str(wzr, destination);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ B(&done);
+          __ Bind(&non_zero);
+        }
+
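+        // Compare the value's class against the array's component type. An exact
+        // match allows the store. If the static type of the array is Object[], a
+        // component type whose superclass is null (i.e. Object) also allows it;
+        // any other mismatch branches to the slow path.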
+        Register temp2 = temps.AcquireSameSizeAs(array);
+        __ Ldr(temp, HeapOperand(array, class_offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        GetAssembler()->MaybeUnpoisonHeapReference(temp);
+        __ Ldr(temp, HeapOperand(temp, component_offset));
+        __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+        // No need to poison/unpoison, we're comparing two poisoned references.
+        __ Cmp(temp, temp2);
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          vixl::Label do_put;
+          __ B(eq, &do_put);
+          GetAssembler()->MaybeUnpoisonHeapReference(temp);
+          __ Ldr(temp, HeapOperand(temp, super_offset));
+          // No need to unpoison, we're comparing against null.
+          __ Cbnz(temp, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ B(ne, slow_path->GetEntryLabel());
+        }
+        temps.Release(temp2);
+      }
+
+      if (kPoisonHeapReferences) {
+        Register temp2 = temps.AcquireSameSizeAs(array);
+        DCHECK(value.IsW());
+        __ Mov(temp2, value.W());
+        GetAssembler()->PoisonHeapReference(temp2);
+        source = temp2;
+      }
+
+      if (!index.IsConstant()) {
+        __ Add(temp, array, offset);
+      }
+      __ Str(source, destination);
+
+      if (!may_need_runtime_call) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
     }
-    if (CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue())) {
-      codegen_->MarkGCCard(obj, value.W(), instruction->GetValueCanBeNull());
+
+    codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull());
+
+    if (done.IsLinked()) {
+      __ Bind(&done);
+    }
+
+    if (slow_path != nullptr) {
+      __ Bind(slow_path->GetExitLabel());
     }
   }
 }
@@ -2342,7 +2483,11 @@
       break;
     }
     case TypeCheckKind::kArrayObjectCheck: {
-      // Just need to check that the object's class is a non primitive array.
+      // Do an exact check.
+      vixl::Label exact_check;
+      __ Cmp(out, cls);
+      __ B(eq, &exact_check);
+      // Otherwise, we need to check that the object's class is a non primitive array.
       __ Ldr(out, HeapOperand(out, component_offset));
       GetAssembler()->MaybeUnpoisonHeapReference(out);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -2350,6 +2495,7 @@
       __ Ldrh(out, HeapOperand(out, primitive_offset));
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
       __ Cbnz(out, &zero);
+      __ Bind(&exact_check);
       __ Mov(out, 1);
       __ B(&done);
       break;
@@ -2489,20 +2635,22 @@
     }
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
-      vixl::Label loop, success;
+      vixl::Label loop;
       __ Bind(&loop);
       __ Cmp(temp, cls);
-      __ B(eq, &success);
+      __ B(eq, &done);
       __ Ldr(temp, HeapOperand(temp, super_offset));
       GetAssembler()->MaybeUnpoisonHeapReference(temp);
       __ Cbnz(temp, &loop);
       // Jump to the slow path to throw the exception.
       __ B(slow_path->GetEntryLabel());
-      __ Bind(&success);
       break;
     }
     case TypeCheckKind::kArrayObjectCheck: {
-      // Just need to check that the object's class is a non primitive array.
+      // Do an exact check.
+      __ Cmp(temp, cls);
+      __ B(eq, &done);
+      // Otherwise, we need to check that the object's class is a non primitive array.
       __ Ldr(temp, HeapOperand(temp, component_offset));
       GetAssembler()->MaybeUnpoisonHeapReference(temp);
       __ Cbz(temp, slow_path->GetEntryLabel());
@@ -3386,6 +3534,74 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
+void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionARM64 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionARM64 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionARM64 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionARM64 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionARM64 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionARM64 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionARM64 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionARM64 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
   new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
 }
@@ -3533,6 +3749,38 @@
   // Will be generated at use site.
 }
 
+// Simple implementation of packed switch - generate cascaded compare/jumps.
+void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  int32_t num_entries = switch_instr->GetNumEntries();
+  Register value_reg = InputRegisterAt(switch_instr, 0);
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  // Create a series of compare/jumps.
+  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+  for (int32_t i = 0; i < num_entries; i++) {
+    int32_t case_value = lower_bound + i;
+    vixl::Label* succ = codegen_->GetLabelOf(successors.at(i));
+    if (case_value == 0) {
+      __ Cbz(value_reg, succ);
+    } else {
+      __ Cmp(value_reg, vixl::Operand(case_value));
+      __ B(eq, succ);
+    }
+  }
+
+  // And the default for any other value.
+  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+    __ B(codegen_->GetLabelOf(default_block));
+  }
+}
+
 #undef __
 #undef QUICK_ENTRY_POINT
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 8967108..a068b48 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
 
 #include "code_generator.h"
+#include "common_arm64.h"
 #include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
 #include "nodes.h"
@@ -141,6 +142,34 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM64);
 };
 
+class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention {
+ public:
+  FieldAccessCallingConventionARM64() {}
+
+  Location GetObjectLocation() const OVERRIDE {
+    return helpers::LocationFrom(vixl::x1);
+  }
+  Location GetFieldIndexLocation() const OVERRIDE {
+    return helpers::LocationFrom(vixl::x0);
+  }
+  Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+    return helpers::LocationFrom(vixl::x0);
+  }
+  Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? helpers::LocationFrom(vixl::x2)
+        : (is_instance
+            ? helpers::LocationFrom(vixl::x2)
+            : helpers::LocationFrom(vixl::x1));
+  }
+  Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+    return helpers::LocationFrom(vixl::d0);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM64);
+};
+
 class InstructionCodeGeneratorARM64 : public HGraphVisitor {
  public:
   InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen);
@@ -326,12 +355,7 @@
   }
 
   void Initialize() OVERRIDE {
-    HGraph* graph = GetGraph();
-    int length = graph->GetBlocks().size();
-    block_labels_ = graph->GetArena()->AllocArray<vixl::Label>(length);
-    for (int i = 0; i < length; ++i) {
-      new(block_labels_ + i) vixl::Label();
-    }
+    block_labels_ = CommonInitializeLabels<vixl::Label>();
   }
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -339,10 +363,9 @@
   // Code generation helpers.
   void MoveConstant(vixl::CPURegister destination, HConstant* constant);
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
-  // The type is optional. When specified it must be coherent with the
-  // locations, and is used for optimisation and debugging.
-  void MoveLocation(Location destination, Location source,
-                    Primitive::Type type = Primitive::kPrimVoid);
+  void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
+  void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
+
   void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
   void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
   void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src);
@@ -400,7 +423,7 @@
   };
 
   // Labels for each block that will be compiled.
-  vixl::Label* block_labels_;
+  vixl::Label* block_labels_;  // Indexed by block id.
   vixl::Label frame_entry_label_;
 
   LocationsBuilderARM64 location_builder_;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 4722e42..e95d283 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -20,7 +20,9 @@
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
+#include "intrinsics_mips64.h"
 #include "art_method.h"
+#include "code_generator_utils.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "offsets.h"
@@ -36,12 +38,8 @@
 static constexpr GpuRegister kMethodRegisterArgument = A0;
 
 // We need extra temporary/scratch registers (in addition to AT) in some cases.
-static constexpr GpuRegister TMP = T8;
 static constexpr FpuRegister FTMP = F8;
 
-// ART Thread Register.
-static constexpr GpuRegister TR = S1;
-
 Location Mips64ReturnLocation(Primitive::Type return_type) {
   switch (return_type) {
     case Primitive::kPrimBoolean:
@@ -430,7 +428,7 @@
                                         arraysize(kFpuCalleeSaves)),
                     compiler_options,
                     stats),
-      block_labels_(graph->GetArena(), 0),
+      block_labels_(nullptr),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
@@ -452,12 +450,14 @@
 }
 
 void ParallelMoveResolverMIPS64::EmitMove(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType());
 }
 
 void ParallelMoveResolverMIPS64::EmitSwap(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType());
 }
 
@@ -617,7 +617,7 @@
 
 void CodeGeneratorMIPS64::MoveLocation(Location destination,
                                        Location source,
-                                       Primitive::Type type) {
+                                       Primitive::Type dst_type) {
   if (source.Equals(destination)) {
     return;
   }
@@ -625,7 +625,7 @@
   // A valid move can always be inferred from the destination and source
   // locations. When moving from and to a register, the argument type can be
   // used to generate 32bit instead of 64bit moves.
-  bool unspecified_type = (type == Primitive::kPrimVoid);
+  bool unspecified_type = (dst_type == Primitive::kPrimVoid);
   DCHECK_EQ(unspecified_type, false);
 
   if (destination.IsRegister() || destination.IsFpuRegister()) {
@@ -636,21 +636,21 @@
                                   || src_cst->IsFloatConstant()
                                   || src_cst->IsNullConstant()))) {
         // For stack slots and 32bit constants, a 64bit type is appropriate.
-        type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
+        dst_type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
       } else {
         // If the source is a double stack slot or a 64bit constant, a 64bit
         // type is appropriate. Else the source is a register, and since the
         // type has not been specified, we chose a 64bit type to force a 64bit
         // move.
-        type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble;
+        dst_type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble;
       }
     }
-    DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(type)) ||
-           (destination.IsRegister() && !Primitive::IsFloatingPointType(type)));
+    DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(dst_type)) ||
+           (destination.IsRegister() && !Primitive::IsFloatingPointType(dst_type)));
     if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
       // Move to GPR/FPR from stack
       LoadOperandType load_type = source.IsStackSlot() ? kLoadWord : kLoadDoubleword;
-      if (Primitive::IsFloatingPointType(type)) {
+      if (Primitive::IsFloatingPointType(dst_type)) {
         __ LoadFpuFromOffset(load_type,
                              destination.AsFpuRegister<FpuRegister>(),
                              SP,
@@ -665,31 +665,49 @@
     } else if (source.IsConstant()) {
       // Move to GPR/FPR from constant
       GpuRegister gpr = AT;
-      if (!Primitive::IsFloatingPointType(type)) {
+      if (!Primitive::IsFloatingPointType(dst_type)) {
         gpr = destination.AsRegister<GpuRegister>();
       }
-      if (type == Primitive::kPrimInt || type == Primitive::kPrimFloat) {
+      if (dst_type == Primitive::kPrimInt || dst_type == Primitive::kPrimFloat) {
         __ LoadConst32(gpr, GetInt32ValueOf(source.GetConstant()->AsConstant()));
       } else {
         __ LoadConst64(gpr, GetInt64ValueOf(source.GetConstant()->AsConstant()));
       }
-      if (type == Primitive::kPrimFloat) {
+      if (dst_type == Primitive::kPrimFloat) {
         __ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>());
-      } else if (type == Primitive::kPrimDouble) {
+      } else if (dst_type == Primitive::kPrimDouble) {
         __ Dmtc1(gpr, destination.AsFpuRegister<FpuRegister>());
       }
-    } else {
+    } else if (source.IsRegister()) {
       if (destination.IsRegister()) {
         // Move to GPR from GPR
         __ Move(destination.AsRegister<GpuRegister>(), source.AsRegister<GpuRegister>());
       } else {
+        DCHECK(destination.IsFpuRegister());
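+        // Move to FPR from GPR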
+        if (Primitive::Is64BitType(dst_type)) {
+          __ Dmtc1(source.AsRegister<GpuRegister>(), destination.AsFpuRegister<FpuRegister>());
+        } else {
+          __ Mtc1(source.AsRegister<GpuRegister>(), destination.AsFpuRegister<FpuRegister>());
+        }
+      }
+    } else if (source.IsFpuRegister()) {
+      if (destination.IsFpuRegister()) {
         // Move to FPR from FPR
-        if (type == Primitive::kPrimFloat) {
+        if (dst_type == Primitive::kPrimFloat) {
           __ MovS(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>());
         } else {
-          DCHECK_EQ(type, Primitive::kPrimDouble);
+          DCHECK_EQ(dst_type, Primitive::kPrimDouble);
           __ MovD(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>());
         }
+      } else {
+        DCHECK(destination.IsRegister());
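+        // Move to GPR from FPR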
+        if (Primitive::Is64BitType(dst_type)) {
+          __ Dmfc1(destination.AsRegister<GpuRegister>(), source.AsFpuRegister<FpuRegister>());
+        } else {
+          __ Mfc1(destination.AsRegister<GpuRegister>(), source.AsFpuRegister<FpuRegister>());
+        }
       }
     }
   } else {  // The destination is not a register. It must be a stack slot.
@@ -697,13 +713,13 @@
     if (source.IsRegister() || source.IsFpuRegister()) {
       if (unspecified_type) {
         if (source.IsRegister()) {
-          type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong;
+          dst_type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong;
         } else {
-          type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble;
+          dst_type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble;
         }
       }
-      DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(type)) &&
-             (source.IsFpuRegister() == Primitive::IsFloatingPointType(type)));
+      DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(dst_type)) &&
+             (source.IsFpuRegister() == Primitive::IsFloatingPointType(dst_type)));
       // Move to stack from GPR/FPR
       StoreOperandType store_type = destination.IsStackSlot() ? kStoreWord : kStoreDoubleword;
       if (source.IsRegister()) {
@@ -861,6 +877,14 @@
   __ LoadConst32(location.AsRegister<GpuRegister>(), value);
 }
 
+void CodeGeneratorMIPS64::AddLocationAsTemp(Location location, LocationSummary* locations) {
+  if (location.IsRegister()) {
+    locations->AddTemp(location);
+  } else {
+    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
+  }
+}
+
 Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const {
   Primitive::Type type = load->GetType();
 
@@ -971,11 +995,11 @@
 }
 
 void CodeGeneratorMIPS64::DumpCoreRegister(std::ostream& stream, int reg) const {
-  stream << Mips64ManagedRegister::FromGpuRegister(GpuRegister(reg));
+  stream << GpuRegister(reg);
 }
 
 void CodeGeneratorMIPS64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
-  stream << Mips64ManagedRegister::FromFpuRegister(FpuRegister(reg));
+  stream << FpuRegister(reg);
 }
 
 void CodeGeneratorMIPS64::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -1444,12 +1468,11 @@
 }
 
 void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) {
-  Primitive::Type value_type = instruction->GetComponentType();
-  bool is_object = value_type == Primitive::kPrimNot;
+  bool needs_runtime_call = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      is_object ? LocationSummary::kCall : LocationSummary::kNoCall);
-  if (is_object) {
+      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+  if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
     locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
@@ -2396,7 +2419,11 @@
 }
 
 void LocationsBuilderMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  // TODO intrinsic function
+  IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }
 
@@ -2405,7 +2432,11 @@
   // invokes must have been pruned by art::PrepareForRegisterAllocation.
   DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
 
-  // TODO - intrinsic function
+  IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 
   // While SetupBlockedRegisters() blocks registers S2-S8 due to their
@@ -2420,10 +2451,10 @@
   }
 }
 
-static bool TryGenerateIntrinsicCode(HInvoke* invoke,
-                                     CodeGeneratorMIPS64* codegen ATTRIBUTE_UNUSED) {
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) {
   if (invoke->GetLocations()->Intrinsified()) {
-    // TODO - intrinsic function
+    IntrinsicCodeGeneratorMIPS64 intrinsic(codegen);
+    intrinsic.Dispatch(invoke);
     return true;
   }
   return false;
@@ -2532,7 +2563,10 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  // TODO: Try to generate intrinsics code.
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
   LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
   GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
@@ -3108,6 +3142,74 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo());
 }
 
+void LocationsBuilderMIPS64::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionMIPS64 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionMIPS64 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderMIPS64::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionMIPS64 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionMIPS64 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderMIPS64::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionMIPS64 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionMIPS64 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderMIPS64::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionMIPS64 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionMIPS64 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
 void LocationsBuilderMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) {
   new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
 }
@@ -3365,5 +3467,38 @@
   // Will be generated at use site.
 }
 
+// Simple implementation of packed switch - generate cascaded compare/jumps.
+void LocationsBuilderMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  int32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  // Create a series of compare/jumps.
+  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+  for (int32_t i = 0; i < num_entries; i++) {
+    int32_t case_value = lower_bound + i;
+    Label* succ = codegen_->GetLabelOf(successors.at(i));
+    if (case_value == 0) {
+      __ Beqzc(value_reg, succ);
+    } else {
+      __ LoadConst32(TMP, case_value);
+      __ Beqc(value_reg, TMP, succ);
+    }
+  }
+
+  // And the default for any other value.
+  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+    __ B(codegen_->GetLabelOf(default_block));
+  }
+}
+
 }  // namespace mips64
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index f66ecb3..5e8f9e7 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -106,6 +106,31 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
 };
 
+class FieldAccessCallingConventionMIPS64 : public FieldAccessCallingConvention {
+ public:
+  FieldAccessCallingConventionMIPS64() {}
+
+  Location GetObjectLocation() const OVERRIDE {
+    return Location::RegisterLocation(A1);
+  }
+  Location GetFieldIndexLocation() const OVERRIDE {
+    return Location::RegisterLocation(A0);
+  }
+  Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+    return Location::RegisterLocation(A0);
+  }
+  Location GetSetValueLocation(
+      Primitive::Type type ATTRIBUTE_UNUSED, bool is_instance) const OVERRIDE {
+    return is_instance ? Location::RegisterLocation(A2) : Location::RegisterLocation(A1);
+  }
+  Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+    return Location::FpuRegisterLocation(F0);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionMIPS64);
+};
+
 class ParallelMoveResolverMIPS64 : public ParallelMoveResolverWithSwap {
  public:
   ParallelMoveResolverMIPS64(ArenaAllocator* allocator, CodeGeneratorMIPS64* codegen)
@@ -270,21 +295,22 @@
   }
 
   Label* GetLabelOf(HBasicBlock* block) const {
-    return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
+    return CommonGetLabelOf<Label>(block_labels_, block);
   }
 
   void Initialize() OVERRIDE {
-    block_labels_.SetSize(GetGraph()->GetBlocks().size());
+    block_labels_ = CommonInitializeLabels<Label>();
   }
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
 
   // Code generation helpers.
-
-  void MoveLocation(Location destination, Location source, Primitive::Type type);
+  void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
 
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
 
+  void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
+
   void SwapLocations(Location loc1, Location loc2, Primitive::Type type);
 
   // Generate code to invoke a runtime entry point.
@@ -315,7 +342,7 @@
 
  private:
   // Labels for each block that will be compiled.
-  GrowableArray<Label> block_labels_;
+  Label* block_labels_;  // Indexed by block id.
   Label frame_entry_label_;
   LocationsBuilderMIPS64 location_builder_;
   InstructionCodeGeneratorMIPS64 instruction_visitor_;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 3d03dd8..5078456 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -380,6 +380,51 @@
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
 };
 
+class ArraySetSlowPathX86 : public SlowPathCode {
+ public:
+  explicit ArraySetSlowPathX86(HInstruction* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86"; }
+
+ private:
+  HInstruction* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
+};
+
 #undef __
 #define __ down_cast<X86Assembler*>(GetAssembler())->
 
@@ -470,13 +515,13 @@
                     0,
                     compiler_options,
                     stats),
-      block_labels_(graph->GetArena(), 0),
+      block_labels_(nullptr),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
       isa_features_(isa_features),
-      method_patches_(graph->GetArena()->Adapter()),
-      relative_call_patches_(graph->GetArena()->Adapter()) {
+      method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Use a fake return address register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
@@ -782,7 +827,12 @@
           Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
           Primitive::kPrimInt);
     } else if (source.IsFpuRegister()) {
-      LOG(FATAL) << "Unimplemented";
+      XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
+      __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
+      __ psrlq(src_reg, Immediate(32));
+      __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
     } else {
       // No conflict possible, so just do the moves.
       DCHECK(source.IsDoubleStackSlot());
@@ -795,6 +843,15 @@
       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
     } else if (source.IsDoubleStackSlot()) {
       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
+    } else if (source.IsRegisterPair()) {
+      size_t elem_size = Primitive::ComponentSize(Primitive::kPrimInt);
+      // Create stack space for 2 elements.
+      __ subl(ESP, Immediate(2 * elem_size));
+      __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
+      __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
+      __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
+      // And remove the temporary stack space we allocated.
+      __ addl(ESP, Immediate(2 * elem_size));
     } else {
       LOG(FATAL) << "Unimplemented";
     }
@@ -921,6 +978,25 @@
   __ movl(location.AsRegister<Register>(), Immediate(value));
 }
 
+void CodeGeneratorX86::MoveLocation(Location dst, Location src, Primitive::Type dst_type) {
+  if (Primitive::Is64BitType(dst_type)) {
+    Move64(dst, src);
+  } else {
+    Move32(dst, src);
+  }
+}
+
+void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
+  if (location.IsRegister()) {
+    locations->AddTemp(location);
+  } else if (location.IsRegisterPair()) {
+    locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
+    locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
+  } else {
+    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
+  }
+}
+
 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
   DCHECK(!successor->IsExitBlock());
 
@@ -1314,7 +1390,7 @@
     default: {
       // Integer case.
 
-      // Clear output register: setcc only sets the low byte.
+      // Clear output register: setb only sets the low byte.
       __ xorl(reg, reg);
 
       if (rhs.IsRegister()) {
@@ -4040,6 +4116,74 @@
   HandleFieldGet(instruction, instruction->GetFieldInfo());
 }
 
+void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionX86 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionX86 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionX86 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionX86 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionX86 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionX86 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionX86 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionX86 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
       ? LocationSummary::kCallOnSlowPath
@@ -4245,72 +4389,59 @@
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
-  bool needs_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call = instruction->NeedsTypeCheck();
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
 
-  if (needs_runtime_call) {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  bool is_byte_type = (value_type == Primitive::kPrimBoolean)
+      || (value_type == Primitive::kPrimByte);
+  // We need the inputs to be different from the output in the case of a long
+  // operation. In the case of a byte operation, the register allocator does not
+  // support multiple inputs that die at entry with one in a specific register.
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (is_byte_type) {
+    // Ensure the value is in a byte register.
+    locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
+  } else if (Primitive::IsFloatingPointType(value_type)) {
+    locations->SetInAt(2, Location::RequiresFpuRegister());
   } else {
-    bool is_byte_type = (value_type == Primitive::kPrimBoolean)
-        || (value_type == Primitive::kPrimByte);
-    // We need the inputs to be different than the output in case of long operation.
-    // In case of a byte operation, the register allocator does not support multiple
-    // inputs that die at entry with one in a specific register.
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    if (is_byte_type) {
-      // Ensure the value is in a byte register.
-      locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
-    } else if (Primitive::IsFloatingPointType(value_type)) {
-      locations->SetInAt(2, Location::RequiresFpuRegister());
-    } else {
-      locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
-    }
-    if (needs_write_barrier) {
-      // Temporary registers for the write barrier.
-      locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
-      // Ensure the card is in a byte register.
-      locations->AddTemp(Location::RegisterLocation(ECX));
-    }
+    locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
+  }
+  if (needs_write_barrier) {
+    // Temporary registers for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
+    // Ensure the card is in a byte register.
+    locations->AddTemp(Location::RegisterLocation(ECX));
   }
 }
 
 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register array = locations->InAt(0).AsRegister<Register>();
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool needs_runtime_call = locations->WillCall();
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  bool may_need_runtime_call = locations->CanCall();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        if (value.IsRegister()) {
-          __ movb(Address(obj, offset), value.AsRegister<ByteRegister>());
-        } else {
-          __ movb(Address(obj, offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_1, offset);
+      if (value.IsRegister()) {
+        __ movb(address, value.AsRegister<ByteRegister>());
       } else {
-        if (value.IsRegister()) {
-          __ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset),
-                  value.AsRegister<ByteRegister>());
-        } else {
-          __ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+        __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
       }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
@@ -4318,93 +4449,106 @@
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        if (value.IsRegister()) {
-          __ movw(Address(obj, offset), value.AsRegister<Register>());
-        } else {
-          __ movw(Address(obj, offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_2, offset);
+      if (value.IsRegister()) {
+        __ movw(address, value.AsRegister<Register>());
       } else {
-        if (value.IsRegister()) {
-          __ movw(Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset),
-                  value.AsRegister<Register>());
-        } else {
-          __ movw(Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+        __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
       }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
-    case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      if (!needs_runtime_call) {
-        uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-        if (index.IsConstant()) {
-          size_t offset =
-              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-          if (value.IsRegister()) {
-            if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
-              Register temp = locations->GetTemp(0).AsRegister<Register>();
-              __ movl(temp, value.AsRegister<Register>());
-              __ PoisonHeapReference(temp);
-              __ movl(Address(obj, offset), temp);
-            } else {
-              __ movl(Address(obj, offset), value.AsRegister<Register>());
-            }
-          } else {
-            DCHECK(value.IsConstant()) << value;
-            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
-            // `value_type == Primitive::kPrimNot` implies `v == 0`.
-            DCHECK((value_type != Primitive::kPrimNot) || (v == 0));
-            // Note: if heap poisoning is enabled, no need to poison
-            // (negate) `v` if it is a reference, as it would be null.
-            __ movl(Address(obj, offset), Immediate(v));
-          }
-        } else {
-          DCHECK(index.IsRegister()) << index;
-          if (value.IsRegister()) {
-            if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
-              Register temp = locations->GetTemp(0).AsRegister<Register>();
-              __ movl(temp, value.AsRegister<Register>());
-              __ PoisonHeapReference(temp);
-              __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset), temp);
-            } else {
-              __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset),
-                      value.AsRegister<Register>());
-            }
-          } else {
-            DCHECK(value.IsConstant()) << value;
-            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
-            // `value_type == Primitive::kPrimNot` implies `v == 0`.
-            DCHECK((value_type != Primitive::kPrimNot) || (v == 0));
-            // Note: if heap poisoning is enabled, no need to poison
-            // (negate) `v` if it is a reference, as it would be null.
-            __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset), Immediate(v));
-          }
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
+      if (!value.IsRegister()) {
+        // Just setting null.
+        DCHECK(instruction->InputAt(2)->IsNullConstant());
+        DCHECK(value.IsConstant()) << value;
+        __ movl(address, Immediate(0));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-
-        if (needs_write_barrier) {
-          Register temp = locations->GetTemp(0).AsRegister<Register>();
-          Register card = locations->GetTemp(1).AsRegister<Register>();
-          codegen_->MarkGCCard(
-              temp, card, obj, value.AsRegister<Register>(), instruction->GetValueCanBeNull());
-        }
-      } else {
-        DCHECK_EQ(value_type, Primitive::kPrimNot);
-        DCHECK(!codegen_->IsLeafMethod());
-        // Note: if heap poisoning is enabled, pAputObject takes cares
-        // of poisoning the reference.
-        codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
-                                instruction,
-                                instruction->GetDexPc(),
-                                nullptr);
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call);
+        break;
       }
+
+      DCHECK(needs_write_barrier);
+      Register register_value = value.AsRegister<Register>();
+      NearLabel done, not_null, do_put;
+      SlowPathCode* slow_path = nullptr;
+      Register temp = locations->GetTemp(0).AsRegister<Register>();
+      if (may_need_runtime_call) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          __ testl(register_value, register_value);
+          __ j(kNotEqual, &not_null);
+          __ movl(address, Immediate(0));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ jmp(&done);
+          __ Bind(&not_null);
+        }
+
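+        // Compare the component type of the array with the class of the value to store.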
+        __ movl(temp, Address(array, class_offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ MaybeUnpoisonHeapReference(temp);
+        __ movl(temp, Address(temp, component_offset));
+        // No need to poison/unpoison, we're comparing two poisoned references.
+        __ cmpl(temp, Address(register_value, class_offset));
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          __ j(kEqual, &do_put);
+          __ MaybeUnpoisonHeapReference(temp);
+          __ movl(temp, Address(temp, super_offset));
+          // No need to unpoison the result, we're comparing against null.
+          __ testl(temp, temp);
+          __ j(kNotEqual, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ j(kNotEqual, slow_path->GetEntryLabel());
+        }
+      }
+
+      if (kPoisonHeapReferences) {
+        __ movl(temp, register_value);
+        __ PoisonHeapReference(temp);
+        __ movl(address, temp);
+      } else {
+        __ movl(address, register_value);
+      }
+      if (!may_need_runtime_call) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+
+      Register card = locations->GetTemp(1).AsRegister<Register>();
+      codegen_->MarkGCCard(
+          temp, card, array, value.AsRegister<Register>(), instruction->GetValueCanBeNull());
+      __ Bind(&done);
+
+      if (slow_path != nullptr) {
+        __ Bind(slow_path->GetExitLabel());
+      }
+
+      break;
+    }
+    case Primitive::kPrimInt: {
+      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
+      if (value.IsRegister()) {
+        __ movl(address, value.AsRegister<Register>());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(address, Immediate(v));
+      }
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
@@ -4413,30 +4557,30 @@
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
         if (value.IsRegisterPair()) {
-          __ movl(Address(obj, offset), value.AsRegisterPairLow<Register>());
+          __ movl(Address(array, offset), value.AsRegisterPairLow<Register>());
           codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ movl(Address(obj, offset + kX86WordSize), value.AsRegisterPairHigh<Register>());
+          __ movl(Address(array, offset + kX86WordSize), value.AsRegisterPairHigh<Register>());
         } else {
           DCHECK(value.IsConstant());
           int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
-          __ movl(Address(obj, offset), Immediate(Low32Bits(val)));
+          __ movl(Address(array, offset), Immediate(Low32Bits(val)));
           codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ movl(Address(obj, offset + kX86WordSize), Immediate(High32Bits(val)));
+          __ movl(Address(array, offset + kX86WordSize), Immediate(High32Bits(val)));
         }
       } else {
         if (value.IsRegisterPair()) {
-          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset),
+          __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset),
                   value.AsRegisterPairLow<Register>());
           codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
+          __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
                   value.AsRegisterPairHigh<Register>());
         } else {
           DCHECK(value.IsConstant());
           int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
-          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset),
+          __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset),
                   Immediate(Low32Bits(val)));
           codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
+          __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
                   Immediate(High32Bits(val)));
         }
       }
@@ -4444,28 +4588,22 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
       DCHECK(value.IsFpuRegister());
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ movss(Address(obj, offset), value.AsFpuRegister<XmmRegister>());
-      } else {
-        __ movss(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset),
-                value.AsFpuRegister<XmmRegister>());
-      }
+      __ movss(address, value.AsFpuRegister<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
+          : Address(array, index.AsRegister<Register>(), TIMES_8, offset);
       DCHECK(value.IsFpuRegister());
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ movsd(Address(obj, offset), value.AsFpuRegister<XmmRegister>());
-      } else {
-        __ movsd(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset),
-                value.AsFpuRegister<XmmRegister>());
-      }
+      __ movsd(address, value.AsFpuRegister<XmmRegister>());
       break;
     }
 
@@ -4630,7 +4768,8 @@
 }
 
 void ParallelMoveResolverX86::EmitMove(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
 
@@ -4782,7 +4921,8 @@
 }
 
 void ParallelMoveResolverX86::EmitSwap(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
 
@@ -5038,6 +5178,7 @@
         DCHECK(cls.IsStackSlot()) << cls;
         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
       }
+
       // Classes must be equal for the instanceof to succeed.
       __ j(kNotEqual, &zero);
       __ movl(out, Immediate(1));
@@ -5092,7 +5233,16 @@
       break;
     }
     case TypeCheckKind::kArrayObjectCheck: {
-      // Just need to check that the object's class is a non primitive array.
+      // Do an exact check.
+      NearLabel exact_check;
+      if (cls.IsRegister()) {
+        __ cmpl(out, cls.AsRegister<Register>());
+      } else {
+        DCHECK(cls.IsStackSlot()) << cls;
+        __ cmpl(out, Address(ESP, cls.GetStackIndex()));
+      }
+      __ j(kEqual, &exact_check);
+      // Otherwise, we need to check that the object's class is a non primitive array.
       __ movl(out, Address(out, component_offset));
       __ MaybeUnpoisonHeapReference(out);
       __ testl(out, out);
@@ -5100,6 +5250,7 @@
       __ j(kEqual, &done);
       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
       __ j(kNotEqual, &zero);
+      __ Bind(&exact_check);
       __ movl(out, Immediate(1));
       __ jmp(&done);
       break;
@@ -5255,7 +5406,7 @@
     }
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
-      NearLabel loop, success;
+      NearLabel loop;
       __ Bind(&loop);
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<Register>());
@@ -5263,18 +5414,25 @@
         DCHECK(cls.IsStackSlot()) << cls;
         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
       }
-      __ j(kEqual, &success);
+      __ j(kEqual, &done);
       __ movl(temp, Address(temp, super_offset));
       __ MaybeUnpoisonHeapReference(temp);
       __ testl(temp, temp);
       __ j(kNotEqual, &loop);
       // Jump to the slow path to throw the exception.
       __ jmp(slow_path->GetEntryLabel());
-      __ Bind(&success);
       break;
     }
     case TypeCheckKind::kArrayObjectCheck: {
-      // Just need to check that the object's class is a non primitive array.
+      // Do an exact check.
+      if (cls.IsRegister()) {
+        __ cmpl(temp, cls.AsRegister<Register>());
+      } else {
+        DCHECK(cls.IsStackSlot()) << cls;
+        __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
+      }
+      __ j(kEqual, &done);
+      // Otherwise, we need to check that the object's class is a non primitive array.
       __ movl(temp, Address(temp, component_offset));
       __ MaybeUnpoisonHeapReference(temp);
       __ testl(temp, temp);
@@ -5470,6 +5628,38 @@
   // Will be generated at use site.
 }
 
+// Simple implementation of packed switch - generate cascaded compare/jumps.
+void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  int32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  Register value_reg = locations->InAt(0).AsRegister<Register>();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  // Create a series of compare/jumps.
+  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+  for (int i = 0; i < num_entries; i++) {
+    int32_t case_value = lower_bound + i;
+    if (case_value == 0) {
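+      // test against itself is a shorter encoding than cmp with an immediate zero.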
+      __ testl(value_reg, value_reg);
+    } else {
+      __ cmpl(value_reg, Immediate(case_value));
+    }
+    __ j(kEqual, codegen_->GetLabelOf(successors.at(i)));
+  }
+
+  // And the default for any other value.
+  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+    __ jmp(codegen_->GetLabelOf(default_block));
+  }
+}
+
 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
     HX86ComputeBaseMethodAddress* insn) {
   LocationSummary* locations =
@@ -5571,7 +5761,7 @@
 /**
  * Class to handle late fixup of offsets into constant area.
  */
-class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> {
+class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
  public:
   RIPFixup(const CodeGeneratorX86& codegen, int offset)
       : codegen_(codegen), offset_into_constant_area_(offset) {}
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f38e1ea..ae2d84f 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -91,6 +91,36 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86);
 };
 
+class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention {
+ public:
+  FieldAccessCallingConventionX86() {}
+
+  Location GetObjectLocation() const OVERRIDE {
+    return Location::RegisterLocation(ECX);
+  }
+  Location GetFieldIndexLocation() const OVERRIDE {
+    return Location::RegisterLocation(EAX);
+  }
+  Location GetReturnLocation(Primitive::Type type) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? Location::RegisterPairLocation(EAX, EDX)
+        : Location::RegisterLocation(EAX);
+  }
+  Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? Location::RegisterPairLocation(EDX, EBX)
+        : (is_instance
+            ? Location::RegisterLocation(EDX)
+            : Location::RegisterLocation(ECX));
+  }
+  Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+    return Location::FpuRegisterLocation(XMM0);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86);
+};
+
 class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
  public:
   ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen)
@@ -228,6 +258,9 @@
   void Bind(HBasicBlock* block) OVERRIDE;
   void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
+  void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
+  void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
+
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
@@ -316,11 +349,11 @@
                   bool value_can_be_null);
 
   Label* GetLabelOf(HBasicBlock* block) const {
-    return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
+    return CommonGetLabelOf<Label>(block_labels_, block);
   }
 
   void Initialize() OVERRIDE {
-    block_labels_.SetSize(GetGraph()->GetBlocks().size());
+    block_labels_ = CommonInitializeLabels<Label>();
   }
 
   bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
@@ -356,7 +389,7 @@
 
  private:
   // Labels for each block that will be compiled.
-  GrowableArray<Label> block_labels_;
+  Label* block_labels_;  // Indexed by block id.
   Label frame_entry_label_;
   LocationsBuilderX86 location_builder_;
   InstructionCodeGeneratorX86 instruction_visitor_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 32a1db5..791bb9e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -396,6 +396,51 @@
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
 };
 
+class ArraySetSlowPathX86_64 : public SlowPathCode {
+ public:
+  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
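+    // Move the array, index and value to the calling convention registers of pAputObject.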
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
+
+ private:
+  HInstruction* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
+};
+
 #undef __
 #define __ down_cast<X86_64Assembler*>(GetAssembler())->
 
@@ -620,15 +665,15 @@
                                           arraysize(kFpuCalleeSaves)),
                       compiler_options,
                       stats),
-        block_labels_(graph->GetArena(), 0),
+        block_labels_(nullptr),
         location_builder_(graph, this),
         instruction_visitor_(graph, this),
         move_resolver_(graph->GetArena(), this),
         isa_features_(isa_features),
         constant_area_start_(0),
-        method_patches_(graph->GetArena()->Adapter()),
-        relative_call_patches_(graph->GetArena()->Adapter()),
-        pc_rel_dex_cache_patches_(graph->GetArena()->Adapter()) {
+        method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
@@ -945,6 +990,19 @@
   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
 }
 
+void CodeGeneratorX86_64::MoveLocation(
+    Location dst, Location src, Primitive::Type dst_type ATTRIBUTE_UNUSED) {
+  Move(dst, src);
+}
+
+void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
+  if (location.IsRegister()) {
+    locations->AddTemp(location);
+  } else {
+    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
+  }
+}
+
 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
   DCHECK(!successor->IsExitBlock());
 
@@ -3804,6 +3862,74 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
+void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionX86_64 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionX86_64 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionX86_64 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionX86_64 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionX86_64 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionX86_64 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionX86_64 calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionX86_64 calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
       ? LocationSummary::kCallOnSlowPath
@@ -3992,66 +4118,55 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool needs_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call = instruction->NeedsTypeCheck();
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
-  if (needs_runtime_call) {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
-  } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(
-        1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    locations->SetInAt(2, Location::RequiresRegister());
-    if (value_type == Primitive::kPrimLong) {
-      locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2)));
-    } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) {
-      locations->SetInAt(2, Location::RequiresFpuRegister());
-    } else {
-      locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
-    }
+      instruction,
+      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
 
-    if (needs_write_barrier) {
-      // Temporary registers for the write barrier.
-      locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
-      locations->AddTemp(Location::RequiresRegister());
-    }
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(
+      1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  locations->SetInAt(2, Location::RequiresRegister());
+  if (value_type == Primitive::kPrimLong) {
+    locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2)));
+  } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) {
+    locations->SetInAt(2, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
+  }
+
+  if (needs_write_barrier) {
+    // Temporary registers for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister array = locations->InAt(0).AsRegister<CpuRegister>();
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool needs_runtime_call = locations->WillCall();
+  bool may_need_runtime_call = locations->CanCall();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        if (value.IsRegister()) {
-          __ movb(Address(obj, offset), value.AsRegister<CpuRegister>());
-        } else {
-          __ movb(Address(obj, offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset)
+          : Address(array, index.AsRegister<CpuRegister>(), TIMES_1, offset);
+      if (value.IsRegister()) {
+        __ movb(address, value.AsRegister<CpuRegister>());
       } else {
-        if (value.IsRegister()) {
-          __ movb(Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset),
-                  value.AsRegister<CpuRegister>());
-        } else {
-          __ movb(Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+        __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
       }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
@@ -4059,154 +4174,145 @@
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        if (value.IsRegister()) {
-          __ movw(Address(obj, offset), value.AsRegister<CpuRegister>());
-        } else {
-          DCHECK(value.IsConstant()) << value;
-          __ movw(Address(obj, offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset)
+          : Address(array, index.AsRegister<CpuRegister>(), TIMES_2, offset);
+      if (value.IsRegister()) {
+        __ movw(address, value.AsRegister<CpuRegister>());
       } else {
-        DCHECK(index.IsRegister()) << index;
-        if (value.IsRegister()) {
-          __ movw(Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset),
-                  value.AsRegister<CpuRegister>());
-        } else {
-          DCHECK(value.IsConstant()) << value;
-          __ movw(Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+        DCHECK(value.IsConstant()) << value;
+        __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
       }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
-    case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      if (!needs_runtime_call) {
-        uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-        if (index.IsConstant()) {
-          size_t offset =
-              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-          if (value.IsRegister()) {
-            if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
-              CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-              __ movl(temp, value.AsRegister<CpuRegister>());
-              __ PoisonHeapReference(temp);
-              __ movl(Address(obj, offset), temp);
-            } else {
-              __ movl(Address(obj, offset), value.AsRegister<CpuRegister>());
-            }
-          } else {
-            DCHECK(value.IsConstant()) << value;
-            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
-            // `value_type == Primitive::kPrimNot` implies `v == 0`.
-            DCHECK((value_type != Primitive::kPrimNot) || (v == 0));
-            // Note: if heap poisoning is enabled, no need to poison
-            // (negate) `v` if it is a reference, as it would be null.
-            __ movl(Address(obj, offset), Immediate(v));
-          }
-        } else {
-          DCHECK(index.IsRegister()) << index;
-          if (value.IsRegister()) {
-            if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
-              CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-              __ movl(temp, value.AsRegister<CpuRegister>());
-              __ PoisonHeapReference(temp);
-              __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset), temp);
-            } else {
-              __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
-                      value.AsRegister<CpuRegister>());
-            }
-          } else {
-            DCHECK(value.IsConstant()) << value;
-            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
-            // `value_type == Primitive::kPrimNot` implies `v == 0`.
-            DCHECK((value_type != Primitive::kPrimNot) || (v == 0));
-            // Note: if heap poisoning is enabled, no need to poison
-            // (negate) `v` if it is a reference, as it would be null.
-            __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
-                    Immediate(v));
-          }
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
+      if (!value.IsRegister()) {
+        // Just setting null.
+        DCHECK(instruction->InputAt(2)->IsNullConstant());
+        DCHECK(value.IsConstant()) << value;
+        __ movl(address, Immediate(0));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        if (needs_write_barrier) {
-          DCHECK_EQ(value_type, Primitive::kPrimNot);
-          CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-          CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
-          codegen_->MarkGCCard(
-              temp, card, obj, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
-        }
-      } else {
-        DCHECK_EQ(value_type, Primitive::kPrimNot);
-        // Note: if heap poisoning is enabled, pAputObject takes cares
-        // of poisoning the reference.
-        codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
-                                instruction,
-                                instruction->GetDexPc(),
-                                nullptr);
-        DCHECK(!codegen_->IsLeafMethod());
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call);
+        break;
       }
+
+      DCHECK(needs_write_barrier);
+      CpuRegister register_value = value.AsRegister<CpuRegister>();
+      NearLabel done, not_null, do_put;
+      SlowPathCode* slow_path = nullptr;
+      CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+      if (may_need_runtime_call) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          __ testl(register_value, register_value);
+          __ j(kNotEqual, &not_null);
+          __ movl(address, Immediate(0));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ jmp(&done);
+          __ Bind(&not_null);
+        }
+
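+        // Compare the component type of the array with the class of the value to store.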
+        __ movl(temp, Address(array, class_offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ MaybeUnpoisonHeapReference(temp);
+        __ movl(temp, Address(temp, component_offset));
+        // No need to poison/unpoison, we're comparing two poisoned references.
+        __ cmpl(temp, Address(register_value, class_offset));
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          __ j(kEqual, &do_put);
+          __ MaybeUnpoisonHeapReference(temp);
+          __ movl(temp, Address(temp, super_offset));
+          // No need to unpoison the result, we're comparing against null.
+          __ testl(temp, temp);
+          __ j(kNotEqual, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ j(kNotEqual, slow_path->GetEntryLabel());
+        }
+      }
+
+      if (kPoisonHeapReferences) {
+        __ movl(temp, register_value);
+        __ PoisonHeapReference(temp);
+        __ movl(address, temp);
+      } else {
+        __ movl(address, register_value);
+      }
+      if (!may_need_runtime_call) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+
+      CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
+      codegen_->MarkGCCard(
+          temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
+      __ Bind(&done);
+
+      if (slow_path != nullptr) {
+        __ Bind(slow_path->GetExitLabel());
+      }
+
+      break;
+    }
+    case Primitive::kPrimInt: {
+      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
+      if (value.IsRegister()) {
+        __ movl(address, value.AsRegister<CpuRegister>());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(address, Immediate(v));
+      }
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        if (value.IsRegister()) {
-          __ movq(Address(obj, offset), value.AsRegister<CpuRegister>());
-        } else {
-          int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
-          DCHECK(IsInt<32>(v));
-          int32_t v_32 = v;
-          __ movq(Address(obj, offset), Immediate(v_32));
-        }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
+          : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
+      if (value.IsRegister()) {
+        __ movq(address, value.AsRegister<CpuRegister>());
       } else {
-        if (value.IsRegister()) {
-          __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
-                  value.AsRegister<CpuRegister>());
-        } else {
-          int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
-          DCHECK(IsInt<32>(v));
-          int32_t v_32 = v;
-          __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
-                  Immediate(v_32));
-        }
+        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
+        DCHECK(IsInt<32>(v));
+        int32_t v_32 = v;
+        __ movq(address, Immediate(v_32));
       }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        DCHECK(value.IsFpuRegister());
-        __ movss(Address(obj, offset), value.AsFpuRegister<XmmRegister>());
-      } else {
-        DCHECK(value.IsFpuRegister());
-        __ movss(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
-                value.AsFpuRegister<XmmRegister>());
-      }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
+          : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
+      DCHECK(value.IsFpuRegister());
+      __ movss(address, value.AsFpuRegister<XmmRegister>());
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        DCHECK(value.IsFpuRegister());
-        __ movsd(Address(obj, offset), value.AsFpuRegister<XmmRegister>());
-      } else {
-        DCHECK(value.IsFpuRegister());
-        __ movsd(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
-                value.AsFpuRegister<XmmRegister>());
-      }
+      uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      Address address = index.IsConstant()
+          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
+          : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
+      DCHECK(value.IsFpuRegister());
+      __ movsd(address, value.AsFpuRegister<XmmRegister>());
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
@@ -4250,7 +4356,7 @@
   Location index_loc = locations->InAt(0);
   Location length_loc = locations->InAt(1);
   SlowPathCode* slow_path =
-    new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
+      new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
 
   if (length_loc.IsConstant()) {
     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
@@ -4373,7 +4479,8 @@
 }
 
 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
 
@@ -4531,7 +4638,8 @@
 }
 
 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
 
@@ -4766,10 +4874,16 @@
         DCHECK(cls.IsStackSlot()) << cls;
         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
-      // Classes must be equal for the instanceof to succeed.
-      __ j(kNotEqual, &zero);
-      __ movl(out, Immediate(1));
-      __ jmp(&done);
+      if (zero.IsLinked()) {
+        // Classes must be equal for the instanceof to succeed.
+        __ j(kNotEqual, &zero);
+        __ movl(out, Immediate(1));
+        __ jmp(&done);
+      } else {
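+        // Nothing branches to the zero label; materialize the result without jumps.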
+        __ setcc(kEqual, out);
+        // setcc only sets the low byte.
+        __ andl(out, Immediate(1));
+      }
       break;
     }
     case TypeCheckKind::kAbstractClassCheck: {
@@ -4820,7 +4934,16 @@
       break;
     }
     case TypeCheckKind::kArrayObjectCheck: {
-      // Just need to check that the object's class is a non primitive array.
+      // Do an exact check.
+      NearLabel exact_check;
+      if (cls.IsRegister()) {
+        __ cmpl(out, cls.AsRegister<CpuRegister>());
+      } else {
+        DCHECK(cls.IsStackSlot()) << cls;
+        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
+      }
+      __ j(kEqual, &exact_check);
+      // Otherwise, we need to check that the object's class is a non primitive array.
       __ movl(out, Address(out, component_offset));
       __ MaybeUnpoisonHeapReference(out);
       __ testl(out, out);
@@ -4828,6 +4951,7 @@
       __ j(kEqual, &done);
       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
       __ j(kNotEqual, &zero);
+      __ Bind(&exact_check);
       __ movl(out, Immediate(1));
       __ jmp(&done);
       break;
@@ -4983,7 +5107,7 @@
     }
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
-      NearLabel loop, success;
+      NearLabel loop;
       __ Bind(&loop);
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<CpuRegister>());
@@ -4991,18 +5115,25 @@
         DCHECK(cls.IsStackSlot()) << cls;
         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
-      __ j(kEqual, &success);
+      __ j(kEqual, &done);
       __ movl(temp, Address(temp, super_offset));
       __ MaybeUnpoisonHeapReference(temp);
       __ testl(temp, temp);
       __ j(kNotEqual, &loop);
       // Jump to the slow path to throw the exception.
       __ jmp(slow_path->GetEntryLabel());
-      __ Bind(&success);
       break;
     }
     case TypeCheckKind::kArrayObjectCheck: {
-      // Just need to check that the object's class is a non primitive array.
+      // Do an exact check.
+      if (cls.IsRegister()) {
+        __ cmpl(temp, cls.AsRegister<CpuRegister>());
+      } else {
+        DCHECK(cls.IsStackSlot()) << cls;
+        __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
+      }
+      __ j(kEqual, &done);
+      // Otherwise, we need to check that the object's class is a non primitive array.
       __ movl(temp, Address(temp, component_offset));
       __ MaybeUnpoisonHeapReference(temp);
       __ testl(temp, temp);
@@ -5180,6 +5311,38 @@
   // Will be generated at use site.
 }
 
+// Simple implementation of packed switch - generate cascaded compare/jumps.
+void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  int32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  CpuRegister value_reg = locations->InAt(0).AsRegister<CpuRegister>();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  // Create a series of compare/jumps.
+  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+  for (int i = 0; i < num_entries; i++) {
+    int32_t case_value = lower_bound + i;
+    if (case_value == 0) {
+      __ testl(value_reg, value_reg);
+    } else {
+      __ cmpl(value_reg, Immediate(case_value));
+    }
+    __ j(kEqual, codegen_->GetLabelOf(successors.at(i)));
+  }
+
+  // And the default for any other value.
+  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+    __ jmp(codegen_->GetLabelOf(default_block));
+  }
+}
+
 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
   if (value == 0) {
     __ xorl(dest, dest);
@@ -5222,7 +5385,7 @@
 /**
  * Class to handle late fixup of offsets into constant area.
  */
-class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> {
+class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
   public:
     RIPFixup(const CodeGeneratorX86_64& codegen, int offset)
       : codegen_(codegen), offset_into_constant_area_(offset) {}
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 1ec3580..ecc8630 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -70,6 +70,35 @@
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
 };
 
+class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
+ public:
+  FieldAccessCallingConventionX86_64() {}
+
+  Location GetObjectLocation() const OVERRIDE {
+    return Location::RegisterLocation(RSI);
+  }
+  Location GetFieldIndexLocation() const OVERRIDE {
+    return Location::RegisterLocation(RDI);
+  }
+  Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+    return Location::RegisterLocation(RAX);
+  }
+  Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? Location::RegisterLocation(RDX)
+        : (is_instance
+            ? Location::RegisterLocation(RDX)
+            : Location::RegisterLocation(RSI));
+  }
+  Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+    return Location::FpuRegisterLocation(XMM0);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86_64);
+};
+
 class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
  public:
   InvokeDexCallingConventionVisitorX86_64() {}
@@ -215,6 +244,9 @@
   void Bind(HBasicBlock* block) OVERRIDE;
   void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
+  void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
+  void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
+
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
@@ -286,11 +318,11 @@
   void Move(Location destination, Location source);
 
   Label* GetLabelOf(HBasicBlock* block) const {
-    return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
+    return CommonGetLabelOf<Label>(block_labels_, block);
   }
 
   void Initialize() OVERRIDE {
-    block_labels_.SetSize(GetGraph()->GetBlocks().size());
+    block_labels_ = CommonInitializeLabels<Label>();
   }
 
   bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
@@ -334,7 +366,7 @@
   };
 
   // Labels for each block that will be compiled.
-  GrowableArray<Label> block_labels_;
+  Label* block_labels_;  // Indexed by block id.
   Label frame_entry_label_;
   LocationsBuilderX86_64 location_builder_;
   InstructionCodeGeneratorX86_64 instruction_visitor_;
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index 20ce110..e0aa4ff 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -226,14 +226,14 @@
 
   // We assume that GVN has run before, so we only perform a pointer
   // comparison.  If for some reason the values are equal but the pointers are
-  // different, we are still correct and only miss an optimisation
+  // different, we are still correct and only miss an optimization
   // opportunity.
   if (instruction->GetLeft() == instruction->GetRight()) {
     // Replace code looking like
     //    SUB dst, src, src
     // with
     //    CONSTANT 0
-    // Note that we cannot optimise `x - x` to `0` for floating-point. It does
+    // Note that we cannot optimize `x - x` to `0` for floating-point. It does
     // not work when `x` is an infinity.
     instruction->ReplaceWith(GetGraph()->GetConstant(type, 0));
     block->RemoveInstruction(instruction);
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 7d509a2..007d0e3 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -16,34 +16,67 @@
 
 #include "dead_code_elimination.h"
 
+#include "utils/array_ref.h"
 #include "base/bit_vector-inl.h"
 #include "ssa_phi_elimination.h"
 
 namespace art {
 
-static void MarkReachableBlocks(HBasicBlock* block, ArenaBitVector* visited) {
-  int block_id = block->GetBlockId();
-  if (visited->IsBitSet(block_id)) {
-    return;
-  }
-  visited->SetBit(block_id);
+static void MarkReachableBlocks(HGraph* graph, ArenaBitVector* visited) {
+  ArenaVector<HBasicBlock*> worklist(graph->GetArena()->Adapter());
+  constexpr size_t kDefaultWorklistSize = 8;
+  worklist.reserve(kDefaultWorklistSize);
+  visited->SetBit(graph->GetEntryBlock()->GetBlockId());
+  worklist.push_back(graph->GetEntryBlock());
 
-  HInstruction* last_instruction = block->GetLastInstruction();
-  if (last_instruction->IsIf()) {
-    HIf* if_instruction = last_instruction->AsIf();
-    HInstruction* condition = if_instruction->InputAt(0);
-    if (!condition->IsIntConstant()) {
-      MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited);
-      MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited);
-    } else if (condition->AsIntConstant()->IsOne()) {
-      MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited);
-    } else {
-      DCHECK(condition->AsIntConstant()->IsZero());
-      MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited);
+  while (!worklist.empty()) {
+    HBasicBlock* block = worklist.back();
+    worklist.pop_back();
+    int block_id = block->GetBlockId();
+    DCHECK(visited->IsBitSet(block_id));
+
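+    // All successors are live unless a constant condition below narrows the set.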
+    ArrayRef<HBasicBlock* const> live_successors(block->GetSuccessors());
+    HInstruction* last_instruction = block->GetLastInstruction();
+    if (last_instruction->IsIf()) {
+      HIf* if_instruction = last_instruction->AsIf();
+      HInstruction* condition = if_instruction->InputAt(0);
+      if (condition->IsIntConstant()) {
+        if (condition->AsIntConstant()->IsOne()) {
+          live_successors = live_successors.SubArray(0u, 1u);
+          DCHECK_EQ(live_successors[0], if_instruction->IfTrueSuccessor());
+        } else {
+          DCHECK(condition->AsIntConstant()->IsZero());
+          live_successors = live_successors.SubArray(1u, 1u);
+          DCHECK_EQ(live_successors[0], if_instruction->IfFalseSuccessor());
+        }
+      }
+    } else if (last_instruction->IsPackedSwitch()) {
+      HPackedSwitch* switch_instruction = last_instruction->AsPackedSwitch();
+      HInstruction* switch_input = switch_instruction->InputAt(0);
+      if (switch_input->IsIntConstant()) {
+        int32_t switch_value = switch_input->AsIntConstant()->GetValue();
+        int32_t start_value = switch_instruction->GetStartValue();
+        // Note: Though the spec forbids packed-switch values from wrapping around, we leave
+        // that check to the verifier and use unsigned arithmetic with its "modulo 2^32"
+        // semantics to determine whether the value is in range, wrapped or not.
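+        // For example, with start_value == INT_MAX and switch_value == INT_MIN, the unsigned
+        // difference wraps to 1, correctly selecting the entry at index 1 if the switch has
+        // at least two entries, and the default block otherwise.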
+        uint32_t switch_index =
+            static_cast<uint32_t>(switch_value) - static_cast<uint32_t>(start_value);
+        if (switch_index < switch_instruction->GetNumEntries()) {
+          live_successors = live_successors.SubArray(switch_index, 1u);
+          DCHECK_EQ(live_successors[0], block->GetSuccessor(switch_index));
+        } else {
+          live_successors = live_successors.SubArray(switch_instruction->GetNumEntries(), 1u);
+          DCHECK_EQ(live_successors[0], switch_instruction->GetDefaultBlock());
+        }
+      }
     }
-  } else {
-    for (HBasicBlock* successor : block->GetSuccessors()) {
-      MarkReachableBlocks(successor, visited);
+
+    for (HBasicBlock* successor : live_successors) {
+      // Add only those successors that have not been visited yet.
+      if (!visited->IsBitSet(successor->GetBlockId())) {
+        visited->SetBit(successor->GetBlockId());
+        worklist.push_back(successor);
+      }
     }
   }
 }
@@ -67,7 +100,7 @@
   ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false);
   ArenaBitVector affected_loops(allocator, graph_->GetBlocks().size(), false);
 
-  MarkReachableBlocks(graph_->GetEntryBlock(), &live_blocks);
+  MarkReachableBlocks(graph_, &live_blocks);
   bool removed_one_or_more_blocks = false;
 
   // Remove all dead blocks. Iterate in post order because removal needs the
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 583da30..4e1cafe 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -743,6 +743,22 @@
   }
 }
 
+void SSAChecker::VisitPackedSwitch(HPackedSwitch* instruction) {
+  VisitInstruction(instruction);
+  // Check that the number of block successors matches the switch count plus
+  // one for the default block.
+  HBasicBlock* block = instruction->GetBlock();
+  if (instruction->GetNumEntries() + 1u != block->GetSuccessors().size()) {
+    AddError(StringPrintf(
+        "%s instruction %d in block %d expects %u successors to the block, but found: %zu.",
+        instruction->DebugName(),
+        instruction->GetId(),
+        block->GetBlockId(),
+        instruction->GetNumEntries() + 1u,
+        block->GetSuccessors().size()));
+  }
+}
+
 void SSAChecker::VisitIf(HIf* instruction) {
   VisitInstruction(instruction);
   HandleBooleanInput(instruction, 0);
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 0e270db..7ddffc1 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -125,6 +125,7 @@
   void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE;
   void VisitCondition(HCondition* op) OVERRIDE;
   void VisitIf(HIf* instruction) OVERRIDE;
+  void VisitPackedSwitch(HPackedSwitch* instruction) OVERRIDE;
   void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE;
   void VisitConstant(HConstant* instruction) OVERRIDE;
 
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index d05c514..7a83662 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -374,6 +374,11 @@
         << instance_of->MustDoNullCheck() << std::noboolalpha;
   }
 
+  void VisitArraySet(HArraySet* array_set) OVERRIDE {
+    StartAttributeStream("value_can_be_null") << std::boolalpha
+        << array_set->GetValueCanBeNull() << std::noboolalpha;
+  }
+
   void VisitInvoke(HInvoke* invoke) OVERRIDE {
     StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex();
     StartAttributeStream("method_name") << PrettyMethod(
@@ -393,6 +398,22 @@
     StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
   }
 
+  void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE {
+    StartAttributeStream("field_type") << field_access->GetFieldType();
+  }
+
+  void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* field_access) OVERRIDE {
+    StartAttributeStream("field_type") << field_access->GetFieldType();
+  }
+
+  void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* field_access) OVERRIDE {
+    StartAttributeStream("field_type") << field_access->GetFieldType();
+  }
+
+  void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* field_access) OVERRIDE {
+    StartAttributeStream("field_type") << field_access->GetFieldType();
+  }
+
   void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE {
     StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
   }
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 1ee8648..7cf0617 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -15,11 +15,12 @@
  */
 
 #include "gvn.h"
+
+#include "base/arena_containers.h"
+#include "base/bit_vector-inl.h"
 #include "side_effects_analysis.h"
 #include "utils.h"
-
 #include "utils/arena_bit_vector.h"
-#include "base/bit_vector-inl.h"
 
 namespace art {
 
@@ -32,13 +33,13 @@
  * if there is one in the set. In GVN, we would say those instructions have the
  * same "number".
  */
-class ValueSet : public ArenaObject<kArenaAllocMisc> {
+class ValueSet : public ArenaObject<kArenaAllocGvn> {
  public:
   // Constructs an empty ValueSet which owns all its buckets.
   explicit ValueSet(ArenaAllocator* allocator)
       : allocator_(allocator),
         num_buckets_(kMinimumNumberOfBuckets),
-        buckets_(allocator->AllocArray<Node*>(num_buckets_)),
+        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
         buckets_owned_(allocator, num_buckets_, false),
         num_entries_(0) {
     // ArenaAllocator returns zeroed memory, so no need to set buckets to null.
@@ -51,7 +52,7 @@
   ValueSet(ArenaAllocator* allocator, const ValueSet& to_copy)
       : allocator_(allocator),
         num_buckets_(to_copy.IdealBucketCount()),
-        buckets_(allocator->AllocArray<Node*>(num_buckets_)),
+        buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
         buckets_owned_(allocator, num_buckets_, false),
         num_entries_(to_copy.num_entries_) {
     // ArenaAllocator returns zeroed memory, so entries of buckets_ and
@@ -143,7 +144,7 @@
   size_t GetNumberOfEntries() const { return num_entries_; }
 
  private:
-  class Node : public ArenaObject<kArenaAllocMisc> {
+  class Node : public ArenaObject<kArenaAllocGvn> {
    public:
     Node(HInstruction* instruction, size_t hash_code, Node* next)
         : instruction_(instruction), hash_code_(hash_code), next_(next) {}
@@ -306,7 +307,7 @@
       : graph_(graph),
         allocator_(allocator),
         side_effects_(side_effects),
-        sets_(allocator, graph->GetBlocks().size(), nullptr) {}
+        sets_(graph->GetBlocks().size(), nullptr, allocator->Adapter(kArenaAllocGvn)) {}
 
   void Run();
 
@@ -322,14 +323,14 @@
   // ValueSet for blocks. Initially null, but for an individual block they
   // are allocated and populated by the dominator, and updated by all blocks
   // in the path from the dominator to the block.
-  GrowableArray<ValueSet*> sets_;
+  ArenaVector<ValueSet*> sets_;
 
   DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
 };
 
 void GlobalValueNumberer::Run() {
   DCHECK(side_effects_.HasRun());
-  sets_.Put(graph_->GetEntryBlock()->GetBlockId(), new (allocator_) ValueSet(allocator_));
+  sets_[graph_->GetEntryBlock()->GetBlockId()] = new (allocator_) ValueSet(allocator_);
 
   // Use the reverse post order to ensure the non back-edge predecessors of a block are
   // visited before the block itself.
@@ -348,7 +349,7 @@
     set = new (allocator_) ValueSet(allocator_);
   } else {
     HBasicBlock* dominator = block->GetDominator();
-    ValueSet* dominator_set = sets_.Get(dominator->GetBlockId());
+    ValueSet* dominator_set = sets_[dominator->GetBlockId()];
     if (dominator->GetSuccessors().size() == 1) {
       DCHECK_EQ(dominator->GetSuccessor(0), block);
       set = dominator_set;
@@ -363,7 +364,7 @@
         set->Kill(side_effects_.GetLoopEffects(block));
       } else if (predecessors.size() > 1) {
         for (HBasicBlock* predecessor : predecessors) {
-          set->IntersectWith(sets_.Get(predecessor->GetBlockId()));
+          set->IntersectWith(sets_[predecessor->GetBlockId()]);
           if (set->IsEmpty()) {
             break;
           }
@@ -372,7 +373,7 @@
     }
   }
 
-  sets_.Put(block->GetBlockId(), set);
+  sets_[block->GetBlockId()] = set;
 
   HInstruction* current = block->GetFirstInstruction();
   while (current != nullptr) {
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 92c732c..e5123de 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -33,17 +33,6 @@
 }
 
 /**
- * Returns true if instruction is proper entry-phi-operation for given loop
- * (referred to as mu-operation in Gerlek's paper).
- */
-static bool IsEntryPhi(HLoopInformation* loop, HInstruction* instruction) {
-  return
-      instruction->IsPhi() &&
-      instruction->InputCount() == 2 &&
-      instruction->GetBlock() == loop->GetHeader();
-}
-
-/**
  * Since graph traversal may enter a SCC at any position, an initial representation may be rotated,
  * along dependences, viz. any of (a, b, c, d), (d, a, b, c)  (c, d, a, b), (b, c, d, a) assuming
  * a chain of dependences (mutual independent items may occur in arbitrary order). For proper
@@ -58,8 +47,9 @@
   size_t phi_pos = -1;
   const size_t size = scc->size();
   for (size_t i = 0; i < size; i++) {
-    if (IsEntryPhi(loop, scc->at(i)) && (phi == nullptr || phis.FoundBefore(scc->at(i), phi))) {
-      phi = scc->at(i);
+    HInstruction* other = scc->at(i);
+    if (other->IsLoopHeaderPhi() && (phi == nullptr || phis.FoundBefore(other, phi))) {
+      phi = other;
       phi_pos = i;
     }
   }
@@ -84,11 +74,14 @@
 HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph)
     : HOptimization(graph, kInductionPassName),
       global_depth_(0),
-      stack_(graph->GetArena()->Adapter()),
-      scc_(graph->GetArena()->Adapter()),
-      map_(std::less<HInstruction*>(), graph->GetArena()->Adapter()),
-      cycle_(std::less<HInstruction*>(), graph->GetArena()->Adapter()),
-      induction_(std::less<HLoopInformation*>(), graph->GetArena()->Adapter()) {
+      stack_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
+      scc_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
+      map_(std::less<HInstruction*>(),
+           graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
+      cycle_(std::less<HInstruction*>(),
+             graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
+      induction_(std::less<HLoopInformation*>(),
+                 graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) {
 }
 
 void HInductionVarAnalysis::Run() {
@@ -168,7 +161,7 @@
     }
 
     // Classify the SCC.
-    if (scc_.size() == 1 && !IsEntryPhi(loop, scc_[0])) {
+    if (scc_.size() == 1 && !scc_[0]->IsLoopHeaderPhi()) {
       ClassifyTrivial(loop, scc_[0]);
     } else {
       ClassifyNonTrivial(loop);
@@ -200,10 +193,7 @@
 void HInductionVarAnalysis::ClassifyTrivial(HLoopInformation* loop, HInstruction* instruction) {
   InductionInfo* info = nullptr;
   if (instruction->IsPhi()) {
-    for (size_t i = 1, count = instruction->InputCount(); i < count; i++) {
-      info = TransferPhi(LookupInfo(loop, instruction->InputAt(0)),
-                         LookupInfo(loop, instruction->InputAt(i)));
-    }
+    info = TransferPhi(loop, instruction, /* input_index */ 0);
   } else if (instruction->IsAdd()) {
     info = TransferAddSub(LookupInfo(loop, instruction->InputAt(0)),
                           LookupInfo(loop, instruction->InputAt(1)), kAdd);
@@ -241,25 +231,25 @@
 
   // Rotate proper entry-phi to front.
   if (size > 1) {
-    ArenaVector<HInstruction*> other(graph_->GetArena()->Adapter());
+    ArenaVector<HInstruction*> other(graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis));
     RotateEntryPhiFirst(loop, &scc_, &other);
   }
 
-  // Analyze from phi onwards.
+  // Analyze from entry-phi onwards.
   HInstruction* phi = scc_[0];
-  if (!IsEntryPhi(loop, phi)) {
+  if (!phi->IsLoopHeaderPhi()) {
     return;
   }
-  HInstruction* external = phi->InputAt(0);
-  HInstruction* internal = phi->InputAt(1);
-  InductionInfo* initial = LookupInfo(loop, external);
+
+  // External link should be loop invariant.
+  InductionInfo* initial = LookupInfo(loop, phi->InputAt(0));
   if (initial == nullptr || initial->induction_class != kInvariant) {
     return;
   }
 
-  // Singleton entry-phi-operation may be a wrap-around induction.
+  // The singleton entry-phi is a wrap-around induction if all internal links have the same meaning.
   if (size == 1) {
-    InductionInfo* update = LookupInfo(loop, internal);
+    InductionInfo* update = TransferPhi(loop, phi, /* input_index */ 1);
     if (update != nullptr) {
       AssignInfo(loop, phi, CreateInduction(kWrapAround, initial, update));
     }
@@ -272,7 +262,7 @@
     HInstruction* instruction = scc_[i];
     InductionInfo* update = nullptr;
     if (instruction->IsPhi()) {
-      update = SolvePhi(loop, phi, instruction);
+      update = SolvePhiAllInputs(loop, phi, instruction);
     } else if (instruction->IsAdd()) {
       update = SolveAddSub(
           loop, phi, instruction, instruction->InputAt(0), instruction->InputAt(1), kAdd, true);
@@ -286,10 +276,9 @@
     cycle_.Put(instruction, update);
   }
 
-  // Success if the internal link received a meaning.
-  auto it = cycle_.find(internal);
-  if (it != cycle_.end()) {
-    InductionInfo* induction = it->second;
+  // Success if all internal links received the same temporary meaning.
+  InductionInfo* induction = SolvePhi(phi, /* input_index */ 1);
+  if (induction != nullptr) {
     switch (induction->induction_class) {
       case kInvariant:
         // Classify first phi and then the rest of the cycle "on-demand".
@@ -329,13 +318,20 @@
   return CreateInduction(kPeriodic, induction->op_a, RotatePeriodicInduction(induction->op_b, last));
 }
 
-HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferPhi(InductionInfo* a,
-                                                                         InductionInfo* b) {
-  // Transfer over a phi: if both inputs are identical, result is input.
-  if (InductionEqual(a, b)) {
-    return a;
+HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferPhi(HLoopInformation* loop,
+                                                                         HInstruction* phi,
+                                                                         size_t input_index) {
+  // Match all phi inputs from input_index onwards exactly.
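+  // (input_index is 1 when the external input of a loop-header phi is skipped, and 0 when
+  //  every input must match.)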
+  const size_t count = phi->InputCount();
+  DCHECK_LT(input_index, count);
+  InductionInfo* a = LookupInfo(loop, phi->InputAt(input_index));
+  for (size_t i = input_index + 1; i < count; i++) {
+    InductionInfo* b = LookupInfo(loop, phi->InputAt(i));
+    if (!InductionEqual(a, b)) {
+      return nullptr;
+    }
   }
-  return nullptr;
+  return a;
 }
 
 HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferAddSub(InductionInfo* a,
@@ -421,47 +417,56 @@
   return nullptr;
 }
 
-HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhi(HLoopInformation* loop,
-                                                                      HInstruction* phi,
-                                                                      HInstruction* instruction) {
-  // Solve within a cycle over a phi: identical inputs are combined into that input as result.
-  const size_t count = instruction->InputCount();
-  DCHECK_GT(count, 0u);
-  auto ita = cycle_.find(instruction->InputAt(0));
+HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhi(HInstruction* phi,
+                                                                      size_t input_index) {
+  // Match all phi inputs from input_index onwards exactly.
+  const size_t count = phi->InputCount();
+  DCHECK_LT(input_index, count);
+  auto ita = cycle_.find(phi->InputAt(input_index));
   if (ita != cycle_.end()) {
-    InductionInfo* a = ita->second;
-    for (size_t i = 1; i < count; i++) {
-      auto itb = cycle_.find(instruction->InputAt(i));
-      if (itb == cycle_.end() || !HInductionVarAnalysis::InductionEqual(a, itb->second)) {
+    for (size_t i = input_index + 1; i < count; i++) {
+      auto itb = cycle_.find(phi->InputAt(i));
+      if (itb == cycle_.end() ||
+          !HInductionVarAnalysis::InductionEqual(ita->second, itb->second)) {
         return nullptr;
       }
     }
-    return a;
+    return ita->second;
+  }
+  return nullptr;
+}
+
+HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhiAllInputs(
+    HLoopInformation* loop,
+    HInstruction* entry_phi,
+    HInstruction* phi) {
+  // Match all phi inputs.
+  InductionInfo* match = SolvePhi(phi, /* input_index */ 0);
+  if (match != nullptr) {
+    return match;
   }
 
-  // Solve within a cycle over another entry-phi: add invariants into a periodic.
-  if (IsEntryPhi(loop, instruction)) {
-    InductionInfo* a = LookupInfo(loop, instruction->InputAt(0));
+  // Otherwise, try to solve for a periodic seeded from phi onward.
+  // Only tight multi-statement cycles are considered in order to
+  // simplify rotating the periodic during the final classification.
+  if (phi->IsLoopHeaderPhi() && phi->InputCount() == 2) {
+    InductionInfo* a = LookupInfo(loop, phi->InputAt(0));
     if (a != nullptr && a->induction_class == kInvariant) {
-      if (instruction->InputAt(1) == phi) {
-        InductionInfo* initial = LookupInfo(loop, phi->InputAt(0));
+      if (phi->InputAt(1) == entry_phi) {
+        InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0));
         return CreateInduction(kPeriodic, a, initial);
       }
-      auto it = cycle_.find(instruction->InputAt(1));
-      if (it != cycle_.end()) {
-        InductionInfo* b = it->second;
-        if (b->induction_class == kPeriodic) {
-          return CreateInduction(kPeriodic, a, b);
-        }
+      InductionInfo* b = SolvePhi(phi, /* input_index */ 1);
+      if (b != nullptr && b->induction_class == kPeriodic) {
+        return CreateInduction(kPeriodic, a, b);
       }
     }
   }
-
   return nullptr;
 }
 
 HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveAddSub(HLoopInformation* loop,
-                                                                         HInstruction* phi,
+                                                                         HInstruction* entry_phi,
                                                                          HInstruction* instruction,
                                                                          HInstruction* x,
                                                                          HInstruction* y,
@@ -471,7 +476,7 @@
   // invariant value, seeded from phi, keeps adding to the stride of the induction.
   InductionInfo* b = LookupInfo(loop, y);
   if (b != nullptr && b->induction_class == kInvariant) {
-    if (x == phi) {
+    if (x == entry_phi) {
       return (op == kAdd) ? b : CreateInvariantOp(kNeg, nullptr, b);
     }
     auto it = cycle_.find(x);
@@ -487,14 +492,15 @@
   if (op == kAdd) {
     // Try the other way around for an addition if considered for first time.
     if (is_first_call) {
-      return SolveAddSub(loop, phi, instruction, y, x, op, false);
+      return SolveAddSub(loop, entry_phi, instruction, y, x, op, false);
     }
   } else if (op == kSub) {
-    // Solve within a tight cycle for a periodic idiom k = c - k;
-    if (y == phi && instruction == phi->InputAt(1)) {
+    // Solve within a tight cycle that is formed by exactly two instructions,
+    // one phi and one update, for a periodic idiom of the form k = c - k;
+    if (y == entry_phi && entry_phi->InputCount() == 2 && instruction == entry_phi->InputAt(1)) {
       InductionInfo* a = LookupInfo(loop, x);
       if (a != nullptr && a->induction_class == kInvariant) {
-        InductionInfo* initial = LookupInfo(loop, phi->InputAt(0));
+        InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0));
         return CreateInduction(kPeriodic, CreateInvariantOp(kSub, a, initial), initial);
       }
     }
@@ -539,42 +545,46 @@
                                            Primitive::Type type,
                                            IfCondition cmp) {
   if (a->induction_class == kInvariant && b->induction_class == kLinear) {
-    // Swap conditions (e.g. U > i is same as i < U).
+    // Swap condition if the induction is on the right-hand side (U > i is the same as i < U).
     switch (cmp) {
       case kCondLT: VisitCondition(loop, b, a, type, kCondGT); break;
       case kCondLE: VisitCondition(loop, b, a, type, kCondGE); break;
       case kCondGT: VisitCondition(loop, b, a, type, kCondLT); break;
       case kCondGE: VisitCondition(loop, b, a, type, kCondLE); break;
+      case kCondNE: VisitCondition(loop, b, a, type, kCondNE); break;
       default: break;
     }
   } else if (a->induction_class == kLinear && b->induction_class == kInvariant) {
-    // Normalize a linear loop control with a constant, nonzero stride:
+    // Analyze the condition with the induction on the left-hand side (e.g. i < U).
+    InductionInfo* lower_expr = a->op_b;
+    InductionInfo* upper_expr = b;
+    InductionInfo* stride = a->op_a;
+    int64_t stride_value = 0;
+    if (!IsIntAndGet(stride, &stride_value)) {
+      return;
+    }
+    // Rewrite condition i != U into i < U or i > U if the end condition is reached exactly.
+    if (cmp == kCondNE && ((stride_value == +1 && IsTaken(lower_expr, upper_expr, kCondLT)) ||
+                           (stride_value == -1 && IsTaken(lower_expr, upper_expr, kCondGT)))) {
+      cmp = stride_value > 0 ? kCondLT : kCondGT;
+    }
+    // Normalize a linear loop control with a nonzero stride:
     //   stride > 0, either i < U or i <= U
     //   stride < 0, either i > U or i >= U
-    InductionInfo* stride = a->op_a;
-    InductionInfo* lo_val = a->op_b;
-    InductionInfo* hi_val = b;
-    // Analyze the stride thoroughly, since its representation may be compound at this point.
-    InductionVarRange::Value v1 = InductionVarRange::GetMin(stride, nullptr);
-    InductionVarRange::Value v2 = InductionVarRange::GetMax(stride, nullptr);
-    if (v1.a_constant == 0 && v2.a_constant == 0 && v1.b_constant == v2.b_constant) {
-      const int32_t stride_value = v1.b_constant;
-      if ((stride_value > 0 && (cmp == kCondLT || cmp == kCondLE)) ||
-          (stride_value < 0 && (cmp == kCondGT || cmp == kCondGE))) {
-        bool is_strict = cmp == kCondLT || cmp == kCondGT;
-        VisitTripCount(loop, lo_val, hi_val, stride, stride_value, type, is_strict);
-      }
+    if ((stride_value > 0 && (cmp == kCondLT || cmp == kCondLE)) ||
+        (stride_value < 0 && (cmp == kCondGT || cmp == kCondGE))) {
+      VisitTripCount(loop, lower_expr, upper_expr, stride, stride_value, type, cmp);
     }
   }
 }
 
 void HInductionVarAnalysis::VisitTripCount(HLoopInformation* loop,
-                                           InductionInfo* lo_val,
-                                           InductionInfo* hi_val,
+                                           InductionInfo* lower_expr,
+                                           InductionInfo* upper_expr,
                                            InductionInfo* stride,
-                                           int32_t stride_value,
+                                           int64_t stride_value,
                                            Primitive::Type type,
-                                           bool is_strict) {
+                                           IfCondition cmp) {
   // Any loop of the general form:
   //
   //    for (i = L; i <= U; i += S) // S > 0
@@ -586,29 +596,95 @@
   //    for (n = 0; n < TC; n++) // where TC = (U + S - L) / S
   //      .. L + S * n ..
   //
-  // NOTE: The TC (trip-count) expression is only valid if the top-test path is taken at
-  //       least once. Otherwise TC is 0. Also, the expression assumes the loop does not
-  //       have any early-exits. Otherwise, TC is an upper bound.
+  // taking the following into consideration:
   //
-  bool cancels = is_strict && std::abs(stride_value) == 1;  // compensation cancels conversion?
+  // (1) Using the same precision, the TC (trip-count) expression should be interpreted as
+  //     an unsigned entity, for example, as in the following loop that uses the full range:
+  //     for (int i = INT_MIN; i < INT_MAX; i++) // TC = UINT_MAX
+  // (2) The TC is only valid if the loop is taken, otherwise TC = 0, as in:
+  //     for (int i = 12; i < U; i++) // TC = 0 when U >= 12
+  //     If this cannot be determined at compile-time, the TC is only valid within the
+  //     loop-body proper, not the loop-header unless enforced with an explicit condition.
+  // (3) The TC is only valid if the loop is finite, otherwise TC has no value, as in:
+  //     for (int i = 0; i <= U; i++) // TC = Inf when U = INT_MAX
+  //     If this cannot be determined at compile-time, the TC is only valid when enforced
+  //     with an explicit condition.
+  // (4) For loops that early-exit, the TC forms an upper bound, as in:
+  //     for (int i = 0; i < 10 && ....; i++) // TC <= 10
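+  //
+  // As a small worked example, for (int i = 3; i < 10; i += 2) normalizes the bound to
+  // (10 - 1) + 2 = 11, giving TC = (11 - 3) / 2 = 4, matching the iterations i = 3, 5, 7, 9.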
+  const bool is_taken = IsTaken(lower_expr, upper_expr, cmp);
+  const bool is_finite = IsFinite(upper_expr, stride_value, type, cmp);
+  const bool cancels = (cmp == kCondLT || cmp == kCondGT) && std::abs(stride_value) == 1;
   if (!cancels) {
     // Convert exclusive integral inequality into inclusive integral inequality,
     // viz. condition i < U is i <= U - 1 and condition i > U is i >= U + 1.
-    if (is_strict) {
-      const InductionOp op = stride_value > 0 ? kSub : kAdd;
-      hi_val = CreateInvariantOp(op, hi_val, CreateConstant(1, type));
+    if (cmp == kCondLT) {
+      upper_expr = CreateInvariantOp(kSub, upper_expr, CreateConstant(1, type));
+    } else if (cmp == kCondGT) {
+      upper_expr = CreateInvariantOp(kAdd, upper_expr, CreateConstant(1, type));
     }
     // Compensate for stride.
-    hi_val = CreateInvariantOp(kAdd, hi_val, stride);
+    upper_expr = CreateInvariantOp(kAdd, upper_expr, stride);
   }
-
+  InductionInfo* trip_count =
+      CreateInvariantOp(kDiv, CreateInvariantOp(kSub, upper_expr, lower_expr), stride);
   // Assign the trip-count expression to the loop control. Clients that use the information
-  // should be aware that due to the top-test assumption, the expression is only valid in the
-  // loop-body proper, and not yet in the loop-header. If the loop has any early exits, the
-  // trip-count forms a conservative upper bound on the number of loop iterations.
-  InductionInfo* trip_count =
-      CreateInvariantOp(kDiv, CreateInvariantOp(kSub, hi_val, lo_val), stride);
-  AssignInfo(loop, loop->GetHeader()->GetLastInstruction(), trip_count);
+  // should be aware that the expression is only valid under the conditions listed above.
+  InductionOp tc_kind = kTripCountInBodyUnsafe;
+  if (is_taken && is_finite) {
+    tc_kind = kTripCountInLoop;
+  } else if (is_finite) {
+    tc_kind = kTripCountInBody;
+  } else if (is_taken) {
+    tc_kind = kTripCountInLoopUnsafe;
+  }
+  AssignInfo(loop, loop->GetHeader()->GetLastInstruction(), CreateTripCount(tc_kind, trip_count));
+}
+
+bool HInductionVarAnalysis::IsTaken(InductionInfo* lower_expr,
+                                    InductionInfo* upper_expr,
+                                    IfCondition cmp) {
+  int64_t lower_value;
+  int64_t upper_value;
+  if (IsIntAndGet(lower_expr, &lower_value) && IsIntAndGet(upper_expr, &upper_value)) {
+    switch (cmp) {
+      case kCondLT: return lower_value <  upper_value;
+      case kCondLE: return lower_value <= upper_value;
+      case kCondGT: return lower_value >  upper_value;
+      case kCondGE: return lower_value >= upper_value;
+      case kCondEQ:
+      case kCondNE: LOG(FATAL) << "CONDITION UNREACHABLE";
+    }
+  }
+  return false;  // not certain, may be untaken
+}
+
+bool HInductionVarAnalysis::IsFinite(InductionInfo* upper_expr,
+                                     int64_t stride_value,
+                                     Primitive::Type type,
+                                     IfCondition cmp) {
+  const int64_t min = type == Primitive::kPrimInt
+      ? std::numeric_limits<int32_t>::min()
+      : std::numeric_limits<int64_t>::min();
+  const int64_t max = type == Primitive::kPrimInt
+      ? std::numeric_limits<int32_t>::max()
+      : std::numeric_limits<int64_t>::max();
+  // Some rules under which it is certain at compile-time that the loop is finite.
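+  // For example, i < U with stride S > 1 is only certainly finite when U <= max - S + 1,
+  // so that the final increment i + S cannot exceed the maximum representable value.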
+  int64_t value;
+  switch (cmp) {
+    case kCondLT:
+      return stride_value == 1 ||
+          (IsIntAndGet(upper_expr, &value) && value <= (max - stride_value + 1));
+    case kCondLE:
+      return (IsIntAndGet(upper_expr, &value) && value <= (max - stride_value));
+    case kCondGT:
+      return stride_value == -1 ||
+          (IsIntAndGet(upper_expr, &value) && value >= (min - stride_value - 1));
+    case kCondGE:
+      return (IsIntAndGet(upper_expr, &value) && value >= (min - stride_value));
+    case kCondEQ:
+    case kCondNE: LOG(FATAL) << "CONDITION UNREACHABLE";
+  }
+  return false;  // not certain, may be infinite
 }
 
 void HInductionVarAnalysis::AssignInfo(HLoopInformation* loop,
@@ -618,7 +694,8 @@
   if (it == induction_.end()) {
     it = induction_.Put(loop,
                         ArenaSafeMap<HInstruction*, InductionInfo*>(
-                            std::less<HInstruction*>(), graph_->GetArena()->Adapter()));
+                            std::less<HInstruction*>(),
+                            graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)));
   }
   it->second.Put(instruction, info);
 }
@@ -725,13 +802,22 @@
 }
 
 bool HInductionVarAnalysis::IsIntAndGet(InductionInfo* info, int64_t* value) {
-  if (info != nullptr && info->induction_class == kInvariant && info->operation == kFetch) {
-    DCHECK(info->fetch);
-    if (info->fetch->IsIntConstant()) {
-      *value = info->fetch->AsIntConstant()->GetValue();
-      return true;
-    } else if (info->fetch->IsLongConstant()) {
-      *value = info->fetch->AsLongConstant()->GetValue();
+  if (info != nullptr && info->induction_class == kInvariant) {
+    // A direct constant fetch.
+    if (info->operation == kFetch) {
+      DCHECK(info->fetch);
+      if (info->fetch->IsIntConstant()) {
+        *value = info->fetch->AsIntConstant()->GetValue();
+        return true;
+      } else if (info->fetch->IsLongConstant()) {
+        *value = info->fetch->AsLongConstant()->GetValue();
+        return true;
+      }
+    }
+    // Use range analysis to resolve compound values.
+    int32_t range_value;
+    if (InductionVarRange::GetConstant(info, &range_value)) {
+      *value = range_value;
       return true;
     }
   }
@@ -759,6 +845,10 @@
             inv += std::to_string(info->fetch->GetId()) + ":" + info->fetch->DebugName();
           }
           break;
+        case kTripCountInLoop:       inv += "TC-loop:"; break;
+        case kTripCountInBody:       inv += "TC-body:"; break;
+        case kTripCountInLoopUnsafe: inv += "TC-loop-unsafe:"; break;
+        case kTripCountInBodyUnsafe: inv += "TC-body-unsafe:"; break;
       }
       inv += InductionToString(info->op_b);
       return inv + ")";
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index 8eccf92..7ab80cd 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -56,13 +56,20 @@
   };
 
   enum InductionOp {
-    kNop,  // no-operation: a true induction
+    // No-operation: a true induction.
+    kNop,
+    // Various invariant operations.
     kAdd,
     kSub,
     kNeg,
     kMul,
     kDiv,
-    kFetch
+    kFetch,
+    // Trip counts (valid in full loop or only body proper; unsafe implies loop may be infinite).
+    kTripCountInLoop,
+    kTripCountInBody,
+    kTripCountInLoopUnsafe,
+    kTripCountInBodyUnsafe
   };
 
   /**
@@ -77,8 +84,10 @@
    *         nop: a, then defined by b
    *   (4) periodic
    *         nop: a, then defined by b (repeated when exhausted)
+   *   (5) trip-count:
+   *         tc: defined by b
    */
-  struct InductionInfo : public ArenaObject<kArenaAllocMisc> {
+  struct InductionInfo : public ArenaObject<kArenaAllocInductionVarAnalysis> {
     InductionInfo(InductionClass ic,
                   InductionOp op,
                   InductionInfo* a,
@@ -110,6 +119,10 @@
     return new (graph_->GetArena()) InductionInfo(kInvariant, kFetch, nullptr, nullptr, f);
   }
 
+  InductionInfo* CreateTripCount(InductionOp op, InductionInfo* b) {
+    return new (graph_->GetArena()) InductionInfo(kInvariant, op, nullptr, b, nullptr);
+  }
+
   InductionInfo* CreateInduction(InductionClass ic, InductionInfo* a, InductionInfo* b) {
     DCHECK(a != nullptr && b != nullptr);
     return new (graph_->GetArena()) InductionInfo(ic, kNop, a, b, nullptr);
@@ -121,26 +134,27 @@
   uint32_t VisitDescendant(HLoopInformation* loop, HInstruction* instruction);
   void ClassifyTrivial(HLoopInformation* loop, HInstruction* instruction);
   void ClassifyNonTrivial(HLoopInformation* loop);
+  InductionInfo* RotatePeriodicInduction(InductionInfo* induction, InductionInfo* last);
 
   // Transfer operations.
-  InductionInfo* TransferPhi(InductionInfo* a, InductionInfo* b);
+  InductionInfo* TransferPhi(HLoopInformation* loop, HInstruction* phi, size_t input_index);
   InductionInfo* TransferAddSub(InductionInfo* a, InductionInfo* b, InductionOp op);
   InductionInfo* TransferMul(InductionInfo* a, InductionInfo* b);
   InductionInfo* TransferShl(InductionInfo* a, InductionInfo* b, Primitive::Type type);
   InductionInfo* TransferNeg(InductionInfo* a);
 
   // Solvers.
-  InductionInfo* SolvePhi(HLoopInformation* loop,
-                          HInstruction* phi,
-                          HInstruction* instruction);
+  InductionInfo* SolvePhi(HInstruction* phi, size_t input_index);
+  InductionInfo* SolvePhiAllInputs(HLoopInformation* loop,
+                                   HInstruction* entry_phi,
+                                   HInstruction* phi);
   InductionInfo* SolveAddSub(HLoopInformation* loop,
-                             HInstruction* phi,
+                             HInstruction* entry_phi,
                              HInstruction* instruction,
                              HInstruction* x,
                              HInstruction* y,
                              InductionOp op,
                              bool is_first_call);
-  InductionInfo* RotatePeriodicInduction(InductionInfo* induction, InductionInfo* last);
 
   // Trip count information.
   void VisitControl(HLoopInformation* loop);
@@ -150,12 +164,17 @@
                       Primitive::Type type,
                       IfCondition cmp);
   void VisitTripCount(HLoopInformation* loop,
-                      InductionInfo* lo_val,
-                      InductionInfo* hi_val,
+                      InductionInfo* lower_expr,
+                      InductionInfo* upper_expr,
                       InductionInfo* stride,
-                      int32_t stride_value,
+                      int64_t stride_value,
                       Primitive::Type type,
-                      bool is_strict);
+                      IfCondition cmp);
+  bool IsTaken(InductionInfo* lower_expr, InductionInfo* upper_expr, IfCondition cmp);
+  bool IsFinite(InductionInfo* upper_expr,
+                int64_t stride_value,
+                Primitive::Type type,
+                IfCondition cmp);
 
   // Assign and lookup.
   void AssignInfo(HLoopInformation* loop, HInstruction* instruction, InductionInfo* info);
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index fca1ca5..20492e7 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -20,6 +20,7 @@
 #include "builder.h"
 #include "gtest/gtest.h"
 #include "induction_var_analysis.h"
+#include "induction_var_range.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 
@@ -233,7 +234,8 @@
   EXPECT_STREQ("((1) * i + (1))", GetInductionInfo(increment_[0], 0).c_str());
 
   // Trip-count.
-  EXPECT_STREQ("(100)", GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
+  EXPECT_STREQ("(TC-loop:(100))",
+               GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str());
 }
 
 TEST_F(InductionVarAnalysisTest, FindDerivedInduction) {
@@ -388,7 +390,7 @@
   HInstruction* store = InsertArrayStore(induc_, 0);
   InsertLocalStore(induc_, InsertLocalLoad(tmp_, 0), 0);
   HInstruction *sub = InsertInstruction(
-       new (&allocator_) HSub(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0);
   InsertLocalStore(tmp_, sub, 0);
   PerformInductionVarAnalysis();
 
@@ -412,16 +414,16 @@
       new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
   InsertLocalStore(tmp_, add, 0);
   HInstruction *sub = InsertInstruction(
-       new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
   InsertLocalStore(tmp_, sub, 0);
   HInstruction *mul = InsertInstruction(
-       new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
+      new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
   InsertLocalStore(tmp_, mul, 0);
   HInstruction *shl = InsertInstruction(
-       new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0);
+      new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0);
   InsertLocalStore(tmp_, shl, 0);
   HInstruction *neg = InsertInstruction(
-       new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0);
+      new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0);
   InsertLocalStore(tmp_, neg, 0);
   InsertLocalStore(
       induc_,
@@ -471,7 +473,7 @@
   BuildLoopNest(1);
   HInstruction* store = InsertArrayStore(induc_, 0);
   HInstruction *sub = InsertInstruction(
-         new (&allocator_) HSub(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 0)), 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 0)), 0);
   InsertLocalStore(induc_, sub, 0);
   PerformInductionVarAnalysis();
 
@@ -497,19 +499,19 @@
                         HSub(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 0)), 0), 0);
   // Derived expressions.
   HInstruction *add = InsertInstruction(
-       new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
+      new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
   InsertLocalStore(tmp_, add, 0);
   HInstruction *sub = InsertInstruction(
-       new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
+      new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
   InsertLocalStore(tmp_, sub, 0);
   HInstruction *mul = InsertInstruction(
-       new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
+      new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0);
   InsertLocalStore(tmp_, mul, 0);
   HInstruction *shl = InsertInstruction(
-       new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0);
+      new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0);
   InsertLocalStore(tmp_, shl, 0);
   HInstruction *neg = InsertInstruction(
-       new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0);
+      new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0);
   InsertLocalStore(tmp_, neg, 0);
   PerformInductionVarAnalysis();
 
@@ -520,6 +522,36 @@
   EXPECT_STREQ("periodic(( - (1)), (0))", GetInductionInfo(neg, 0).c_str());
 }
 
+TEST_F(InductionVarAnalysisTest, FindRange) {
+  // Setup:
+  // for (int i = 0; i < 100; i++) {
+  //   k = i << 1;
+  //   k = k + 1;
+  //   a[k] = 0;
+  // }
+  BuildLoopNest(1);
+  HInstruction *shl = InsertInstruction(
+      new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(basic_[0], 0), constant1_), 0);
+  InsertLocalStore(induc_, shl, 0);
+  HInstruction *add = InsertInstruction(
+      new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0);
+  InsertLocalStore(induc_, add, 0);
+  HInstruction* store = InsertArrayStore(induc_, 0);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("((2) * i + (1))", GetInductionInfo(store->InputAt(1), 0).c_str());
+
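+  // With i in [0, 99], the stored index k = 2 * i + 1 ranges over [1, 199].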
+  InductionVarRange range(iva_);
+  InductionVarRange::Value v_min = range.GetMinInduction(store, store->InputAt(1));
+  InductionVarRange::Value v_max = range.GetMaxInduction(store, store->InputAt(1));
+  ASSERT_TRUE(v_min.is_known);
+  EXPECT_EQ(0, v_min.a_constant);
+  EXPECT_EQ(1, v_min.b_constant);
+  ASSERT_TRUE(v_max.is_known);
+  EXPECT_EQ(0, v_max.a_constant);
+  EXPECT_EQ(199, v_max.b_constant);
+}
+
 TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) {
   // Setup:
   // k = 0;
@@ -550,7 +582,8 @@
     }
     EXPECT_STREQ("((1) * i + (1))", GetInductionInfo(increment_[d], d).c_str());
     // Trip-count.
-    EXPECT_STREQ("(100)", GetInductionInfo(loop_header_[d]->GetLastInstruction(), d).c_str());
+    EXPECT_STREQ("(TC-loop:(100))",
+                 GetInductionInfo(loop_header_[d]->GetLastInstruction(), d).c_str());
   }
 }
 
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 486e904..db12819 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -14,120 +14,109 @@
  * limitations under the License.
  */
 
-#include <limits.h>
-
 #include "induction_var_range.h"
 
+#include <limits>
+
 namespace art {
 
-static bool IsValidConstant32(int32_t c) {
-  return INT_MIN < c && c < INT_MAX;
+/** Returns true if a 64-bit constant fits in a 32-bit constant. */
+static bool CanLongValueFitIntoInt(int64_t c) {
+  return std::numeric_limits<int32_t>::min() <= c && c <= std::numeric_limits<int32_t>::max();
 }
 
-static bool IsValidConstant64(int64_t c) {
-  return INT_MIN < c && c < INT_MAX;
-}
-
-/** Returns true if 32-bit addition can be done safely (and is not an unknown range). */
+/** Returns true if 32-bit addition can be done safely. */
 static bool IsSafeAdd(int32_t c1, int32_t c2) {
-  if (IsValidConstant32(c1) && IsValidConstant32(c2)) {
-    return IsValidConstant64(static_cast<int64_t>(c1) + static_cast<int64_t>(c2));
-  }
-  return false;
+  return CanLongValueFitIntoInt(static_cast<int64_t>(c1) + static_cast<int64_t>(c2));
 }
 
-/** Returns true if 32-bit subtraction can be done safely (and is not an unknown range). */
+/** Returns true if 32-bit subtraction can be done safely. */
 static bool IsSafeSub(int32_t c1, int32_t c2) {
-  if (IsValidConstant32(c1) && IsValidConstant32(c2)) {
-    return IsValidConstant64(static_cast<int64_t>(c1) - static_cast<int64_t>(c2));
-  }
-  return false;
+  return CanLongValueFitIntoInt(static_cast<int64_t>(c1) - static_cast<int64_t>(c2));
 }
 
-/** Returns true if 32-bit multiplication can be done safely (and is not an unknown range). */
+/** Returns true if 32-bit multiplication can be done safely. */
 static bool IsSafeMul(int32_t c1, int32_t c2) {
-  if (IsValidConstant32(c1) && IsValidConstant32(c2)) {
-    return IsValidConstant64(static_cast<int64_t>(c1) * static_cast<int64_t>(c2));
-  }
-  return false;
+  return CanLongValueFitIntoInt(static_cast<int64_t>(c1) * static_cast<int64_t>(c2));
 }
 
-/** Returns true if 32-bit division can be done safely (and is not an unknown range). */
+/** Returns true if 32-bit division can be done safely. */
 static bool IsSafeDiv(int32_t c1, int32_t c2) {
-  if (IsValidConstant32(c1) && IsValidConstant32(c2) && c2 != 0) {
-    return IsValidConstant64(static_cast<int64_t>(c1) / static_cast<int64_t>(c2));
-  }
-  return false;
+  return c2 != 0 && CanLongValueFitIntoInt(static_cast<int64_t>(c1) / static_cast<int64_t>(c2));
 }
 
-/** Returns true for 32/64-bit integral constant within known range. */
+/** Returns true for a 32/64-bit integral constant, setting *value to its 32-bit value. */
 static bool IsIntAndGet(HInstruction* instruction, int32_t* value) {
   if (instruction->IsIntConstant()) {
-    const int32_t c = instruction->AsIntConstant()->GetValue();
-    if (IsValidConstant32(c)) {
-      *value = c;
-      return true;
-    }
+    *value = instruction->AsIntConstant()->GetValue();
+    return true;
   } else if (instruction->IsLongConstant()) {
     const int64_t c = instruction->AsLongConstant()->GetValue();
-    if (IsValidConstant64(c)) {
-      *value = c;
+    if (CanLongValueFitIntoInt(c)) {
+      *value = static_cast<int32_t>(c);
       return true;
     }
   }
   return false;
 }
 
+/**
+ * An upper bound a * (length / a) + b, where a > 0, can be conservatively rewritten as length + b
+ * because length >= 0 is true. This makes it more likely the bound is useful to clients.
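+ * For example, an upper bound of 4 * (length / 4) + 1 is rewritten as length + 1.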
+ */
+static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) {
+  int32_t value;
+  if (v.a_constant > 1 &&
+      v.instruction->IsDiv() &&
+      v.instruction->InputAt(0)->IsArrayLength() &&
+      IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
+    return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant);
+  }
+  return v;
+}
+
 //
 // Public class methods.
 //
 
 InductionVarRange::InductionVarRange(HInductionVarAnalysis* induction_analysis)
     : induction_analysis_(induction_analysis) {
+  DCHECK(induction_analysis != nullptr);
 }
 
 InductionVarRange::Value InductionVarRange::GetMinInduction(HInstruction* context,
                                                             HInstruction* instruction) {
-  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();
-  if (loop != nullptr && induction_analysis_ != nullptr) {
-    return GetMin(induction_analysis_->LookupInfo(loop, instruction), GetTripCount(loop, context));
-  }
-  return Value(INT_MIN);
+  return GetInduction(context, instruction, /* is_min */ true);
 }
 
 InductionVarRange::Value InductionVarRange::GetMaxInduction(HInstruction* context,
                                                             HInstruction* instruction) {
-  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();
-  if (loop != nullptr && induction_analysis_ != nullptr) {
-    return GetMax(induction_analysis_->LookupInfo(loop, instruction), GetTripCount(loop, context));
-  }
-  return Value(INT_MAX);
+  return SimplifyMax(GetInduction(context, instruction, /* is_min */ false));
 }
 
 //
 // Private class methods.
 //
 
-HInductionVarAnalysis::InductionInfo* InductionVarRange::GetTripCount(HLoopInformation* loop,
-                                                                      HInstruction* context) {
-  // The trip-count expression is only valid when the top-test is taken at least once,
-  // that means, when the analyzed context appears outside the loop header itself.
-  // Early-exit loops are okay, since in those cases, the trip-count is conservative.
-  if (context->GetBlock() != loop->GetHeader()) {
-    HInductionVarAnalysis::InductionInfo* trip =
-        induction_analysis_->LookupInfo(loop, loop->GetHeader()->GetLastInstruction());
-    if (trip != nullptr) {
-      // Wrap the trip-count representation in its own unusual NOP node, so that range analysis
-      // is able to determine the [0, TC - 1] interval without having to construct constants.
-      return induction_analysis_->CreateInvariantOp(HInductionVarAnalysis::kNop, trip, trip);
-    }
+InductionVarRange::Value InductionVarRange::GetInduction(HInstruction* context,
+                                                         HInstruction* instruction,
+                                                         bool is_min) {
+  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
+  if (loop != nullptr) {
+    HBasicBlock* header = loop->GetHeader();
+    bool in_body = context->GetBlock() != header;
+    return GetVal(induction_analysis_->LookupInfo(loop, instruction),
+                  induction_analysis_->LookupInfo(loop, header->GetLastInstruction()),
+                  in_body,
+                  is_min);
   }
-  return nullptr;
+  return Value();
 }
 
 InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction,
                                                      HInductionVarAnalysis::InductionInfo* trip,
-                                                     int32_t fail_value) {
+                                                     bool in_body,
+                                                     bool is_min) {
   // Detect constants and chase the fetch a bit deeper into the HIR tree, so that it becomes
   // more likely range analysis will compare the same instructions as terminal nodes.
   int32_t value;
@@ -135,157 +124,147 @@
     return Value(value);
   } else if (instruction->IsAdd()) {
     if (IsIntAndGet(instruction->InputAt(0), &value)) {
-      return AddValue(Value(value),
-                      GetFetch(instruction->InputAt(1), trip, fail_value), fail_value);
+      return AddValue(Value(value), GetFetch(instruction->InputAt(1), trip, in_body, is_min));
     } else if (IsIntAndGet(instruction->InputAt(1), &value)) {
-      return AddValue(GetFetch(instruction->InputAt(0), trip, fail_value),
-                      Value(value), fail_value);
+      return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(value));
     }
-  } else if (fail_value < 0) {
-    // Special case: within the loop-body, minimum of trip-count is 1.
-    if (trip != nullptr && instruction == trip->op_b->fetch) {
+  } else if (is_min) {
+    // Special case for finding minimum: minimum of trip-count in loop-body is 1.
+    if (trip != nullptr && in_body && instruction == trip->op_b->fetch) {
       return Value(1);
     }
   }
   return Value(instruction, 1, 0);
 }
 
-InductionVarRange::Value InductionVarRange::GetMin(HInductionVarAnalysis::InductionInfo* info,
-                                                   HInductionVarAnalysis::InductionInfo* trip) {
+InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::InductionInfo* info,
+                                                   HInductionVarAnalysis::InductionInfo* trip,
+                                                   bool in_body,
+                                                   bool is_min) {
   if (info != nullptr) {
     switch (info->induction_class) {
       case HInductionVarAnalysis::kInvariant:
         // Invariants.
         switch (info->operation) {
-          case HInductionVarAnalysis::kNop:  // normalized: 0
-            DCHECK_EQ(info->op_a, info->op_b);
-            return Value(0);
           case HInductionVarAnalysis::kAdd:
-            return AddValue(GetMin(info->op_a, trip), GetMin(info->op_b, trip), INT_MIN);
-          case HInductionVarAnalysis::kSub:  // second max!
-            return SubValue(GetMin(info->op_a, trip), GetMax(info->op_b, trip), INT_MIN);
-          case HInductionVarAnalysis::kNeg:  // second max!
-            return SubValue(Value(0), GetMax(info->op_b, trip), INT_MIN);
+            return AddValue(GetVal(info->op_a, trip, in_body, is_min),
+                            GetVal(info->op_b, trip, in_body, is_min));
+          case HInductionVarAnalysis::kSub:  // second reversed!
+            return SubValue(GetVal(info->op_a, trip, in_body, is_min),
+                            GetVal(info->op_b, trip, in_body, !is_min));
+          case HInductionVarAnalysis::kNeg:  // second reversed!
+            return SubValue(Value(0),
+                            GetVal(info->op_b, trip, in_body, !is_min));
           case HInductionVarAnalysis::kMul:
-            return GetMul(info->op_a, info->op_b, trip, INT_MIN);
+            return GetMul(info->op_a, info->op_b, trip, in_body, is_min);
           case HInductionVarAnalysis::kDiv:
-            return GetDiv(info->op_a, info->op_b, trip, INT_MIN);
+            return GetDiv(info->op_a, info->op_b, trip, in_body, is_min);
           case HInductionVarAnalysis::kFetch:
-            return GetFetch(info->fetch, trip, INT_MIN);
+            return GetFetch(info->fetch, trip, in_body, is_min);
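+          // Trip-counts: from the loop-header the range is [0, TC] (one extra test is
+          // executed), from within the loop-body it is [0, TC - 1].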
+          case HInductionVarAnalysis::kTripCountInLoop:
+            if (!in_body) {
+              return is_min ? Value(0)
+                            : GetVal(info->op_b, trip, in_body, is_min);   // one extra!
+            }
+            FALLTHROUGH_INTENDED;
+          case HInductionVarAnalysis::kTripCountInBody:
+            if (in_body) {
+              return is_min ? Value(0)
+                            : SubValue(GetVal(info->op_b, trip, in_body, is_min), Value(1));
+            }
+            break;
+          default:
+            break;
         }
         break;
       case HInductionVarAnalysis::kLinear:
-        // Minimum over linear induction a * i + b, for normalized 0 <= i < TC.
-        return AddValue(GetMul(info->op_a, trip, trip, INT_MIN),
-                        GetMin(info->op_b, trip), INT_MIN);
+        // Linear induction a * i + b, for normalized 0 <= i < TC.
+        return AddValue(GetMul(info->op_a, trip, trip, in_body, is_min),
+                        GetVal(info->op_b, trip, in_body, is_min));
       case HInductionVarAnalysis::kWrapAround:
       case HInductionVarAnalysis::kPeriodic:
-        // Minimum over all values in the wrap-around/periodic.
-        return MinValue(GetMin(info->op_a, trip), GetMin(info->op_b, trip));
+        // Merge values in the wrap-around/periodic.
+        return MergeVal(GetVal(info->op_a, trip, in_body, is_min),
+                        GetVal(info->op_b, trip, in_body, is_min), is_min);
     }
   }
-  return Value(INT_MIN);
-}
-
-InductionVarRange::Value InductionVarRange::GetMax(HInductionVarAnalysis::InductionInfo* info,
-                                                   HInductionVarAnalysis::InductionInfo* trip) {
-  if (info != nullptr) {
-    switch (info->induction_class) {
-      case HInductionVarAnalysis::kInvariant:
-        // Invariants.
-        switch (info->operation) {
-          case HInductionVarAnalysis::kNop:    // normalized: TC - 1
-            DCHECK_EQ(info->op_a, info->op_b);
-            return SubValue(GetMax(info->op_b, trip), Value(1), INT_MAX);
-          case HInductionVarAnalysis::kAdd:
-            return AddValue(GetMax(info->op_a, trip), GetMax(info->op_b, trip), INT_MAX);
-          case HInductionVarAnalysis::kSub:  // second min!
-            return SubValue(GetMax(info->op_a, trip), GetMin(info->op_b, trip), INT_MAX);
-          case HInductionVarAnalysis::kNeg:  // second min!
-            return SubValue(Value(0), GetMin(info->op_b, trip), INT_MAX);
-          case HInductionVarAnalysis::kMul:
-            return GetMul(info->op_a, info->op_b, trip, INT_MAX);
-          case HInductionVarAnalysis::kDiv:
-            return GetDiv(info->op_a, info->op_b, trip, INT_MAX);
-          case HInductionVarAnalysis::kFetch:
-            return GetFetch(info->fetch, trip, INT_MAX);
-        }
-        break;
-      case HInductionVarAnalysis::kLinear:
-        // Maximum over linear induction a * i + b, for normalized 0 <= i < TC.
-        return AddValue(GetMul(info->op_a, trip, trip, INT_MAX),
-                        GetMax(info->op_b, trip), INT_MAX);
-      case HInductionVarAnalysis::kWrapAround:
-      case HInductionVarAnalysis::kPeriodic:
-        // Maximum over all values in the wrap-around/periodic.
-        return MaxValue(GetMax(info->op_a, trip), GetMax(info->op_b, trip));
-    }
-  }
-  return Value(INT_MAX);
+  return Value();
 }
 
 InductionVarRange::Value InductionVarRange::GetMul(HInductionVarAnalysis::InductionInfo* info1,
                                                    HInductionVarAnalysis::InductionInfo* info2,
                                                    HInductionVarAnalysis::InductionInfo* trip,
-                                                   int32_t fail_value) {
-  Value v1_min = GetMin(info1, trip);
-  Value v1_max = GetMax(info1, trip);
-  Value v2_min = GetMin(info2, trip);
-  Value v2_max = GetMax(info2, trip);
-  if (v1_min.a_constant == 0 && v1_min.b_constant >= 0) {
+                                                   bool in_body,
+                                                   bool is_min) {
+  Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true);
+  Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false);
+  Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true);
+  Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false);
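+  // Bound the product by the signs of the operand ranges: for instance, with both ranges
+  // non-negative the minimum is v1_min * v2_min and the maximum is v1_max * v2_max.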
+  if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) {
     // Positive range vs. positive or negative range.
-    if (v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
-      return (fail_value < 0) ? MulValue(v1_min, v2_min, fail_value)
-                              : MulValue(v1_max, v2_max, fail_value);
-    } else if (v2_max.a_constant == 0 && v2_max.b_constant <= 0) {
-      return (fail_value < 0) ? MulValue(v1_max, v2_min, fail_value)
-                              : MulValue(v1_min, v2_max, fail_value);
+    if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
+      return is_min ? MulValue(v1_min, v2_min)
+                    : MulValue(v1_max, v2_max);
+    } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) {
+      return is_min ? MulValue(v1_max, v2_min)
+                    : MulValue(v1_min, v2_max);
     }
-  } else if (v1_min.a_constant == 0 && v1_min.b_constant <= 0) {
+  } else if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant <= 0) {
     // Negative range vs. positive or negative range.
-    if (v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
-      return (fail_value < 0) ? MulValue(v1_min, v2_max, fail_value)
-                              : MulValue(v1_max, v2_min, fail_value);
-    } else if (v2_max.a_constant == 0 && v2_max.b_constant <= 0) {
-      return (fail_value < 0) ? MulValue(v1_max, v2_max, fail_value)
-                              : MulValue(v1_min, v2_min, fail_value);
+    if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
+      return is_min ? MulValue(v1_min, v2_max)
+                    : MulValue(v1_max, v2_min);
+    } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) {
+      return is_min ? MulValue(v1_max, v2_max)
+                    : MulValue(v1_min, v2_min);
     }
   }
-  return Value(fail_value);
+  return Value();
 }
 
 InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::InductionInfo* info1,
                                                    HInductionVarAnalysis::InductionInfo* info2,
                                                    HInductionVarAnalysis::InductionInfo* trip,
-                                                   int32_t fail_value) {
-  Value v1_min = GetMin(info1, trip);
-  Value v1_max = GetMax(info1, trip);
-  Value v2_min = GetMin(info2, trip);
-  Value v2_max = GetMax(info2, trip);
-  if (v1_min.a_constant == 0 && v1_min.b_constant >= 0) {
+                                                   bool in_body,
+                                                   bool is_min) {
+  Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true);
+  Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false);
+  Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true);
+  Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false);
+  if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) {
     // Positive range vs. positive or negative range.
-    if (v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
-      return (fail_value < 0) ? DivValue(v1_min, v2_max, fail_value)
-                              : DivValue(v1_max, v2_min, fail_value);
-    } else if (v2_max.a_constant == 0 && v2_max.b_constant <= 0) {
-      return (fail_value < 0) ? DivValue(v1_max, v2_max, fail_value)
-                              : DivValue(v1_min, v2_min, fail_value);
+    if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
+      return is_min ? DivValue(v1_min, v2_max)
+                    : DivValue(v1_max, v2_min);
+    } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) {
+      return is_min ? DivValue(v1_max, v2_max)
+                    : DivValue(v1_min, v2_min);
     }
-  } else if (v1_min.a_constant == 0 && v1_min.b_constant <= 0) {
+  } else if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant <= 0) {
     // Negative range vs. positive or negative range.
-    if (v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
-      return (fail_value < 0) ? DivValue(v1_min, v2_min, fail_value)
-                              : DivValue(v1_max, v2_max, fail_value);
-    } else if (v2_max.a_constant == 0 && v2_max.b_constant <= 0) {
-      return (fail_value < 0) ? DivValue(v1_max, v2_min, fail_value)
-                              : DivValue(v1_min, v2_max, fail_value);
+    if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
+      return is_min ? DivValue(v1_min, v2_min)
+                    : DivValue(v1_max, v2_max);
+    } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) {
+      return is_min ? DivValue(v1_max, v2_min)
+                    : DivValue(v1_min, v2_max);
     }
   }
-  return Value(fail_value);
+  return Value();
 }
 
-InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2, int32_t fail_value) {
-  if (IsSafeAdd(v1.b_constant, v2.b_constant)) {
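+// Returns true and sets *value if the induction info evaluates to a single known constant.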
+bool InductionVarRange::GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t* value) {
+  Value v_min = GetVal(info, nullptr, /* in_body */ false, /* is_min */ true);
+  Value v_max = GetVal(info, nullptr, /* in_body */ false, /* is_min */ false);
+  if (v_min.is_known && v_max.is_known &&
+      v_min.a_constant == 0 && v_max.a_constant == 0 && v_min.b_constant == v_max.b_constant) {
+    *value = v_min.b_constant;
+    return true;
+  }
+  return false;
+}
+
+InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) {
+  if (v1.is_known && v2.is_known && IsSafeAdd(v1.b_constant, v2.b_constant)) {
     const int32_t b = v1.b_constant + v2.b_constant;
     if (v1.a_constant == 0) {
       return Value(v2.instruction, v2.a_constant, b);
@@ -295,11 +274,11 @@
       return Value(v1.instruction, v1.a_constant + v2.a_constant, b);
     }
   }
-  return Value(fail_value);
+  return Value();
 }
 
-InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2, int32_t fail_value) {
-  if (IsSafeSub(v1.b_constant, v2.b_constant)) {
+InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2) {
+  if (v1.is_known && v2.is_known && IsSafeSub(v1.b_constant, v2.b_constant)) {
     const int32_t b = v1.b_constant - v2.b_constant;
     if (v1.a_constant == 0 && IsSafeSub(0, v2.a_constant)) {
       return Value(v2.instruction, -v2.a_constant, b);
@@ -309,43 +288,42 @@
       return Value(v1.instruction, v1.a_constant - v2.a_constant, b);
     }
   }
-  return Value(fail_value);
+  return Value();
 }
 
-InductionVarRange::Value InductionVarRange::MulValue(Value v1, Value v2, int32_t fail_value) {
-  if (v1.a_constant == 0) {
-    if (IsSafeMul(v1.b_constant, v2.a_constant) && IsSafeMul(v1.b_constant, v2.b_constant)) {
-      return Value(v2.instruction, v1.b_constant * v2.a_constant, v1.b_constant * v2.b_constant);
-    }
-  } else if (v2.a_constant == 0) {
-    if (IsSafeMul(v1.a_constant, v2.b_constant) && IsSafeMul(v1.b_constant, v2.b_constant)) {
-      return Value(v1.instruction, v1.a_constant * v2.b_constant, v1.b_constant * v2.b_constant);
+InductionVarRange::Value InductionVarRange::MulValue(Value v1, Value v2) {
+  if (v1.is_known && v2.is_known) {
+    if (v1.a_constant == 0) {
+      if (IsSafeMul(v1.b_constant, v2.a_constant) && IsSafeMul(v1.b_constant, v2.b_constant)) {
+        return Value(v2.instruction, v1.b_constant * v2.a_constant, v1.b_constant * v2.b_constant);
+      }
+    } else if (v2.a_constant == 0) {
+      if (IsSafeMul(v1.a_constant, v2.b_constant) && IsSafeMul(v1.b_constant, v2.b_constant)) {
+        return Value(v1.instruction, v1.a_constant * v2.b_constant, v1.b_constant * v2.b_constant);
+      }
     }
   }
-  return Value(fail_value);
+  return Value();
 }
 
-InductionVarRange::Value InductionVarRange::DivValue(Value v1, Value v2, int32_t fail_value) {
-  if (v1.a_constant == 0 && v2.a_constant == 0) {
+InductionVarRange::Value InductionVarRange::DivValue(Value v1, Value v2) {
+  if (v1.is_known && v2.is_known && v1.a_constant == 0 && v2.a_constant == 0) {
     if (IsSafeDiv(v1.b_constant, v2.b_constant)) {
       return Value(v1.b_constant / v2.b_constant);
     }
   }
-  return Value(fail_value);
+  return Value();
 }
 
-InductionVarRange::Value InductionVarRange::MinValue(Value v1, Value v2) {
-  if (v1.instruction == v2.instruction && v1.a_constant == v2.a_constant) {
-    return Value(v1.instruction, v1.a_constant, std::min(v1.b_constant, v2.b_constant));
+InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is_min) {
+  if (v1.is_known && v2.is_known) {
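+    // Merge only values that share the same symbolic part (instruction and coefficient).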
+    if (v1.instruction == v2.instruction && v1.a_constant == v2.a_constant) {
+      return Value(v1.instruction, v1.a_constant,
+                   is_min ? std::min(v1.b_constant, v2.b_constant)
+                          : std::max(v1.b_constant, v2.b_constant));
+    }
   }
-  return Value(INT_MIN);
-}
-
-InductionVarRange::Value InductionVarRange::MaxValue(Value v1, Value v2) {
-  if (v1.instruction == v2.instruction && v1.a_constant == v2.a_constant) {
-    return Value(v1.instruction, v1.a_constant, std::max(v1.b_constant, v2.b_constant));
-  }
-  return Value(INT_MAX);
+  return Value();
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index e002e5f..dbdd2ee 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -22,30 +22,36 @@
 namespace art {
 
 /**
- * This class implements induction variable based range analysis on expressions within loops.
- * It takes the results of induction variable analysis in the constructor and provides a public
- * API to obtain a conservative lower and upper bound value on each instruction in the HIR.
+ * This class implements range analysis on expressions within loops. It takes the results
+ * of induction variable analysis in the constructor and provides a public API to obtain
+ * a conservative lower and upper bound value on each instruction in the HIR.
  *
- * For example, given a linear induction 2 * i + x where 0 <= i <= 10, range analysis yields lower
- * bound value x and upper bound value x + 20 for the expression, thus, the range [x, x + 20].
+ * The range analysis is done with a combination of symbolic and partial integral evaluation
+ * of expressions. The analysis avoids complications with wrap-around arithmetic on the integral
+ * parts but all clients should be aware that wrap-around may occur on any of the symbolic parts.
+ * For example, given a known range [0,100] for i, the evaluation yields range [-100,100]
+ * for expression -2*i+100, which is exact, and range [x,x+100] for expression i+x, which may
+ * wrap around anywhere in the range depending on the actual value of x.
  */
 class InductionVarRange {
  public:
   /*
    * A value that can be represented as "a * instruction + b" for 32-bit constants, where
-   * Value(INT_MIN) and Value(INT_MAX) denote an unknown lower and upper bound, respectively.
-   * Although range analysis could yield more complex values, the format is sufficiently powerful
-   * to represent useful cases and feeds directly into optimizations like bounds check elimination.
+   * Value() denotes an unknown lower and upper bound. Although range analysis could yield
+   * more complex values, the format is sufficiently powerful to represent useful cases
+   * and feeds directly into optimizations like bounds check elimination.
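+   * For example, Value(i, 2, 1) denotes 2 * i + 1 for an instruction i, and Value(7) the constant 7.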
    */
   struct Value {
+    Value() : instruction(nullptr), a_constant(0), b_constant(0), is_known(false) {}
     Value(HInstruction* i, int32_t a, int32_t b)
-        : instruction(a != 0 ? i : nullptr),
-          a_constant(a),
-          b_constant(b) {}
+        : instruction(a != 0 ? i : nullptr), a_constant(a), b_constant(b), is_known(true) {}
     explicit Value(int32_t b) : Value(nullptr, 0, b) {}
+    // Representation as: a_constant x instruction + b_constant.
     HInstruction* instruction;
     int32_t a_constant;
     int32_t b_constant;
+    // If true, the value is represented by the fields above; otherwise the value is unknown.
+    bool is_known;
   };
 
   explicit InductionVarRange(HInductionVarAnalysis* induction);
@@ -67,32 +73,34 @@
   // Private helper methods.
   //
 
-  HInductionVarAnalysis::InductionInfo* GetTripCount(HLoopInformation* loop,
-                                                     HInstruction* context);
+  Value GetInduction(HInstruction* context, HInstruction* instruction, bool is_min);
 
   static Value GetFetch(HInstruction* instruction,
                         HInductionVarAnalysis::InductionInfo* trip,
-                        int32_t fail_value);
-
-  static Value GetMin(HInductionVarAnalysis::InductionInfo* info,
-                      HInductionVarAnalysis::InductionInfo* trip);
-  static Value GetMax(HInductionVarAnalysis::InductionInfo* info,
-                      HInductionVarAnalysis::InductionInfo* trip);
+                        bool in_body,
+                        bool is_min);
+  static Value GetVal(HInductionVarAnalysis::InductionInfo* info,
+                      HInductionVarAnalysis::InductionInfo* trip,
+                      bool in_body,
+                      bool is_min);
   static Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
                       HInductionVarAnalysis::InductionInfo* info2,
                       HInductionVarAnalysis::InductionInfo* trip,
-                      int32_t fail_value);
+                      bool in_body,
+                      bool is_min);
   static Value GetDiv(HInductionVarAnalysis::InductionInfo* info1,
                       HInductionVarAnalysis::InductionInfo* info2,
                       HInductionVarAnalysis::InductionInfo* trip,
-                      int32_t fail_value);
+                      bool in_body,
+                      bool is_min);
 
-  static Value AddValue(Value v1, Value v2, int32_t fail_value);
-  static Value SubValue(Value v1, Value v2, int32_t fail_value);
-  static Value MulValue(Value v1, Value v2, int32_t fail_value);
-  static Value DivValue(Value v1, Value v2, int32_t fail_value);
-  static Value MinValue(Value v1, Value v2);
-  static Value MaxValue(Value v1, Value v2);
+  static bool GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t* value);
+
+  static Value AddValue(Value v1, Value v2);
+  static Value SubValue(Value v1, Value v2);
+  static Value MulValue(Value v1, Value v2);
+  static Value DivValue(Value v1, Value v2);
+  static Value MergeVal(Value v1, Value v2, bool is_min);
 
   /** Results of prior induction variable analysis. */
   HInductionVarAnalysis *induction_analysis_;
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index d3c3518..4497a88 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-#include <limits.h>
-
 #include "base/arena_allocator.h"
 #include "builder.h"
 #include "gtest/gtest.h"
@@ -45,6 +43,7 @@
     EXPECT_EQ(v1.instruction, v2.instruction);
     EXPECT_EQ(v1.a_constant, v2.a_constant);
     EXPECT_EQ(v1.b_constant, v2.b_constant);
+    EXPECT_EQ(v1.is_known, v2.is_known);
   }
 
   /** Constructs bare minimum graph. */
@@ -86,8 +85,7 @@
 
   /** Constructs a trip-count. */
   HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc) {
-    HInductionVarAnalysis::InductionInfo* trip = CreateConst(tc);
-    return CreateInvariant('@', trip, trip);
+    return iva_->CreateTripCount(HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc));
   }
 
   /** Constructs a linear a * i + b induction. */
@@ -113,30 +111,36 @@
 
   Value GetMin(HInductionVarAnalysis::InductionInfo* info,
                HInductionVarAnalysis::InductionInfo* induc) {
-    return InductionVarRange::GetMin(info, induc);
+    return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ true);
   }
 
   Value GetMax(HInductionVarAnalysis::InductionInfo* info,
                HInductionVarAnalysis::InductionInfo* induc) {
-    return InductionVarRange::GetMax(info, induc);
+    return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ false);
   }
 
   Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
-               HInductionVarAnalysis::InductionInfo* info2, int32_t fail_value) {
-    return InductionVarRange::GetMul(info1, info2, nullptr, fail_value);
+               HInductionVarAnalysis::InductionInfo* info2,
+               bool is_min) {
+    return InductionVarRange::GetMul(info1, info2, nullptr, /* in_body */ true, is_min);
   }
 
   Value GetDiv(HInductionVarAnalysis::InductionInfo* info1,
-               HInductionVarAnalysis::InductionInfo* info2, int32_t fail_value) {
-    return InductionVarRange::GetDiv(info1, info2, nullptr, fail_value);
+               HInductionVarAnalysis::InductionInfo* info2,
+               bool is_min) {
+    return InductionVarRange::GetDiv(info1, info2, nullptr, /* in_body */ true, is_min);
   }
 
-  Value AddValue(Value v1, Value v2) { return InductionVarRange::AddValue(v1, v2, INT_MIN); }
-  Value SubValue(Value v1, Value v2) { return InductionVarRange::SubValue(v1, v2, INT_MIN); }
-  Value MulValue(Value v1, Value v2) { return InductionVarRange::MulValue(v1, v2, INT_MIN); }
-  Value DivValue(Value v1, Value v2) { return InductionVarRange::DivValue(v1, v2, INT_MIN); }
-  Value MinValue(Value v1, Value v2) { return InductionVarRange::MinValue(v1, v2); }
-  Value MaxValue(Value v1, Value v2) { return InductionVarRange::MaxValue(v1, v2); }
+  bool GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t* value) {
+    return InductionVarRange::GetConstant(info, value);
+  }
+
+  Value AddValue(Value v1, Value v2) { return InductionVarRange::AddValue(v1, v2); }
+  Value SubValue(Value v1, Value v2) { return InductionVarRange::SubValue(v1, v2); }
+  Value MulValue(Value v1, Value v2) { return InductionVarRange::MulValue(v1, v2); }
+  Value DivValue(Value v1, Value v2) { return InductionVarRange::DivValue(v1, v2); }
+  Value MinValue(Value v1, Value v2) { return InductionVarRange::MergeVal(v1, v2, true); }
+  Value MaxValue(Value v1, Value v2) { return InductionVarRange::MergeVal(v1, v2, false); }
 
   // General building fields.
   ArenaPool pool_;
@@ -154,8 +158,8 @@
 //
 
 TEST_F(InductionVarRangeTest, GetMinMaxNull) {
-  ExpectEqual(Value(INT_MIN), GetMin(nullptr, nullptr));
-  ExpectEqual(Value(INT_MAX), GetMax(nullptr, nullptr));
+  ExpectEqual(Value(), GetMin(nullptr, nullptr));
+  ExpectEqual(Value(), GetMax(nullptr, nullptr));
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxAdd) {
@@ -251,91 +255,98 @@
 }
 
 TEST_F(InductionVarRangeTest, GetMulMin) {
-  ExpectEqual(Value(6), GetMul(CreateRange(2, 10), CreateRange(3, 5), INT_MIN));
-  ExpectEqual(Value(-50), GetMul(CreateRange(2, 10), CreateRange(-5, -3), INT_MIN));
-  ExpectEqual(Value(-50), GetMul(CreateRange(-10, -2), CreateRange(3, 5), INT_MIN));
-  ExpectEqual(Value(6), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), INT_MIN));
+  ExpectEqual(Value(6), GetMul(CreateRange(2, 10), CreateRange(3, 5), true));
+  ExpectEqual(Value(-50), GetMul(CreateRange(2, 10), CreateRange(-5, -3), true));
+  ExpectEqual(Value(-50), GetMul(CreateRange(-10, -2), CreateRange(3, 5), true));
+  ExpectEqual(Value(6), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), true));
 }
 
 TEST_F(InductionVarRangeTest, GetMulMax) {
-  ExpectEqual(Value(50), GetMul(CreateRange(2, 10), CreateRange(3, 5), INT_MAX));
-  ExpectEqual(Value(-6), GetMul(CreateRange(2, 10), CreateRange(-5, -3), INT_MAX));
-  ExpectEqual(Value(-6), GetMul(CreateRange(-10, -2), CreateRange(3, 5), INT_MAX));
-  ExpectEqual(Value(50), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), INT_MAX));
+  ExpectEqual(Value(50), GetMul(CreateRange(2, 10), CreateRange(3, 5), false));
+  ExpectEqual(Value(-6), GetMul(CreateRange(2, 10), CreateRange(-5, -3), false));
+  ExpectEqual(Value(-6), GetMul(CreateRange(-10, -2), CreateRange(3, 5), false));
+  ExpectEqual(Value(50), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), false));
 }
 
 TEST_F(InductionVarRangeTest, GetDivMin) {
-  ExpectEqual(Value(10), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), INT_MIN));
-  ExpectEqual(Value(-500), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), INT_MIN));
-  ExpectEqual(Value(-500), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), INT_MIN));
-  ExpectEqual(Value(10), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), INT_MIN));
+  ExpectEqual(Value(10), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), true));
+  ExpectEqual(Value(-500), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), true));
+  ExpectEqual(Value(-500), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), true));
+  ExpectEqual(Value(10), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), true));
 }
 
 TEST_F(InductionVarRangeTest, GetDivMax) {
-  ExpectEqual(Value(500), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), INT_MAX));
-  ExpectEqual(Value(-10), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), INT_MAX));
-  ExpectEqual(Value(-10), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), INT_MAX));
-  ExpectEqual(Value(500), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), INT_MAX));
+  ExpectEqual(Value(500), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), false));
+  ExpectEqual(Value(-10), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), false));
+  ExpectEqual(Value(-10), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), false));
+  ExpectEqual(Value(500), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), false));
+}
+
+TEST_F(InductionVarRangeTest, GetConstant) {
+  int32_t value;
+  ASSERT_TRUE(GetConstant(CreateConst(12345), &value));
+  EXPECT_EQ(12345, value);
+  EXPECT_FALSE(GetConstant(CreateRange(1, 2), &value));
 }
 
 TEST_F(InductionVarRangeTest, AddValue) {
   ExpectEqual(Value(110), AddValue(Value(10), Value(100)));
   ExpectEqual(Value(-5), AddValue(Value(&x_, 1, -4), Value(&x_, -1, -1)));
   ExpectEqual(Value(&x_, 3, -5), AddValue(Value(&x_, 2, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(INT_MIN), AddValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
+  ExpectEqual(Value(), AddValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
   ExpectEqual(Value(&x_, 1, 23), AddValue(Value(&x_, 1, 20), Value(3)));
   ExpectEqual(Value(&y_, 1, 5), AddValue(Value(55), Value(&y_, 1, -50)));
-  // Unsafe.
-  ExpectEqual(Value(INT_MIN), AddValue(Value(INT_MAX - 5), Value(6)));
+  const int32_t max_value = std::numeric_limits<int32_t>::max();
+  ExpectEqual(Value(max_value), AddValue(Value(max_value - 5), Value(5)));
+  ExpectEqual(Value(), AddValue(Value(max_value - 5), Value(6)));  // unsafe
 }
 
 TEST_F(InductionVarRangeTest, SubValue) {
   ExpectEqual(Value(-90), SubValue(Value(10), Value(100)));
   ExpectEqual(Value(-3), SubValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
   ExpectEqual(Value(&x_, 2, -3), SubValue(Value(&x_, 3, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(INT_MIN), SubValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
+  ExpectEqual(Value(), SubValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
   ExpectEqual(Value(&x_, 1, 17), SubValue(Value(&x_, 1, 20), Value(3)));
   ExpectEqual(Value(&y_, -4, 105), SubValue(Value(55), Value(&y_, 4, -50)));
-  // Unsafe.
-  ExpectEqual(Value(INT_MIN), SubValue(Value(INT_MIN + 5), Value(6)));
+  const int32_t min_value = std::numeric_limits<int32_t>::min();
+  ExpectEqual(Value(min_value), SubValue(Value(min_value + 5), Value(5)));
+  ExpectEqual(Value(), SubValue(Value(min_value + 5), Value(6)));  // unsafe
 }
 
 TEST_F(InductionVarRangeTest, MulValue) {
   ExpectEqual(Value(1000), MulValue(Value(10), Value(100)));
-  ExpectEqual(Value(INT_MIN), MulValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(INT_MIN), MulValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
+  ExpectEqual(Value(), MulValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
+  ExpectEqual(Value(), MulValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
   ExpectEqual(Value(&x_, 9, 60), MulValue(Value(&x_, 3, 20), Value(3)));
   ExpectEqual(Value(&y_, 55, -110), MulValue(Value(55), Value(&y_, 1, -2)));
-  // Unsafe.
-  ExpectEqual(Value(INT_MIN), MulValue(Value(90000), Value(-90000)));
+  ExpectEqual(Value(), MulValue(Value(90000), Value(-90000)));  // unsafe
 }
 
 TEST_F(InductionVarRangeTest, DivValue) {
   ExpectEqual(Value(25), DivValue(Value(100), Value(4)));
-  ExpectEqual(Value(INT_MIN), DivValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(INT_MIN), DivValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(INT_MIN), DivValue(Value(&x_, 12, 24), Value(3)));
-  ExpectEqual(Value(INT_MIN), DivValue(Value(55), Value(&y_, 1, -50)));
-  // Unsafe.
-  ExpectEqual(Value(INT_MIN), DivValue(Value(1), Value(0)));
+  ExpectEqual(Value(), DivValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
+  ExpectEqual(Value(), DivValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
+  ExpectEqual(Value(), DivValue(Value(&x_, 12, 24), Value(3)));
+  ExpectEqual(Value(), DivValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(), DivValue(Value(1), Value(0)));  // unsafe
 }
 
 TEST_F(InductionVarRangeTest, MinValue) {
   ExpectEqual(Value(10), MinValue(Value(10), Value(100)));
   ExpectEqual(Value(&x_, 1, -4), MinValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
   ExpectEqual(Value(&x_, 4, -4), MinValue(Value(&x_, 4, -4), Value(&x_, 4, -1)));
-  ExpectEqual(Value(INT_MIN), MinValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(INT_MIN), MinValue(Value(&x_, 1, 20), Value(3)));
-  ExpectEqual(Value(INT_MIN), MinValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(), MinValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
+  ExpectEqual(Value(), MinValue(Value(&x_, 1, 20), Value(3)));
+  ExpectEqual(Value(), MinValue(Value(55), Value(&y_, 1, -50)));
 }
 
 TEST_F(InductionVarRangeTest, MaxValue) {
   ExpectEqual(Value(100), MaxValue(Value(10), Value(100)));
   ExpectEqual(Value(&x_, 1, -1), MaxValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
   ExpectEqual(Value(&x_, 4, -1), MaxValue(Value(&x_, 4, -4), Value(&x_, 4, -1)));
-  ExpectEqual(Value(INT_MAX), MaxValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(INT_MAX), MaxValue(Value(&x_, 1, 20), Value(3)));
-  ExpectEqual(Value(INT_MAX), MaxValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(), MaxValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
+  ExpectEqual(Value(), MaxValue(Value(&x_, 1, 20), Value(3)));
+  ExpectEqual(Value(), MaxValue(Value(55), Value(&y_, 1, -50)));
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 039029a..0b65c56 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -247,12 +247,14 @@
     return false;
   }
 
-  uint16_t class_def_idx = resolved_method->GetDeclaringClass()->GetDexClassDefIndex();
-  if (!compiler_driver_->IsMethodVerifiedWithoutFailures(
-        resolved_method->GetDexMethodIndex(), class_def_idx, *resolved_method->GetDexFile())) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
-                   << " couldn't be verified, so it cannot be inlined";
-    return false;
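+  // Skip the per-method verification-status lookup when the declaring class is already verified.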
+  if (!resolved_method->GetDeclaringClass()->IsVerified()) {
+    uint16_t class_def_idx = resolved_method->GetDeclaringClass()->GetDexClassDefIndex();
+    if (!compiler_driver_->IsMethodVerifiedWithoutFailures(
+          resolved_method->GetDexMethodIndex(), class_def_idx, *resolved_method->GetDexFile())) {
+      VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+                     << " couldn't be verified, so it cannot be inlined";
+      return false;
+    }
   }
 
   if (invoke_instruction->IsInvokeStaticOrDirect() &&
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 0ac26de..3287a0a 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -71,7 +71,8 @@
   void VisitXor(HXor* instruction) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE;
   void VisitFakeString(HFakeString* fake_string) OVERRIDE;
-  bool IsDominatedByInputNullCheck(HInstruction* instr);
+
+  bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
 
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
@@ -187,14 +188,18 @@
   }
 }
 
-bool InstructionSimplifierVisitor::IsDominatedByInputNullCheck(HInstruction* instr) {
-  HInstruction* input = instr->InputAt(0);
+bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInstruction* at) const {
+  if (!input->CanBeNull()) {
+    return true;
+  }
+
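+  // Otherwise 'input' is non-null at 'at' if some null check on it strictly dominates 'at'.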
   for (HUseIterator<HInstruction*> it(input->GetUses()); !it.Done(); it.Advance()) {
     HInstruction* use = it.Current()->GetUser();
-    if (use->IsNullCheck() && use->StrictlyDominates(instr)) {
+    if (use->IsNullCheck() && use->StrictlyDominates(at)) {
       return true;
     }
   }
+
   return false;
 }
 
@@ -231,7 +236,7 @@
 
 void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
   HInstruction* object = check_cast->InputAt(0);
-  if (!object->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) {
+  if (CanEnsureNotNullAt(object, check_cast)) {
     check_cast->ClearMustDoNullCheck();
   }
 
@@ -267,7 +272,7 @@
 void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
   HInstruction* object = instruction->InputAt(0);
   bool can_be_null = true;
-  if (!object->CanBeNull() || IsDominatedByInputNullCheck(instruction)) {
+  if (CanEnsureNotNullAt(object, instruction)) {
     can_be_null = false;
     instruction->ClearMustDoNullCheck();
   }
@@ -305,14 +310,14 @@
 
 void InstructionSimplifierVisitor::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   if ((instruction->GetValue()->GetType() == Primitive::kPrimNot)
-      && !instruction->GetValue()->CanBeNull()) {
+      && CanEnsureNotNullAt(instruction->GetValue(), instruction)) {
     instruction->ClearValueCanBeNull();
   }
 }
 
 void InstructionSimplifierVisitor::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   if ((instruction->GetValue()->GetType() == Primitive::kPrimNot)
-      && !instruction->GetValue()->CanBeNull()) {
+      && CanEnsureNotNullAt(instruction->GetValue(), instruction)) {
     instruction->ClearValueCanBeNull();
   }
 }
@@ -426,19 +431,41 @@
   HInstruction* value = instruction->GetValue();
   if (value->GetType() != Primitive::kPrimNot) return;
 
+  if (CanEnsureNotNullAt(value, instruction)) {
+    instruction->ClearValueCanBeNull();
+  }
+
   if (value->IsArrayGet()) {
     if (value->AsArrayGet()->GetArray() == instruction->GetArray()) {
       // If the code is just swapping elements in the array, no need for a type check.
       instruction->ClearNeedsTypeCheck();
+      return;
     }
   }
 
   if (value->IsNullConstant()) {
     instruction->ClearNeedsTypeCheck();
+    return;
   }
 
-  if (!value->CanBeNull()) {
-    instruction->ClearValueCanBeNull();
+  ScopedObjectAccess soa(Thread::Current());
+  ReferenceTypeInfo array_rti = instruction->GetArray()->GetReferenceTypeInfo();
+  ReferenceTypeInfo value_rti = value->GetReferenceTypeInfo();
+  if (!array_rti.IsValid()) {
+    return;
+  }
+
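+  // No type check is needed if the value's static type fits the array's component type.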
+  if (value_rti.IsValid() && array_rti.CanArrayHold(value_rti)) {
+    instruction->ClearNeedsTypeCheck();
+    return;
+  }
+
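+  // An exact Object[] can hold any reference; otherwise record the Object[] static type.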
+  if (array_rti.IsObjectArray()) {
+    if (array_rti.IsExact()) {
+      instruction->ClearNeedsTypeCheck();
+      return;
+    }
+    instruction->SetStaticTypeOfArrayIsObjectArray();
   }
 }
 
@@ -502,14 +529,45 @@
   HConstant* input_cst = instruction->GetConstantRight();
   HInstruction* input_other = instruction->GetLeastConstantLeft();
 
-  if ((input_cst != nullptr) && AreAllBitsSet(input_cst)) {
-    // Replace code looking like
-    //    AND dst, src, 0xFFF...FF
-    // with
-    //    src
-    instruction->ReplaceWith(input_other);
-    instruction->GetBlock()->RemoveInstruction(instruction);
-    return;
+  if (input_cst != nullptr) {
+    int64_t value = Int64FromConstant(input_cst);
+    if (value == -1) {
+      // Replace code looking like
+      //    AND dst, src, 0xFFF...FF
+      // with
+      //    src
+      instruction->ReplaceWith(input_other);
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
+      return;
+    }
+    // Eliminate And from UShr+And if the And-mask contains all the bits that
+    // can be non-zero after UShr. Transform Shr+And to UShr if the And-mask
+    // precisely clears the shifted-in sign bits.
+    if ((input_other->IsUShr() || input_other->IsShr()) && input_other->InputAt(1)->IsConstant()) {
+      size_t reg_bits = (instruction->GetResultType() == Primitive::kPrimLong) ? 64 : 32;
+      size_t shift = Int64FromConstant(input_other->InputAt(1)->AsConstant()) & (reg_bits - 1);
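+      // CTZ(value + 1) counts the number of trailing one bits in the And-mask.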
+      size_t num_tail_bits_set = CTZ(value + 1);
+      if ((num_tail_bits_set >= reg_bits - shift) && input_other->IsUShr()) {
+        // This AND clears only bits known to be clear, for example "(x >>> 24) & 0xff".
+        instruction->ReplaceWith(input_other);
+        instruction->GetBlock()->RemoveInstruction(instruction);
+        RecordSimplification();
+        return;
+      } else if ((num_tail_bits_set == reg_bits - shift) && IsPowerOfTwo(value + 1) &&
+          input_other->HasOnlyOneNonEnvironmentUse()) {
+        DCHECK(input_other->IsShr());  // For UShr, we would have taken the branch above.
+        // Replace SHR+AND with USHR, for example "(x >> 24) & 0xff" -> "x >>> 24".
+        HUShr* ushr = new (GetGraph()->GetArena()) HUShr(instruction->GetType(),
+                                                         input_other->InputAt(0),
+                                                         input_other->InputAt(1),
+                                                         input_other->GetDexPc());
+        instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, ushr);
+        input_other->GetBlock()->RemoveInstruction(input_other);
+        RecordSimplification();
+        return;
+      }
+    }
   }
 
   // We assume that GVN has run before, so we only perform a pointer comparison.
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index b71fdb8..9564622 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -90,7 +90,7 @@
 }
 
 static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_set) {
-  if (instruction_set == kMips || instruction_set == kMips64) {
+  if (instruction_set == kMips) {
     return Intrinsics::kNone;
   }
   switch (method.opcode) {
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
new file mode 100644
index 0000000..1b4d161
--- /dev/null
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -0,0 +1,799 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_mips64.h"
+
+#include "arch/mips64/instruction_set_features_mips64.h"
+#include "art_method.h"
+#include "code_generator_mips64.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "intrinsics.h"
+#include "mirror/array-inl.h"
+#include "mirror/string.h"
+#include "thread.h"
+#include "utils/mips64/assembler_mips64.h"
+#include "utils/mips64/constants_mips64.h"
+
+namespace art {
+
+namespace mips64 {
+
+IntrinsicLocationsBuilderMIPS64::IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen)
+  : arena_(codegen->GetGraph()->GetArena()) {
+}
+
+Mips64Assembler* IntrinsicCodeGeneratorMIPS64::GetAssembler() {
+  return reinterpret_cast<Mips64Assembler*>(codegen_->GetAssembler());
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() {
+  return codegen_->GetGraph()->GetArena();
+}
+
+bool IntrinsicLocationsBuilderMIPS64::TryDispatch(HInvoke* invoke) {
+  Dispatch(invoke);
+  LocationSummary* res = invoke->GetLocations();
+  return res != nullptr && res->Intrinsified();
+}
+
+#define __ assembler->
+
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+  FpuRegister in  = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  if (is64bit) {
+    __ Dmfc1(out, in);
+  } else {
+    __ Mfc1(out, in);
+  }
+}
+
+// long java.lang.Double.doubleToRawLongBits(double)
+void IntrinsicLocationsBuilderMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+}
+
+// int java.lang.Float.floatToRawIntBits(float)
+void IntrinsicLocationsBuilderMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+  GpuRegister in  = locations->InAt(0).AsRegister<GpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  if (is64bit) {
+    __ Dmtc1(in, out);
+  } else {
+    __ Mtc1(in, out);
+  }
+}
+
+// double java.lang.Double.longBitsToDouble(long)
+void IntrinsicLocationsBuilderMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+// float java.lang.Float.intBitsToFloat(int)
+void IntrinsicLocationsBuilderMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenReverseBytes(LocationSummary* locations,
+                            Primitive::Type type,
+                            Mips64Assembler* assembler) {
+  GpuRegister in  = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
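+  // Dsbh/Wsbh swap the bytes within each halfword, Rotr/Dshd reorder the halfwords, and Seh
+  // sign-extends the reversed short value.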
+  switch (type) {
+    case Primitive::kPrimShort:
+      __ Dsbh(out, in);
+      __ Seh(out, out);
+      break;
+    case Primitive::kPrimInt:
+      __ Rotr(out, in, 16);
+      __ Wsbh(out, out);
+      break;
+    case Primitive::kPrimLong:
+      __ Dsbh(out, in);
+      __ Dshd(out, out);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected size for reverse-bytes: " << type;
+      UNREACHABLE();
+  }
+}
+
+// int java.lang.Integer.reverseBytes(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.reverseBytes(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+// short java.lang.Short.reverseBytes(short)
+void IntrinsicLocationsBuilderMIPS64::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitShortReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+static void GenCountZeroes(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+  GpuRegister in  = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  if (is64bit) {
+    __ Dclz(out, in);
+  } else {
+    __ Clz(out, in);
+  }
+}
+
+// int java.lang.Integer.numberOfLeadingZeros(int i)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  GenCountZeroes(invoke->GetLocations(), false, GetAssembler());
+}
+
+// int java.lang.Long.numberOfLeadingZeros(long i)
+void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  GenCountZeroes(invoke->GetLocations(), true, GetAssembler());
+}
+
+static void GenReverse(LocationSummary* locations,
+                       Primitive::Type type,
+                       Mips64Assembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  GpuRegister in  = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
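+  // Reverse the byte order first (as in GenReverseBytes); Bitswap/Dbitswap then reverses the
+  // bits within each byte, completing the full bit reversal.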
+  if (type == Primitive::kPrimInt) {
+    __ Rotr(out, in, 16);
+    __ Wsbh(out, out);
+    __ Bitswap(out, out);
+  } else {
+    __ Dsbh(out, in);
+    __ Dshd(out, out);
+    __ Dbitswap(out, out);
+  }
+}
+
+// int java.lang.Integer.reverse(int)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerReverse(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+// long java.lang.Long.reverse(long)
+void IntrinsicLocationsBuilderMIPS64::VisitLongReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitLongReverse(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  if (is64bit) {
+    __ AbsD(out, in);
+  } else {
+    __ AbsS(out, in);
+  }
+}
+
+// double java.lang.Math.abs(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) {
+  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+// float java.lang.Math.abs(float)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) {
+  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
+  GpuRegister in  = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
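+  // Branchless abs: AT holds the sign (all ones if negative, zero otherwise), so (in ^ AT) - AT
+  // negates negative inputs and leaves non-negative inputs unchanged.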
+  if (is64bit) {
+    __ Dsra32(AT, in, 31);
+    __ Xor(out, in, AT);
+    __ Dsubu(out, out, AT);
+  } else {
+    __ Sra(AT, in, 31);
+    __ Xor(out, in, AT);
+    __ Subu(out, out, AT);
+  }
+}
+
+// int java.lang.Math.abs(int)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) {
+  CreateIntToInt(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+}
+
+// long java.lang.Math.abs(long)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) {
+  CreateIntToInt(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+}
+
+static void GenMinMaxFP(LocationSummary* locations,
+                        bool is_min,
+                        bool is_double,
+                        Mips64Assembler* assembler) {
+  FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  if (is_double) {
+    if (is_min) {
+      __ MinD(out, lhs, rhs);
+    } else {
+      __ MaxD(out, lhs, rhs);
+    }
+  } else {
+    if (is_min) {
+      __ MinS(out, lhs, rhs);
+    } else {
+      __ MaxS(out, lhs, rhs);
+    }
+  }
+}
+
+static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+// double java.lang.Math.min(double, double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+// float java.lang.Math.min(float, float)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+// double java.lang.Math.max(double, double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+// float java.lang.Math.max(float, float)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+  CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+static void GenMinMax(LocationSummary* locations,
+                      bool is_min,
+                      Mips64Assembler* assembler) {
+  GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
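+  // Branchless min/max with MIPS64r6 selects: Slt computes the comparison, Seleqz/Selnez zero
+  // out the operand that was not selected, and the final Or combines the two results.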
+  if (out == lhs) {
+    __ Slt(AT, rhs, lhs);
+    if (is_min) {
+      __ Seleqz(out, lhs, AT);
+      __ Selnez(AT, rhs, AT);
+    } else {
+      __ Selnez(out, lhs, AT);
+      __ Seleqz(AT, rhs, AT);
+    }
+  } else {
+    __ Slt(AT, lhs, rhs);
+    if (is_min) {
+      __ Seleqz(out, rhs, AT);
+      __ Selnez(AT, lhs, AT);
+    } else {
+      __ Selnez(out, rhs, AT);
+      __ Seleqz(AT, lhs, AT);
+    }
+  }
+  __ Or(out, out, AT);
+}
+
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+// int java.lang.Math.min(int, int)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+}
+
+// long java.lang.Math.min(long, long)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+}
+
+// int java.lang.Math.max(int, int)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+}
+
+// long java.lang.Math.max(long, long)
+void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+}
+
+// double java.lang.Math.sqrt(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathSqrt(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathSqrt(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  __ SqrtD(out, in);
+}
+
+static void CreateFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+// double java.lang.Math.rint(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathRint(HInvoke* invoke) {
+  CreateFPToFP(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathRint(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  __ RintD(out, in);
+}
+
+// double java.lang.Math.floor(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathFloor(HInvoke* invoke) {
+  CreateFPToFP(arena_, invoke);
+}
+
+// 0x200 - +zero
+// 0x040 - +infinity
+// 0x020 - -zero
+// 0x004 - -infinity
+// 0x002 - quiet NaN
+// 0x001 - signaling NaN
+constexpr uint16_t CLASS_MASK = 0x267;
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathFloor(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  Label done;
+
+  // double floor(double in) {
+  //     if in.isNaN || in.isInfinite || in.isZero {
+  //         return in;
+  //     }
+  __ ClassD(out, in);
+  __ Dmfc1(AT, out);
+  __ Andi(AT, AT, CLASS_MASK);       // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN
+  __ MovD(out, in);
+  __ Bnezc(AT, &done);
+
+  //     Long outLong = floor(in);
+  //     if outLong == Long.MAX_VALUE {
+  //         // floor() has almost certainly returned a value which
+  //         // can't be successfully represented as a signed 64-bit
+  //         // number.  Java expects that the input value will be
+  //         // returned in these cases.
+  //         // There is also a small probability that floor(in)
+  //         // correctly truncates the input value to Long.MAX_VALUE.  In
+  //         // that case, this exception handling code still does the
+  //         // correct thing.
+  //         return in;
+  //     }
+  __ FloorLD(out, in);
+  __ Dmfc1(AT, out);
+  __ MovD(out, in);
+  __ LoadConst64(TMP, kPrimLongMax);
+  __ Beqc(AT, TMP, &done);
+
+  //     double out = outLong;
+  //     return out;
+  __ Dmtc1(AT, out);
+  __ Cvtdl(out, out);
+  __ Bind(&done);
+  // }
+}
+
+// double java.lang.Math.ceil(double)
+void IntrinsicLocationsBuilderMIPS64::VisitMathCeil(HInvoke* invoke) {
+  CreateFPToFP(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathCeil(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+  FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+  Label done;
+
+  // double ceil(double in) {
+  //     if in.isNaN || in.isInfinite || in.isZero {
+  //         return in;
+  //     }
+  __ ClassD(out, in);
+  __ Dmfc1(AT, out);
+  __ Andi(AT, AT, CLASS_MASK);       // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN
+  __ MovD(out, in);
+  __ Bnezc(AT, &done);
+
+  //     Long outLong = ceil(in);
+  //     if outLong == Long.MAX_VALUE {
+  //         // ceil() has almost certainly returned a value which
+  //         // can't be successfully represented as a signed 64-bit
+  //         // number.  Java expects that the input value will be
+  //         // returned in these cases.
+  //         // There is also a small probability that ceil(in)
+  //         // correctly rounds up the input value to Long.MAX_VALUE.  In
+  //         // that case, this exception handling code still does the
+  //         // correct thing.
+  //         return in;
+  //     }
+  __ CeilLD(out, in);
+  __ Dmfc1(AT, out);
+  __ MovD(out, in);
+  __ LoadConst64(TMP, kPrimLongMax);
+  __ Beqc(AT, TMP, &done);
+
+  //     double out = outLong;
+  //     return out;
+  __ Dmtc1(AT, out);
+  __ Cvtdl(out, out);
+  __ Bind(&done);
+  // }
+}
+
+// byte libcore.io.Memory.peekByte(long address)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekByte(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekByte(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>();
+
+  __ Lb(out, adr, 0);
+}
+
+// short libcore.io.Memory.peekShort(long address)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>();
+
+  __ Lh(out, adr, 0);
+}
+
+// int libcore.io.Memory.peekInt(long address)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>();
+
+  __ Lw(out, adr, 0);
+}
+
+// long libcore.io.Memory.peekLong(long address)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>();
+
+  __ Ld(out, adr, 0);
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+// void libcore.io.Memory.pokeByte(long address, byte value)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeByte(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeByte(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>();
+
+  __ Sb(val, adr, 0);
+}
+
+// void libcore.io.Memory.pokeShort(long address, short value)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>();
+
+  __ Sh(val, adr, 0);
+}
+
+// void libcore.io.Memory.pokeInt(long address, int value)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>();
+
+  __ Sw(val, adr, 0);
+}
+
+// void libcore.io.Memory.pokeLong(long address, long value)
+void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>();
+
+  __ Sd(val, adr, 0);
+}
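Each peek/poke intrinsic above reduces to a single native-endian load or store at the given address with offset 0. Conceptually (a hedged sketch, treating the Java long address as a raw pointer; not ART code):

#include <cstdint>

int8_t PeekByte(int64_t address) {
  return *reinterpret_cast<const int8_t*>(address);   // Lb(out, adr, 0)
}

void PokeInt(int64_t address, int32_t value) {
  *reinterpret_cast<int32_t*>(address) = value;       // Sw(val, adr, 0)
}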
+
+// Thread java.lang.Thread.currentThread()
+void IntrinsicLocationsBuilderMIPS64::VisitThreadCurrentThread(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitThreadCurrentThread(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>();
+
+  __ LoadFromOffset(kLoadUnsignedWord,
+                    out,
+                    TR,
+                    Thread::PeerOffset<kMips64PointerSize>().Int32Value());
+}
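Thread.currentThread() compiles to a single load: TR holds the runtime Thread*, and PeerOffset locates the managed java.lang.Thread peer, which codegen reads as a 32-bit heap reference (hence the zero-extending kLoadUnsignedWord). A minimal sketch of the equivalent memory access (names and layout are assumptions for illustration):

#include <cstdint>
#include <cstring>

// `self` stands in for the runtime Thread* kept in the MIPS64 TR register,
// `peer_offset` for Thread::PeerOffset<kMips64PointerSize>().Int32Value().
uint32_t LoadCurrentThreadPeer(const uint8_t* self, int32_t peer_offset) {
  uint32_t peer;
  std::memcpy(&peer, self + peer_offset, sizeof(peer));
  return peer;
}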
+
+// Unimplemented intrinsics.
+
+#define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
+void IntrinsicLocationsBuilderMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+}                                                                                      \
+void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
+}
+
+UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
+
+UNIMPLEMENTED_INTRINSIC(UnsafeGet)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetLong)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetLongVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetObject)
+UNIMPLEMENTED_INTRINSIC(UnsafeGetObjectVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafePut)
+UNIMPLEMENTED_INTRINSIC(UnsafePutOrdered)
+UNIMPLEMENTED_INTRINSIC(UnsafePutVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafePutObject)
+UNIMPLEMENTED_INTRINSIC(UnsafePutObjectOrdered)
+UNIMPLEMENTED_INTRINSIC(UnsafePutObjectVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafePutLong)
+UNIMPLEMENTED_INTRINSIC(UnsafePutLongOrdered)
+UNIMPLEMENTED_INTRINSIC(UnsafePutLongVolatile)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
+UNIMPLEMENTED_INTRINSIC(StringCharAt)
+UNIMPLEMENTED_INTRINSIC(StringCompareTo)
+UNIMPLEMENTED_INTRINSIC(StringEquals)
+UNIMPLEMENTED_INTRINSIC(StringIndexOf)
+UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes)
+UNIMPLEMENTED_INTRINSIC(StringNewStringFromChars)
+UNIMPLEMENTED_INTRINSIC(StringNewStringFromString)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros)
+
+UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+
+#undef UNIMPLEMENTED_INTRINSIC
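For a concrete instance, UNIMPLEMENTED_INTRINSIC(MathRoundFloat) expands to the two empty visitors sketched below; because the locations builder attaches no intrinsified LocationSummary, TryDispatch reports false and such invokes are compiled as ordinary calls:

// Approximate expansion of UNIMPLEMENTED_INTRINSIC(MathRoundFloat).
void IntrinsicLocationsBuilderMIPS64::VisitMathRoundFloat(HInvoke* invoke ATTRIBUTE_UNUSED) {
}
void IntrinsicCodeGeneratorMIPS64::VisitMathRoundFloat(HInvoke* invoke ATTRIBUTE_UNUSED) {
}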
+
+#undef __
+
+}  // namespace mips64
+}  // namespace art
diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h
new file mode 100644
index 0000000..1481d24
--- /dev/null
+++ b/compiler/optimizing/intrinsics_mips64.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_
+
+#include "intrinsics.h"
+
+namespace art {
+
+class ArenaAllocator;
+class HInvokeStaticOrDirect;
+class HInvokeVirtual;
+
+namespace mips64 {
+
+class CodeGeneratorMIPS64;
+class Mips64Assembler;
+
+class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen);
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+  // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+  // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+  // the invoke.
+  bool TryDispatch(HInvoke* invoke);
+
+ private:
+  ArenaAllocator* arena_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64);
+};
+
+class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicCodeGeneratorMIPS64(CodeGeneratorMIPS64* codegen) : codegen_(codegen) {}
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache)   \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+  Mips64Assembler* GetAssembler();
+
+  ArenaAllocator* GetAllocator();
+
+  CodeGeneratorMIPS64* codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorMIPS64);
+};
+
+}  // namespace mips64
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_
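The TryDispatch() contract documented above is typically consumed by the code generator roughly as follows (a hedged sketch; HandleInvoke is a made-up wrapper, not part of this patch):

void HandleInvoke(CodeGeneratorMIPS64* codegen, HInvokeStaticOrDirect* invoke) {
  IntrinsicLocationsBuilderMIPS64 intrinsic(codegen);
  if (intrinsic.TryDispatch(invoke)) {
    // An intrinsified LocationSummary was attached; IntrinsicCodeGeneratorMIPS64
    // will emit the specialized code later, so generic invoke lowering is skipped.
    return;
  }
  // Otherwise fall through to the regular invoke handling (not shown).
}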
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index d14dfc1..ebdf7a2 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -23,18 +23,15 @@
 LocationSummary::LocationSummary(HInstruction* instruction,
                                  CallKind call_kind,
                                  bool intrinsified)
-    : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
-      temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0),
+    : inputs_(instruction->InputCount(),
+              instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)),
+      temps_(instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)),
       output_overlaps_(Location::kOutputOverlap),
       call_kind_(call_kind),
       stack_mask_(nullptr),
       register_mask_(0),
       live_registers_(),
       intrinsified_(intrinsified) {
-  inputs_.SetSize(instruction->InputCount());
-  for (size_t i = 0; i < instruction->InputCount(); ++i) {
-    inputs_.Put(i, Location());
-  }
   instruction->SetLocations(this);
 
   if (NeedsSafepoint()) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 2162ab9..de4fb7e 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -17,11 +17,11 @@
 #ifndef ART_COMPILER_OPTIMIZING_LOCATIONS_H_
 #define ART_COMPILER_OPTIMIZING_LOCATIONS_H_
 
+#include "base/arena_containers.h"
 #include "base/arena_object.h"
 #include "base/bit_field.h"
 #include "base/bit_vector.h"
 #include "base/value_object.h"
-#include "utils/growable_array.h"
 
 namespace art {
 
@@ -468,7 +468,7 @@
  * The intent is to have the code for generating the instruction independent of
  * register allocation. A register allocator just has to provide a LocationSummary.
  */
-class LocationSummary : public ArenaObject<kArenaAllocMisc> {
+class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> {
  public:
   enum CallKind {
     kNoCall,
@@ -481,15 +481,17 @@
                   bool intrinsified = false);
 
   void SetInAt(uint32_t at, Location location) {
-    inputs_.Put(at, location);
+    DCHECK_LT(at, GetInputCount());
+    inputs_[at] = location;
   }
 
   Location InAt(uint32_t at) const {
-    return inputs_.Get(at);
+    DCHECK_LT(at, GetInputCount());
+    return inputs_[at];
   }
 
   size_t GetInputCount() const {
-    return inputs_.Size();
+    return inputs_.size();
   }
 
   void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) {
@@ -508,23 +510,25 @@
   }
 
   void AddTemp(Location location) {
-    temps_.Add(location);
+    temps_.push_back(location);
   }
 
   Location GetTemp(uint32_t at) const {
-    return temps_.Get(at);
+    DCHECK_LT(at, GetTempCount());
+    return temps_[at];
   }
 
   void SetTempAt(uint32_t at, Location location) {
-    DCHECK(temps_.Get(at).IsUnallocated() || temps_.Get(at).IsInvalid());
-    temps_.Put(at, location);
+    DCHECK_LT(at, GetTempCount());
+    DCHECK(temps_[at].IsUnallocated() || temps_[at].IsInvalid());
+    temps_[at] = location;
   }
 
   size_t GetTempCount() const {
-    return temps_.Size();
+    return temps_.size();
   }
 
-  bool HasTemps() const { return !temps_.IsEmpty(); }
+  bool HasTemps() const { return !temps_.empty(); }
 
   Location Out() const { return output_; }
 
@@ -576,7 +580,7 @@
   }
 
   bool IsFixedInput(uint32_t input_index) const {
-    Location input = inputs_.Get(input_index);
+    Location input = inputs_[input_index];
     return input.IsRegister()
         || input.IsFpuRegister()
         || input.IsPair()
@@ -593,8 +597,8 @@
   }
 
  private:
-  GrowableArray<Location> inputs_;
-  GrowableArray<Location> temps_;
+  ArenaVector<Location> inputs_;
+  ArenaVector<Location> temps_;
   // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot
   // share the same register as the inputs.
   Location::OutputOverlap output_overlaps_;
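The locations.h changes follow the same GrowableArray-to-ArenaVector migration applied throughout this patch; the call mapping is sketched below (a minimal illustration, assuming an ArenaAllocator* named arena):

#include "base/arena_containers.h"

void MigrationSketch(ArenaAllocator* arena) {
  // Was: GrowableArray<int> values(arena, 32);
  ArenaVector<int> values(arena->Adapter(kArenaAllocMisc));
  values.reserve(32);
  values.push_back(42);       // was values.Add(42)
  size_t n = values.size();   // was values.Size()
  int first = values[0];      // was values.Get(0); indexed access now paired with DCHECK_LT
  values.clear();             // was values.Reset()
  (void)n; (void)first;
}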
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index b2407c5..989970f 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -20,8 +20,8 @@
 #include "ssa_builder.h"
 #include "base/bit_vector-inl.h"
 #include "base/bit_utils.h"
+#include "base/stl_util.h"
 #include "mirror/class-inl.h"
-#include "utils/growable_array.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
@@ -32,8 +32,41 @@
 }
 
 void HGraph::FindBackEdges(ArenaBitVector* visited) {
+  // "visited" must be empty on entry, it's an output argument for all visited (i.e. live) blocks.
+  DCHECK_EQ(visited->GetHighestBitSet(), -1);
+
+  // Nodes that we're currently visiting, indexed by block id.
   ArenaBitVector visiting(arena_, blocks_.size(), false);
-  VisitBlockForBackEdges(entry_block_, visited, &visiting);
+  // Number of successors visited from a given node, indexed by block id.
+  ArenaVector<size_t> successors_visited(blocks_.size(), 0u, arena_->Adapter());
+  // Stack of nodes that we're currently visiting (same as marked in "visiting" above).
+  ArenaVector<HBasicBlock*> worklist(arena_->Adapter());
+  constexpr size_t kDefaultWorklistSize = 8;
+  worklist.reserve(kDefaultWorklistSize);
+  visited->SetBit(entry_block_->GetBlockId());
+  visiting.SetBit(entry_block_->GetBlockId());
+  worklist.push_back(entry_block_);
+
+  while (!worklist.empty()) {
+    HBasicBlock* current = worklist.back();
+    uint32_t current_id = current->GetBlockId();
+    if (successors_visited[current_id] == current->GetSuccessors().size()) {
+      visiting.ClearBit(current_id);
+      worklist.pop_back();
+    } else {
+      DCHECK_LT(successors_visited[current_id], current->GetSuccessors().size());
+      HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++];
+      uint32_t successor_id = successor->GetBlockId();
+      if (visiting.IsBitSet(successor_id)) {
+        DCHECK(ContainsElement(worklist, successor));
+        successor->AddBackEdge(current);
+      } else if (!visited->IsBitSet(successor_id)) {
+        visited->SetBit(successor_id);
+        visiting.SetBit(successor_id);
+        worklist.push_back(successor);
+      }
+    }
+  }
 }
 
 static void RemoveAsUser(HInstruction* instruction) {
@@ -79,24 +112,6 @@
   }
 }
 
-void HGraph::VisitBlockForBackEdges(HBasicBlock* block,
-                                    ArenaBitVector* visited,
-                                    ArenaBitVector* visiting) {
-  int id = block->GetBlockId();
-  if (visited->IsBitSet(id)) return;
-
-  visited->SetBit(id);
-  visiting->SetBit(id);
-  for (HBasicBlock* successor : block->GetSuccessors()) {
-    if (visiting->IsBitSet(successor->GetBlockId())) {
-      successor->AddBackEdge(block);
-    } else {
-      VisitBlockForBackEdges(successor, visited, visiting);
-    }
-  }
-  visiting->ClearBit(id);
-}
-
 void HGraph::BuildDominatorTree() {
   // (1) Simplify the CFG so that catch blocks have only exceptional incoming
   //     edges. This invariant simplifies building SSA form because Phis cannot
@@ -141,10 +156,43 @@
 void HGraph::ComputeDominanceInformation() {
   DCHECK(reverse_post_order_.empty());
   reverse_post_order_.reserve(blocks_.size());
-  ArenaVector<size_t> visits(blocks_.size(), 0u, arena_->Adapter());
   reverse_post_order_.push_back(entry_block_);
-  for (HBasicBlock* successor : entry_block_->GetSuccessors()) {
-    VisitBlockForDominatorTree(successor, entry_block_, &visits);
+
+  // Number of visits of a given node, indexed by block id.
+  ArenaVector<size_t> visits(blocks_.size(), 0u, arena_->Adapter());
+  // Number of successors visited from a given node, indexed by block id.
+  ArenaVector<size_t> successors_visited(blocks_.size(), 0u, arena_->Adapter());
+  // Nodes for which we need to visit successors.
+  ArenaVector<HBasicBlock*> worklist(arena_->Adapter());
+  constexpr size_t kDefaultWorklistSize = 8;
+  worklist.reserve(kDefaultWorklistSize);
+  worklist.push_back(entry_block_);
+
+  while (!worklist.empty()) {
+    HBasicBlock* current = worklist.back();
+    uint32_t current_id = current->GetBlockId();
+    if (successors_visited[current_id] == current->GetSuccessors().size()) {
+      worklist.pop_back();
+    } else {
+      DCHECK_LT(successors_visited[current_id], current->GetSuccessors().size());
+      HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++];
+
+      if (successor->GetDominator() == nullptr) {
+        successor->SetDominator(current);
+      } else {
+        successor->SetDominator(FindCommonDominator(successor->GetDominator(), current));
+      }
+
+      // Once all the forward edges have been visited, we know the immediate
+      // dominator of the block. We can then start visiting its successors.
+      DCHECK_LT(successor->GetBlockId(), visits.size());
+      if (++visits[successor->GetBlockId()] ==
+          successor->GetPredecessors().size() - successor->NumberOfBackEdges()) {
+        successor->GetDominator()->AddDominatedBlock(successor);
+        reverse_post_order_.push_back(successor);
+        worklist.push_back(successor);
+      }
+    }
   }
 }
 
@@ -166,28 +214,6 @@
   return nullptr;
 }
 
-void HGraph::VisitBlockForDominatorTree(HBasicBlock* block,
-                                        HBasicBlock* predecessor,
-                                        ArenaVector<size_t>* visits) {
-  if (block->GetDominator() == nullptr) {
-    block->SetDominator(predecessor);
-  } else {
-    block->SetDominator(FindCommonDominator(block->GetDominator(), predecessor));
-  }
-
-  // Once all the forward edges have been visited, we know the immediate
-  // dominator of the block. We can then start visiting its successors.
-  DCHECK_LT(block->GetBlockId(), visits->size());
-  if (++(*visits)[block->GetBlockId()] ==
-      block->GetPredecessors().size() - block->NumberOfBackEdges()) {
-    block->GetDominator()->AddDominatedBlock(block);
-    reverse_post_order_.push_back(block);
-    for (HBasicBlock* successor : block->GetSuccessors()) {
-      VisitBlockForDominatorTree(successor, block, visits);
-    }
-  }
-}
-
 void HGraph::TransformToSsa() {
   DCHECK(!reverse_post_order_.empty());
   SsaBuilder ssa_builder(this);
@@ -1143,6 +1169,23 @@
   return new_block;
 }
 
+HBasicBlock* HBasicBlock::CreateImmediateDominator() {
+  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented";
+  DCHECK(!IsCatchBlock()) << "Support for updating try/catch information not implemented.";
+
+  HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc());
+
+  for (HBasicBlock* predecessor : GetPredecessors()) {
+    new_block->predecessors_.push_back(predecessor);
+    predecessor->successors_[predecessor->GetSuccessorIndexOf(this)] = new_block;
+  }
+  predecessors_.clear();
+  AddPredecessor(new_block);
+
+  GetGraph()->AddBlock(new_block);
+  return new_block;
+}
+
 HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) {
   DCHECK(!cursor->IsControlFlow());
   DCHECK_NE(instructions_.last_instruction_, cursor);
@@ -1188,6 +1231,15 @@
   }
 }
 
+bool HBasicBlock::HasThrowingInstructions() const {
+  for (HInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) {
+    if (it.Current()->CanThrow()) {
+      return true;
+    }
+  }
+  return false;
+}
+
 static bool HasOnlyOneInstruction(const HBasicBlock& block) {
   return block.GetPhis().IsEmpty()
       && !block.GetInstructions().IsEmpty()
@@ -1297,16 +1349,25 @@
   // instructions.
   for (HBasicBlock* predecessor : predecessors_) {
     HInstruction* last_instruction = predecessor->GetLastInstruction();
-    predecessor->RemoveInstruction(last_instruction);
     predecessor->RemoveSuccessor(this);
-    if (predecessor->GetSuccessors().size() == 1u) {
-      DCHECK(last_instruction->IsIf());
+    uint32_t num_pred_successors = predecessor->GetSuccessors().size();
+    if (num_pred_successors == 1u) {
+      // If we have one successor after removing one, then we must have
+      // had an HIf or HPackedSwitch, as they have more than one successor.
+      // Replace it with an HGoto.
+      DCHECK(last_instruction->IsIf() || last_instruction->IsPackedSwitch());
+      predecessor->RemoveInstruction(last_instruction);
       predecessor->AddInstruction(new (graph_->GetArena()) HGoto(last_instruction->GetDexPc()));
-    } else {
+    } else if (num_pred_successors == 0u) {
       // The predecessor has no remaining successors and therefore must be dead.
       // We deliberately leave it without a control-flow instruction so that the
       // SSAChecker fails unless it is also removed during the pass.
-      DCHECK_EQ(predecessor->GetSuccessors().size(), 0u);
+      predecessor->RemoveInstruction(last_instruction);
+    } else {
+      // There are multiple successors left. This must come from an HPackedSwitch
+      // and we are in the middle of removing the HPackedSwitch. Like above, leave
+      // this alone, and the SSAChecker will fail if it is not removed as well.
+      DCHECK(last_instruction->IsPackedSwitch());
     }
   }
   predecessors_.clear();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 8dd31be..849f876 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -35,7 +35,6 @@
 #include "offsets.h"
 #include "primitive.h"
 #include "utils/arena_bit_vector.h"
-#include "utils/growable_array.h"
 
 namespace art {
 
@@ -370,13 +369,7 @@
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
  private:
-  void VisitBlockForDominatorTree(HBasicBlock* block,
-                                  HBasicBlock* predecessor,
-                                  ArenaVector<size_t>* visits);
   void FindBackEdges(ArenaBitVector* visited);
-  void VisitBlockForBackEdges(HBasicBlock* block,
-                              ArenaBitVector* visited,
-                              ArenaBitVector* visiting);
   void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
   void RemoveDeadBlocks(const ArenaBitVector& visited);
 
@@ -825,11 +818,17 @@
     return EndsWithTryBoundary() ? 1 : GetSuccessors().size();
   }
 
+  // Create a new block between this block and its predecessors. The new block
+  // is added to the graph, all predecessor edges are relinked to it and an edge
+  // is created to `this`. Returns the new empty block. Reverse post order, loop
+  // information and try/catch information are not updated.
+  HBasicBlock* CreateImmediateDominator();
+
   // Split the block into two blocks just before `cursor`. Returns the newly
   // created, latter block. Note that this method will add the block to the
   // graph, create a Goto at the end of the former block and will create an edge
   // between the blocks. It will not, however, update the reverse post order or
-  // loop information.
+  // loop and try/catch information.
   HBasicBlock* SplitBefore(HInstruction* cursor);
 
   // Split the block into two blocks just after `cursor`. Returns the newly
@@ -940,6 +939,8 @@
   // the appropriate try entry will be returned.
   const HTryBoundary* ComputeTryEntryOfSuccessors() const;
 
+  // Returns whether this block contains at least one instruction that can throw.
+  bool HasThrowingInstructions() const;
+
   // Returns whether this block dominates the blocked passed as parameter.
   bool Dominates(HBasicBlock* block) const;
 
@@ -949,7 +950,6 @@
   void SetLifetimeStart(size_t start) { lifetime_start_ = start; }
   void SetLifetimeEnd(size_t end) { lifetime_end_ = end; }
 
-
   bool EndsWithControlFlowInstruction() const;
   bool EndsWithIf() const;
   bool EndsWithTryBoundary() const;
@@ -1056,6 +1056,7 @@
   M(NullConstant, Instruction)                                          \
   M(NullCheck, Instruction)                                             \
   M(Or, BinaryOperation)                                                \
+  M(PackedSwitch, Instruction)                                          \
   M(ParallelMove, Instruction)                                          \
   M(ParameterValue, Instruction)                                        \
   M(Phi, Instruction)                                                   \
@@ -1066,6 +1067,10 @@
   M(Shr, BinaryOperation)                                               \
   M(StaticFieldGet, Instruction)                                        \
   M(StaticFieldSet, Instruction)                                        \
+  M(UnresolvedInstanceFieldGet, Instruction)                            \
+  M(UnresolvedInstanceFieldSet, Instruction)                            \
+  M(UnresolvedStaticFieldGet, Instruction)                              \
+  M(UnresolvedStaticFieldSet, Instruction)                              \
   M(StoreLocal, Instruction)                                            \
   M(Sub, BinaryOperation)                                               \
   M(SuspendCheck, Instruction)                                          \
@@ -1643,17 +1648,34 @@
   bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return IsValidHandle(type_handle_);
   }
+
   bool IsExact() const { return is_exact_; }
 
   bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(IsValid());
     return GetTypeHandle()->IsObjectClass();
   }
+
+  bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass();
+  }
+
   bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(IsValid());
     return GetTypeHandle()->IsInterface();
   }
 
+  bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return GetTypeHandle()->IsArrayClass();
+  }
+
+  bool CanArrayHold(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!IsExact()) return false;
+    if (!IsArrayClass()) return false;
+    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get());
+  }
+
   Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
 
   bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -2221,7 +2243,9 @@
  public:
   int32_t GetValue() const { return value_; }
 
-  uint64_t GetValueAsUint64() const OVERRIDE { return static_cast<uint64_t>(value_); }
+  uint64_t GetValueAsUint64() const OVERRIDE {
+    return static_cast<uint64_t>(static_cast<uint32_t>(value_));
+  }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
     DCHECK(other->IsIntConstant());
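The inner cast in GetValueAsUint64() above matters for negative constants: converting the int32_t directly would sign-extend into the upper half, while the two-step cast zero-extends. A small worked check (using -1 as the constant value):

#include <cstdint>

static_assert(static_cast<uint64_t>(int32_t{-1}) == 0xFFFFFFFFFFFFFFFFu,
              "single cast sign-extends the constant");
static_assert(static_cast<uint64_t>(static_cast<uint32_t>(int32_t{-1})) == 0x00000000FFFFFFFFu,
              "two-step cast keeps only the low 32 bits");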
@@ -2402,6 +2426,40 @@
   DISALLOW_COPY_AND_ASSIGN(HCurrentMethod);
 };
 
+// PackedSwitch (jump table). A block ending with a PackedSwitch instruction will
+// have one successor for each entry in the switch table, and the final successor
+// will be the block containing the next Dex opcode.
+class HPackedSwitch : public HTemplateInstruction<1> {
+ public:
+  HPackedSwitch(int32_t start_value,
+                uint32_t num_entries,
+                HInstruction* input,
+                uint32_t dex_pc = kNoDexPc)
+    : HTemplateInstruction(SideEffects::None(), dex_pc),
+      start_value_(start_value),
+      num_entries_(num_entries) {
+    SetRawInputAt(0, input);
+  }
+
+  bool IsControlFlow() const OVERRIDE { return true; }
+
+  int32_t GetStartValue() const { return start_value_; }
+
+  uint32_t GetNumEntries() const { return num_entries_; }
+
+  HBasicBlock* GetDefaultBlock() const {
+    // Last entry is the default block.
+    return GetBlock()->GetSuccessor(num_entries_);
+  }
+
+  DECLARE_INSTRUCTION(PackedSwitch);
+
+ private:
+  const int32_t start_value_;
+  const uint32_t num_entries_;
+
+  DISALLOW_COPY_AND_ASSIGN(HPackedSwitch);
+};
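GetDefaultBlock() encodes the successor layout of a packed switch: successors 0 .. num_entries_-1 are the case targets for start_value_ .. start_value_+num_entries_-1, and successor num_entries_ is the fall-through/default block. A hedged helper sketch following that layout (GetCaseBlock is hypothetical, not part of the patch):

HBasicBlock* GetCaseBlock(const HPackedSwitch* switch_insn, uint32_t case_index) {
  DCHECK_LT(case_index, switch_insn->GetNumEntries());
  // Case `case_index` handles the value GetStartValue() + case_index.
  return switch_insn->GetBlock()->GetSuccessor(case_index);
}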
+
 class HUnaryOperation : public HExpression<1> {
  public:
   HUnaryOperation(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
@@ -4277,7 +4335,8 @@
                 SideEffectsForArchRuntimeCalls(value->GetType())), dex_pc),
         expected_component_type_(expected_component_type),
         needs_type_check_(value->GetType() == Primitive::kPrimNot),
-        value_can_be_null_(true) {
+        value_can_be_null_(true),
+        static_type_of_array_is_object_array_(false) {
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
     SetRawInputAt(2, value);
@@ -4306,8 +4365,13 @@
     value_can_be_null_ = false;
   }
 
+  void SetStaticTypeOfArrayIsObjectArray() {
+    static_type_of_array_is_object_array_ = true;
+  }
+
   bool GetValueCanBeNull() const { return value_can_be_null_; }
   bool NeedsTypeCheck() const { return needs_type_check_; }
+  bool StaticTypeOfArrayIsObjectArray() const { return static_type_of_array_is_object_array_; }
 
   HInstruction* GetArray() const { return InputAt(0); }
   HInstruction* GetIndex() const { return InputAt(1); }
@@ -4334,6 +4398,9 @@
   const Primitive::Type expected_component_type_;
   bool needs_type_check_;
   bool value_can_be_null_;
+  // Cached information for the reference_type_info_ so that codegen
+  // does not need to inspect the static type.
+  bool static_type_of_array_is_object_array_;
 
   DISALLOW_COPY_AND_ASSIGN(HArraySet);
 };
@@ -4672,6 +4739,112 @@
   DISALLOW_COPY_AND_ASSIGN(HStaticFieldSet);
 };
 
+class HUnresolvedInstanceFieldGet : public HExpression<1> {
+ public:
+  HUnresolvedInstanceFieldGet(HInstruction* obj,
+                              Primitive::Type field_type,
+                              uint32_t field_index,
+                              uint32_t dex_pc)
+      : HExpression(field_type, SideEffects::AllExceptGCDependency(), dex_pc),
+        field_index_(field_index) {
+    SetRawInputAt(0, obj);
+  }
+
+  bool NeedsEnvironment() const OVERRIDE { return true; }
+  bool CanThrow() const OVERRIDE { return true; }
+
+  Primitive::Type GetFieldType() const { return GetType(); }
+  uint32_t GetFieldIndex() const { return field_index_; }
+
+  DECLARE_INSTRUCTION(UnresolvedInstanceFieldGet);
+
+ private:
+  const uint32_t field_index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HUnresolvedInstanceFieldGet);
+};
+
+class HUnresolvedInstanceFieldSet : public HTemplateInstruction<2> {
+ public:
+  HUnresolvedInstanceFieldSet(HInstruction* obj,
+                              HInstruction* value,
+                              Primitive::Type field_type,
+                              uint32_t field_index,
+                              uint32_t dex_pc)
+      : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc),
+        field_type_(field_type),
+        field_index_(field_index) {
+    DCHECK_EQ(field_type, value->GetType());
+    SetRawInputAt(0, obj);
+    SetRawInputAt(1, value);
+  }
+
+  bool NeedsEnvironment() const OVERRIDE { return true; }
+  bool CanThrow() const OVERRIDE { return true; }
+
+  Primitive::Type GetFieldType() const { return field_type_; }
+  uint32_t GetFieldIndex() const { return field_index_; }
+
+  DECLARE_INSTRUCTION(UnresolvedInstanceFieldSet);
+
+ private:
+  const Primitive::Type field_type_;
+  const uint32_t field_index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HUnresolvedInstanceFieldSet);
+};
+
+class HUnresolvedStaticFieldGet : public HExpression<0> {
+ public:
+  HUnresolvedStaticFieldGet(Primitive::Type field_type,
+                            uint32_t field_index,
+                            uint32_t dex_pc)
+      : HExpression(field_type, SideEffects::AllExceptGCDependency(), dex_pc),
+        field_index_(field_index) {
+  }
+
+  bool NeedsEnvironment() const OVERRIDE { return true; }
+  bool CanThrow() const OVERRIDE { return true; }
+
+  Primitive::Type GetFieldType() const { return GetType(); }
+  uint32_t GetFieldIndex() const { return field_index_; }
+
+  DECLARE_INSTRUCTION(UnresolvedStaticFieldGet);
+
+ private:
+  const uint32_t field_index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HUnresolvedStaticFieldGet);
+};
+
+class HUnresolvedStaticFieldSet : public HTemplateInstruction<1> {
+ public:
+  HUnresolvedStaticFieldSet(HInstruction* value,
+                            Primitive::Type field_type,
+                            uint32_t field_index,
+                            uint32_t dex_pc)
+      : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc),
+        field_type_(field_type),
+        field_index_(field_index) {
+    DCHECK_EQ(field_type, value->GetType());
+    SetRawInputAt(0, value);
+  }
+
+  bool NeedsEnvironment() const OVERRIDE { return true; }
+  bool CanThrow() const OVERRIDE { return true; }
+
+  Primitive::Type GetFieldType() const { return field_type_; }
+  uint32_t GetFieldIndex() const { return field_index_; }
+
+  DECLARE_INSTRUCTION(UnresolvedStaticFieldSet);
+
+ private:
+  const Primitive::Type field_type_;
+  const uint32_t field_index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HUnresolvedStaticFieldSet);
+};
+
 // Implement the move-exception DEX instruction.
 class HLoadException : public HExpression<0> {
  public:
@@ -5020,7 +5193,10 @@
 class HParallelMove : public HTemplateInstruction<0> {
  public:
   explicit HParallelMove(ArenaAllocator* arena, uint32_t dex_pc = kNoDexPc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc), moves_(arena, kDefaultNumberOfMoves) {}
+      : HTemplateInstruction(SideEffects::None(), dex_pc),
+        moves_(arena->Adapter(kArenaAllocMoveOperands)) {
+    moves_.reserve(kDefaultNumberOfMoves);
+  }
 
   void AddMove(Location source,
                Location destination,
@@ -5030,15 +5206,15 @@
     DCHECK(destination.IsValid());
     if (kIsDebugBuild) {
       if (instruction != nullptr) {
-        for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
-          if (moves_.Get(i).GetInstruction() == instruction) {
+        for (const MoveOperands& move : moves_) {
+          if (move.GetInstruction() == instruction) {
             // Special case the situation where the move is for the spill slot
             // of the instruction.
             if ((GetPrevious() == instruction)
                 || ((GetPrevious() == nullptr)
                     && instruction->IsPhi()
                     && instruction->GetBlock() == GetBlock())) {
-              DCHECK_NE(destination.GetKind(), moves_.Get(i).GetDestination().GetKind())
+              DCHECK_NE(destination.GetKind(), move.GetDestination().GetKind())
                   << "Doing parallel moves for the same instruction.";
             } else {
               DCHECK(false) << "Doing parallel moves for the same instruction.";
@@ -5046,26 +5222,27 @@
           }
         }
       }
-      for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
-        DCHECK(!destination.OverlapsWith(moves_.Get(i).GetDestination()))
+      for (const MoveOperands& move : moves_) {
+        DCHECK(!destination.OverlapsWith(move.GetDestination()))
             << "Overlapped destination for two moves in a parallel move: "
-            << moves_.Get(i).GetSource() << " ==> " << moves_.Get(i).GetDestination() << " and "
+            << move.GetSource() << " ==> " << move.GetDestination() << " and "
             << source << " ==> " << destination;
       }
     }
-    moves_.Add(MoveOperands(source, destination, type, instruction));
+    moves_.emplace_back(source, destination, type, instruction);
   }
 
-  MoveOperands* MoveOperandsAt(size_t index) const {
-    return moves_.GetRawStorage() + index;
+  MoveOperands* MoveOperandsAt(size_t index) {
+    DCHECK_LT(index, moves_.size());
+    return &moves_[index];
   }
 
-  size_t NumMoves() const { return moves_.Size(); }
+  size_t NumMoves() const { return moves_.size(); }
 
   DECLARE_INSTRUCTION(ParallelMove);
 
  private:
-  GrowableArray<MoveOperands> moves_;
+  ArenaVector<MoveOperands> moves_;
 
   DISALLOW_COPY_AND_ASSIGN(HParallelMove);
 };
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index a2b6131..3e982dc 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -321,8 +321,7 @@
 OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
     : Compiler(driver, kMaximumCompilationTimeBeforeWarning),
       run_optimizations_(
-          (driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime)
-          && !driver->GetCompilerOptions().GetDebuggable()),
+          driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime),
       delegate_(Create(driver, Compiler::Kind::kQuick)) {}
 
 void OptimizingCompiler::Init() {
@@ -575,12 +574,6 @@
                                                      CompilerDriver* compiler_driver,
                                                      const DexCompilationUnit& dex_compilation_unit,
                                                      PassObserver* pass_observer) const {
-  if (graph->HasTryCatch() && graph->IsDebuggable()) {
-    // TODO: b/24054676, stop creating catch phis eagerly to avoid special cases like phis without
-    // inputs.
-    return nullptr;
-  }
-
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScopeCollection handles(soa.Self());
   soa.Self()->TransitionFromRunnableToSuspended(kNative);
@@ -836,8 +829,12 @@
   return compiled_method;
 }
 
-static bool HasOnlyUnresolvedFailures(const VerifiedMethod* verified_method) {
-  uint32_t unresolved_mask = verifier::VerifyError::VERIFY_ERROR_NO_CLASS;
+static bool CanHandleVerificationFailure(const VerifiedMethod* verified_method) {
+  // For access errors the compiler will use the unresolved helpers (e.g. HInvokeUnresolved).
+  uint32_t unresolved_mask = verifier::VerifyError::VERIFY_ERROR_NO_CLASS
+      | verifier::VerifyError::VERIFY_ERROR_ACCESS_CLASS
+      | verifier::VerifyError::VERIFY_ERROR_ACCESS_FIELD
+      | verifier::VerifyError::VERIFY_ERROR_ACCESS_METHOD;
   return (verified_method->GetEncounteredVerificationFailures() & (~unresolved_mask)) == 0;
 }
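The mask test above accepts a method only when every recorded verification failure is one of the four listed unresolved/access kinds. A hedged sketch of the bit arithmetic with invented flag values (the real VerifyError enumerators are defined in the verifier and are not reproduced here):

#include <cstdint>

constexpr uint32_t kNoClass      = 1u << 0;
constexpr uint32_t kAccessClass  = 1u << 1;
constexpr uint32_t kAccessField  = 1u << 2;
constexpr uint32_t kAccessMethod = 1u << 3;
constexpr uint32_t kHardFailure  = 1u << 4;  // anything outside the mask

constexpr uint32_t kUnresolvedMask = kNoClass | kAccessClass | kAccessField | kAccessMethod;

// Only "unresolved"-style failures: the method is still compilable.
static_assert(((kNoClass | kAccessField) & ~kUnresolvedMask) == 0, "accepted");
// Any other failure bit rejects the method.
static_assert(((kNoClass | kHardFailure) & ~kUnresolvedMask) != 0, "rejected");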
 
@@ -854,7 +851,7 @@
   const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
   DCHECK(!verified_method->HasRuntimeThrow());
   if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)
-      || HasOnlyUnresolvedFailures(verified_method)) {
+      || CanHandleVerificationFailure(verified_method)) {
      method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
                          method_idx, jclass_loader, dex_file, dex_cache);
   } else {
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index c7701b7..df45c8e 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -34,6 +34,8 @@
   kInstructionSimplifications,
   kInstructionSimplificationsArch,
   kUnresolvedMethod,
+  kUnresolvedField,
+  kUnresolvedFieldNotAFastAccess,
   kNotCompiledBranchOutsideMethodCode,
   kNotCompiledCannotBuildSSA,
   kNotCompiledCantAccesType,
@@ -45,7 +47,6 @@
   kNotCompiledPathological,
   kNotCompiledSpaceFilter,
   kNotCompiledUnhandledInstruction,
-  kNotCompiledUnresolvedField,
   kNotCompiledUnsupportedIsa,
   kNotCompiledVerifyAtRuntime,
   kNotOptimizedDisabled,
@@ -104,6 +105,8 @@
       case kInstructionSimplifications: return "kInstructionSimplifications";
       case kInstructionSimplificationsArch: return "kInstructionSimplificationsArch";
       case kUnresolvedMethod : return "kUnresolvedMethod";
+      case kUnresolvedField : return "kUnresolvedField";
+      case kUnresolvedFieldNotAFastAccess : return "kUnresolvedFieldNotAFastAccess";
       case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode";
       case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
       case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
@@ -115,7 +118,6 @@
       case kNotCompiledPathological : return "kNotCompiledPathological";
       case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
       case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
-      case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField";
       case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
       case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
       case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index f9d812f..fce7769 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -16,6 +16,8 @@
 #include <iostream>
 
 #include "parallel_move_resolver.h"
+
+#include "base/stl_util.h"
 #include "nodes.h"
 
 namespace art {
@@ -28,19 +30,19 @@
   for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
     MoveOperands* move = parallel_move->MoveOperandsAt(i);
     if (!move->IsRedundant()) {
-      moves_.Add(move);
+      moves_.push_back(move);
     }
   }
 }
 
 void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
-  DCHECK(moves_.IsEmpty());
+  DCHECK(moves_.empty());
   // Build up a worklist of moves.
   BuildInitialMoveList(parallel_move);
 
   // Move stack/stack slot to take advantage of a free register on constrained machines.
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    const MoveOperands& move = *moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    const MoveOperands& move = *moves_[i];
     // Ignore constants and moves already eliminated.
     if (move.IsEliminated() || move.GetSource().IsConstant()) {
       continue;
@@ -52,8 +54,8 @@
     }
   }
 
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    const MoveOperands& move = *moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    const MoveOperands& move = *moves_[i];
     // Skip constants to perform them last.  They don't block other moves
     // and skipping such moves with register destinations keeps those
     // registers free for the whole algorithm.
@@ -63,8 +65,8 @@
   }
 
   // Perform the moves with constant sources.
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    MoveOperands* move = moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    MoveOperands* move = moves_[i];
     if (!move->IsEliminated()) {
       DCHECK(move->GetSource().IsConstant());
       EmitMove(i);
@@ -73,7 +75,7 @@
     }
   }
 
-  moves_.Reset();
+  moves_.clear();
 }
 
 Location LowOf(Location location) {
@@ -123,7 +125,8 @@
   // which means that a call to PerformMove could change any source operand
   // in the move graph.
 
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   DCHECK(!move->IsPending());
   if (move->IsRedundant()) {
     // Because we swap register pairs first, following, un-pending
@@ -143,8 +146,8 @@
   // as this one's destination blocks this one so recursively perform all
   // such moves.
   MoveOperands* required_swap = nullptr;
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    const MoveOperands& other_move = *moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    const MoveOperands& other_move = *moves_[i];
     if (other_move.Blocks(destination) && !other_move.IsPending()) {
       // Though PerformMove can change any source operand in the move graph,
       // calling `PerformMove` cannot create a blocking move via a swap
@@ -163,7 +166,7 @@
         // at the next moves. Swapping is not blocked by anything, it just
         // updates other moves's source.
         break;
-      } else if (required_swap == moves_.Get(i)) {
+      } else if (required_swap == moves_[i]) {
         // If `other_move` was swapped, we iterate again to find a new
         // potential cycle.
         required_swap = nullptr;
@@ -171,7 +174,7 @@
       } else if (required_swap != nullptr) {
         // A move is required to swap. We walk back the cycle to find the
         // move by just returning from this `PerformMove`.
-        moves_.Get(index)->ClearPending(destination);
+        moves_[index]->ClearPending(destination);
         return required_swap;
       }
     }
@@ -197,14 +200,13 @@
     DCHECK_EQ(required_swap, move);
     do_swap = true;
   } else {
-    for (size_t i = 0; i < moves_.Size(); ++i) {
-      const MoveOperands& other_move = *moves_.Get(i);
-      if (other_move.Blocks(destination)) {
-        DCHECK(other_move.IsPending());
-        if (!move->Is64BitMove() && other_move.Is64BitMove()) {
+    for (MoveOperands* other_move : moves_) {
+      if (other_move->Blocks(destination)) {
+        DCHECK(other_move->IsPending());
+        if (!move->Is64BitMove() && other_move->Is64BitMove()) {
           // We swap 64bits moves before swapping 32bits moves. Go back from the
           // cycle by returning the move that must be swapped.
-          return moves_.Get(i);
+          return other_move;
         }
         do_swap = true;
         break;
@@ -220,12 +222,11 @@
     Location source = move->GetSource();
     Location swap_destination = move->GetDestination();
     move->Eliminate();
-    for (size_t i = 0; i < moves_.Size(); ++i) {
-      const MoveOperands& other_move = *moves_.Get(i);
-      if (other_move.Blocks(source)) {
-        UpdateSourceOf(moves_.Get(i), source, swap_destination);
-      } else if (other_move.Blocks(swap_destination)) {
-        UpdateSourceOf(moves_.Get(i), swap_destination, source);
+    for (MoveOperands* other_move : moves_) {
+      if (other_move->Blocks(source)) {
+        UpdateSourceOf(other_move, source, swap_destination);
+      } else if (other_move->Blocks(swap_destination)) {
+        UpdateSourceOf(other_move, swap_destination, source);
       }
     }
     // If the swap was required because of a 64bits move in the middle of a cycle,
@@ -242,14 +243,14 @@
 }
 
 bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    if (moves_.Get(i)->Blocks(loc)) {
+  for (MoveOperands* move : moves_) {
+    if (move->Blocks(loc)) {
       return false;
     }
   }
 
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    if (moves_.Get(i)->GetDestination().Equals(loc)) {
+  for (MoveOperands* move : moves_) {
+    if (move->GetDestination().Equals(loc)) {
       return true;
     }
   }
@@ -302,8 +303,8 @@
 
 void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
   DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
-  DCHECK(moves_.IsEmpty());
-  DCHECK(scratches_.IsEmpty());
+  DCHECK(moves_.empty());
+  DCHECK(scratches_.empty());
 
   // Backend dependent initialization.
   PrepareForEmitNativeCode();
@@ -311,8 +312,8 @@
   // Build up a worklist of moves.
   BuildInitialMoveList(parallel_move);
 
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    const MoveOperands& move = *moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    const MoveOperands& move = *moves_[i];
     // Skip constants to perform them last. They don't block other moves and
     // skipping such moves with register destinations keeps those registers
     // free for the whole algorithm.
@@ -324,8 +325,8 @@
   // Perform the moves with constant sources and register destinations with UpdateMoveSource()
   // to reduce the number of literal loads. Stack destinations are skipped since we won't benefit
   // from changing the constant sources to stack locations.
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    MoveOperands* move = moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    MoveOperands* move = moves_[i];
     Location destination = move->GetDestination();
     if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
       Location source = move->GetSource();
@@ -344,8 +345,8 @@
   }
 
   // Perform the rest of the moves.
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    MoveOperands* move = moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    MoveOperands* move = moves_[i];
     if (!move->IsEliminated()) {
       EmitMove(i);
       move->Eliminate();
@@ -358,19 +359,18 @@
   // Backend dependent cleanup.
   FinishEmitNativeCode();
 
-  moves_.Reset();
-  scratches_.Reset();
+  moves_.clear();
+  scratches_.clear();
 }
 
 Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
-  for (size_t i = 0; i < scratches_.Size(); ++i) {
-    Location loc = scratches_.Get(i);
+  for (Location loc : scratches_) {
     if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
       return loc;
     }
   }
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    Location loc = moves_.Get(i)->GetDestination();
+  for (MoveOperands* move : moves_) {
+    Location loc = move->GetDestination();
     if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
       return loc;
     }
@@ -380,18 +380,18 @@
 
 void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
   if (kIsDebugBuild) {
-    for (size_t i = 0; i < scratches_.Size(); ++i) {
-      DCHECK(!loc.Equals(scratches_.Get(i)));
+    for (Location scratch : scratches_) {
+      CHECK(!loc.Equals(scratch));
     }
   }
-  scratches_.Add(loc);
+  scratches_.push_back(loc);
 }
 
 void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
   DCHECK(!IsBlockedByMoves(loc));
-  for (size_t i = 0; i < scratches_.Size(); ++i) {
-    if (loc.Equals(scratches_.Get(i))) {
-      scratches_.DeleteAt(i);
+  for (auto it = scratches_.begin(), end = scratches_.end(); it != end; ++it) {
+    if (loc.Equals(*it)) {
+      scratches_.erase(it);
       break;
     }
   }
@@ -406,7 +406,8 @@
   // we will update source operand in the move graph to reduce dependencies in
   // the graph.
 
-  MoveOperands* move = moves_.Get(index);
+  DCHECK_LT(index, moves_.size());
+  MoveOperands* move = moves_[index];
   DCHECK(!move->IsPending());
   DCHECK(!move->IsEliminated());
   if (move->IsRedundant()) {
@@ -433,8 +434,8 @@
   // dependencies. Any unperformed, unpending move with a source the same
   // as this one's destination blocks this one so recursively perform all
   // such moves.
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    const MoveOperands& other_move = *moves_.Get(i);
+  for (size_t i = 0; i < moves_.size(); ++i) {
+    const MoveOperands& other_move = *moves_[i];
     if (other_move.Blocks(destination) && !other_move.IsPending()) {
       PerformMove(i);
     }
@@ -490,8 +491,11 @@
     move->Eliminate();
     UpdateMoveSource(pending_source, pending_destination);
     // Free any unblocked locations in the scratch location list.
-    for (size_t i = 0; i < scratches_.Size(); ++i) {
-      Location scratch = scratches_.Get(i);
+    // Note: Fetch size() on each iteration because scratches_ can be modified inside the loop.
+    // FIXME: If FreeScratchLocation() removes the location from scratches_,
+    // we skip the next location. This happens for arm64.
+    for (size_t i = 0; i < scratches_.size(); ++i) {
+      Location scratch = scratches_[i];
       // Only scratch overlapping with performed move source can be unblocked.
       if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
         FreeScratchLocation(pending_source);
@@ -512,8 +516,7 @@
   // This is not something we must do, but we can use fewer scratch locations with
   // this trick. For example, we can avoid using additional scratch locations for
   // moves (0 -> 1), (1 -> 2), (1 -> 0).
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    MoveOperands* move = moves_.Get(i);
+  for (MoveOperands* move : moves_) {
     if (move->GetSource().Equals(from)) {
       move->SetSource(to);
     }
@@ -522,16 +525,15 @@
 
 void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
     Location destination, Primitive::Type type) {
-  pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr));
+  pending_moves_.push_back(new (allocator_) MoveOperands(source, destination, type, nullptr));
 }
 
 void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
-  pending_moves_.Delete(move);
+  RemoveElement(pending_moves_, move);
 }
 
 MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
-  for (size_t i = 0; i < pending_moves_.Size(); ++i) {
-    MoveOperands* move = pending_moves_.Get(i);
+  for (MoveOperands* move : pending_moves_) {
     Location destination = move->GetDestination();
     // Only moves with destination overlapping with input loc can be unblocked.
     if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
@@ -542,13 +544,13 @@
 }
 
 bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
-  for (size_t i = 0; i < pending_moves_.Size(); ++i) {
-    if (pending_moves_.Get(i)->Blocks(loc)) {
+  for (MoveOperands* move : pending_moves_) {
+    if (move->Blocks(loc)) {
       return true;
     }
   }
-  for (size_t i = 0; i < moves_.Size(); ++i) {
-    if (moves_.Get(i)->Blocks(loc)) {
+  for (MoveOperands* move : moves_) {
+    if (move->Blocks(loc)) {
       return true;
     }
   }
@@ -558,7 +560,7 @@
 // So far it is only used for debugging purposes to make sure all pending moves
 // have been performed.
 size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
-  return pending_moves_.Size();
+  return pending_moves_.size();
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 9ede910..4278861 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -17,8 +17,8 @@
 #ifndef ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_
 #define ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_
 
+#include "base/arena_containers.h"
 #include "base/value_object.h"
-#include "utils/growable_array.h"
 #include "locations.h"
 #include "primitive.h"
 
@@ -31,7 +31,10 @@
 // have their own subclass that implements corresponding virtual functions.
 class ParallelMoveResolver : public ValueObject {
  public:
-  explicit ParallelMoveResolver(ArenaAllocator* allocator) : moves_(allocator, 32) {}
+  explicit ParallelMoveResolver(ArenaAllocator* allocator)
+      : moves_(allocator->Adapter(kArenaAllocParallelMoveResolver)) {
+    moves_.reserve(32);
+  }
   virtual ~ParallelMoveResolver() {}
 
   // Resolve a set of parallel moves, emitting assembler instructions.
@@ -41,7 +44,7 @@
   // Build the initial list of moves.
   void BuildInitialMoveList(HParallelMove* parallel_move);
 
-  GrowableArray<MoveOperands*> moves_;
+  ArenaVector<MoveOperands*> moves_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver);
@@ -120,8 +123,13 @@
 class ParallelMoveResolverNoSwap : public ParallelMoveResolver {
  public:
   explicit ParallelMoveResolverNoSwap(ArenaAllocator* allocator)
-      : ParallelMoveResolver(allocator), scratches_(allocator, 32),
-        pending_moves_(allocator, 8), allocator_(allocator) {}
+      : ParallelMoveResolver(allocator),
+        scratches_(allocator->Adapter(kArenaAllocParallelMoveResolver)),
+        pending_moves_(allocator->Adapter(kArenaAllocParallelMoveResolver)),
+        allocator_(allocator) {
+    scratches_.reserve(32);
+    pending_moves_.reserve(8);
+  }
   virtual ~ParallelMoveResolverNoSwap() {}
 
   // Resolve a set of parallel moves, emitting assembler instructions.
@@ -160,7 +168,7 @@
   void RemoveScratchLocation(Location loc);
 
   // List of scratch locations.
-  GrowableArray<Location> scratches_;
+  ArenaVector<Location> scratches_;
 
  private:
   // Perform the move at the given index in `moves_` (possibly requiring other moves to satisfy
@@ -183,7 +191,7 @@
   size_t GetNumberOfPendingMoves();
 
   // Additional pending moves which might be added to resolve dependency cycle.
-  GrowableArray<MoveOperands*> pending_moves_;
+  ArenaVector<MoveOperands*> pending_moves_;
 
   // Used to allocate pending MoveOperands.
   ArenaAllocator* const allocator_;
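
A note on the recurring constructor change: `ArenaVector` is essentially a `std::vector` with an arena allocator adapter, and `std::vector` has no (allocator, capacity) constructor like `GrowableArray` did, so the capacity hint moves into the constructor body as a `reserve()` call. A minimal sketch with a plain `std::vector` (illustrative only, not part of the patch):

#include <vector>

// The capacity hint that used to be a GrowableArray constructor argument
// becomes an explicit reserve() call in the constructor body.
class MoveList {
 public:
  MoveList() {
    moves_.reserve(32);  // Pre-allocate capacity; size() stays 0.
  }

  void Add(int* move) { moves_.push_back(move); }

 private:
  std::vector<int*> moves_;  // Stand-in for ArenaVector<MoveOperands*>.
};
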
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index f8f7010..da91cb8 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -56,7 +56,8 @@
       : ParallelMoveResolverWithSwap(allocator) {}
 
   void EmitMove(size_t index) OVERRIDE {
-    MoveOperands* move = moves_.Get(index);
+    DCHECK_LT(index, moves_.size());
+    MoveOperands* move = moves_[index];
     if (!message_.str().empty()) {
       message_ << " ";
     }
@@ -68,7 +69,8 @@
   }
 
   void EmitSwap(size_t index) OVERRIDE {
-    MoveOperands* move = moves_.Get(index);
+    DCHECK_LT(index, moves_.size());
+    MoveOperands* move = moves_[index];
     if (!message_.str().empty()) {
       message_ << " ";
     }
@@ -127,7 +129,8 @@
   void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {}
 
   void EmitMove(size_t index) OVERRIDE {
-    MoveOperands* move = moves_.Get(index);
+    DCHECK_LT(index, moves_.size());
+    MoveOperands* move = moves_[index];
     if (!message_.str().empty()) {
       message_ << " ";
     }
diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc
index af93438..c98f43e 100644
--- a/compiler/optimizing/primitive_type_propagation.cc
+++ b/compiler/optimizing/primitive_type_propagation.cc
@@ -108,8 +108,9 @@
 }
 
 void PrimitiveTypePropagation::ProcessWorklist() {
-  while (!worklist_.IsEmpty()) {
-    HPhi* instruction = worklist_.Pop();
+  while (!worklist_.empty()) {
+    HPhi* instruction = worklist_.back();
+    worklist_.pop_back();
     if (UpdateType(instruction)) {
       AddDependentInstructionsToWorklist(instruction);
     }
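
The worklist change is the usual translation of `GrowableArray::Pop()` into `back()` followed by `pop_back()`, since `std::vector::pop_back()` returns nothing. A small sketch of the drain loop with a plain `std::vector` (illustrative only):

#include <vector>

// Drain a LIFO worklist: back() reads the last element, pop_back() removes it.
void DrainWorklist(std::vector<int>* worklist) {
  while (!worklist->empty()) {
    int item = worklist->back();
    worklist->pop_back();  // pop_back() returns void, hence the two-step Pop().
    (void)item;            // Process `item` here.
  }
}
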
@@ -118,7 +119,7 @@
 
 void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) {
   DCHECK(instruction->IsLive());
-  worklist_.Add(instruction);
+  worklist_.push_back(instruction);
 }
 
 void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
diff --git a/compiler/optimizing/primitive_type_propagation.h b/compiler/optimizing/primitive_type_propagation.h
index 6d370ed..212fcfc 100644
--- a/compiler/optimizing/primitive_type_propagation.h
+++ b/compiler/optimizing/primitive_type_propagation.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_
 #define ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_
 
+#include "base/arena_containers.h"
 #include "nodes.h"
 
 namespace art {
@@ -25,7 +26,9 @@
 class PrimitiveTypePropagation : public ValueObject {
  public:
   explicit PrimitiveTypePropagation(HGraph* graph)
-      : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {}
+      : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocPrimitiveTypePropagation)) {
+    worklist_.reserve(kDefaultWorklistSize);
+  }
 
   void Run();
 
@@ -37,7 +40,7 @@
   bool UpdateType(HPhi* phi);
 
   HGraph* const graph_;
-  GrowableArray<HPhi*> worklist_;
+  ArenaVector<HPhi*> worklist_;
 
   static constexpr size_t kDefaultWorklistSize = 8;
 
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index a88c543..d22f254 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -27,7 +27,7 @@
  public:
   RTPVisitor(HGraph* graph,
              StackHandleScopeCollection* handles,
-             GrowableArray<HInstruction*>* worklist,
+             ArenaVector<HInstruction*>* worklist,
              ReferenceTypeInfo::TypeHandle object_class_handle,
              ReferenceTypeInfo::TypeHandle class_class_handle,
              ReferenceTypeInfo::TypeHandle string_class_handle,
@@ -52,6 +52,8 @@
   void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact);
   void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE;
   void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE;
+  void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) OVERRIDE;
+  void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instr) OVERRIDE;
   void VisitInvoke(HInvoke* instr) OVERRIDE;
   void VisitArrayGet(HArrayGet* instr) OVERRIDE;
   void VisitCheckCast(HCheckCast* instr) OVERRIDE;
@@ -68,7 +70,7 @@
   ReferenceTypeInfo::TypeHandle class_class_handle_;
   ReferenceTypeInfo::TypeHandle string_class_handle_;
   ReferenceTypeInfo::TypeHandle throwable_class_handle_;
-  GrowableArray<HInstruction*>* worklist_;
+  ArenaVector<HInstruction*>* worklist_;
 
   static constexpr size_t kDefaultWorklistSize = 8;
 };
@@ -78,7 +80,8 @@
                                                    const char* name)
     : HOptimization(graph, name),
       handles_(handles),
-      worklist_(graph->GetArena(), kDefaultWorklistSize) {
+      worklist_(graph->GetArena()->Adapter(kArenaAllocReferenceTypePropagation)) {
+  worklist_.reserve(kDefaultWorklistSize);
   // Mutator lock is required for NewHandle, but annotalysis ignores constructors.
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
@@ -449,6 +452,22 @@
   UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo());
 }
 
+void RTPVisitor::VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) {
+  // TODO: Use descriptor to get the actual type.
+  if (instr->GetFieldType() == Primitive::kPrimNot) {
+    instr->SetReferenceTypeInfo(
+      ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
+  }
+}
+
+void RTPVisitor::VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instr) {
+  // TODO: Use descriptor to get the actual type.
+  if (instr->GetFieldType() == Primitive::kPrimNot) {
+    instr->SetReferenceTypeInfo(
+      ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
+  }
+}
+
 void RTPVisitor::VisitLoadClass(HLoadClass* instr) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::DexCache* dex_cache =
@@ -649,7 +668,7 @@
   ScopedObjectAccess soa(Thread::Current());
   UpdateArrayGet(instr, handles_, object_class_handle_);
   if (!instr->GetReferenceTypeInfo().IsValid()) {
-    worklist_->Add(instr);
+    worklist_->push_back(instr);
   }
 }
 
@@ -718,8 +737,9 @@
 }
 
 void ReferenceTypePropagation::ProcessWorklist() {
-  while (!worklist_.IsEmpty()) {
-    HInstruction* instruction = worklist_.Pop();
+  while (!worklist_.empty()) {
+    HInstruction* instruction = worklist_.back();
+    worklist_.pop_back();
     if (UpdateNullability(instruction) || UpdateReferenceTypeInfo(instruction)) {
       AddDependentInstructionsToWorklist(instruction);
     }
@@ -729,7 +749,7 @@
 void ReferenceTypePropagation::AddToWorklist(HInstruction* instruction) {
   DCHECK_EQ(instruction->GetType(), Primitive::kPrimNot)
       << instruction->DebugName() << ":" << instruction->GetType();
-  worklist_.Add(instruction);
+  worklist_.push_back(instruction);
 }
 
 void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 62f6ab8..5493601 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_
 #define ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_
 
+#include "base/arena_containers.h"
 #include "driver/dex_compilation_unit.h"
 #include "handle_scope-inl.h"
 #include "nodes.h"
@@ -57,7 +58,7 @@
 
   StackHandleScopeCollection* handles_;
 
-  GrowableArray<HInstruction*> worklist_;
+  ArenaVector<HInstruction*> worklist_;
 
   ReferenceTypeInfo::TypeHandle object_class_handle_;
   ReferenceTypeInfo::TypeHandle class_class_handle_;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index a4f1f45..9cdb89b 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -43,21 +43,21 @@
       : allocator_(allocator),
         codegen_(codegen),
         liveness_(liveness),
-        unhandled_core_intervals_(allocator, 0),
-        unhandled_fp_intervals_(allocator, 0),
+        unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
         unhandled_(nullptr),
-        handled_(allocator, 0),
-        active_(allocator, 0),
-        inactive_(allocator, 0),
-        physical_core_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()),
-        physical_fp_register_intervals_(allocator, codegen->GetNumberOfFloatingPointRegisters()),
-        temp_intervals_(allocator, 4),
-        int_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
-        long_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
-        float_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
-        double_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
+        handled_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        active_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
+        double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)),
         catch_phi_spill_slots_(0),
-        safepoints_(allocator, 0),
+        safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
         processing_core_registers_(false),
         number_of_registers_(-1),
         registers_array_(nullptr),
@@ -66,10 +66,16 @@
         reserved_out_slots_(0),
         maximum_number_of_live_core_registers_(0),
         maximum_number_of_live_fp_registers_(0) {
+  temp_intervals_.reserve(4);
+  int_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+  long_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+  float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+  double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
+
   static constexpr bool kIsBaseline = false;
   codegen->SetupBlockedRegisters(kIsBaseline);
-  physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters());
-  physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters());
+  physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
+  physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
   // Always reserve for the current method and the graph's max out registers.
   // TODO: compute it instead.
   // ArtMethod* takes 2 vregs for 64 bits.
@@ -129,17 +135,17 @@
   int reg = location.reg();
   DCHECK(location.IsRegister() || location.IsFpuRegister());
   LiveInterval* interval = location.IsRegister()
-      ? physical_core_register_intervals_.Get(reg)
-      : physical_fp_register_intervals_.Get(reg);
+      ? physical_core_register_intervals_[reg]
+      : physical_fp_register_intervals_[reg];
   Primitive::Type type = location.IsRegister()
       ? Primitive::kPrimInt
       : Primitive::kPrimFloat;
   if (interval == nullptr) {
     interval = LiveInterval::MakeFixedInterval(allocator_, reg, type);
     if (location.IsRegister()) {
-      physical_core_register_intervals_.Put(reg, interval);
+      physical_core_register_intervals_[reg] = interval;
     } else {
-      physical_fp_register_intervals_.Put(reg, interval);
+      physical_fp_register_intervals_[reg] = interval;
     }
   }
   DCHECK(interval->GetRegister() == reg);
@@ -181,37 +187,37 @@
   }
 
   number_of_registers_ = codegen_->GetNumberOfCoreRegisters();
-  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_);
+  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
+                                                    kArenaAllocRegisterAllocator);
   processing_core_registers_ = true;
   unhandled_ = &unhandled_core_intervals_;
-  for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) {
-    LiveInterval* fixed = physical_core_register_intervals_.Get(i);
+  for (LiveInterval* fixed : physical_core_register_intervals_) {
     if (fixed != nullptr) {
       // Fixed interval is added to inactive_ instead of unhandled_.
       // It's also the only type of inactive interval whose start position
       // can be after the current interval during linear scan.
       // Fixed interval is never split and never moves to unhandled_.
-      inactive_.Add(fixed);
+      inactive_.push_back(fixed);
     }
   }
   LinearScan();
 
-  inactive_.Reset();
-  active_.Reset();
-  handled_.Reset();
+  inactive_.clear();
+  active_.clear();
+  handled_.clear();
 
   number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters();
-  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_);
+  registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_,
+                                                    kArenaAllocRegisterAllocator);
   processing_core_registers_ = false;
   unhandled_ = &unhandled_fp_intervals_;
-  for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) {
-    LiveInterval* fixed = physical_fp_register_intervals_.Get(i);
+  for (LiveInterval* fixed : physical_fp_register_intervals_) {
     if (fixed != nullptr) {
       // Fixed interval is added to inactive_ instead of unhandled_.
       // It's also the only type of inactive interval whose start position
       // can be after the current interval during linear scan.
       // Fixed interval is never split and never moves to unhandled_.
-      inactive_.Add(fixed);
+      inactive_.push_back(fixed);
     }
   }
   LinearScan();
@@ -236,24 +242,24 @@
         case Location::kRequiresRegister: {
           LiveInterval* interval =
               LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
-          temp_intervals_.Add(interval);
+          temp_intervals_.push_back(interval);
           interval->AddTempUse(instruction, i);
-          unhandled_core_intervals_.Add(interval);
+          unhandled_core_intervals_.push_back(interval);
           break;
         }
 
         case Location::kRequiresFpuRegister: {
           LiveInterval* interval =
               LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
-          temp_intervals_.Add(interval);
+          temp_intervals_.push_back(interval);
           interval->AddTempUse(instruction, i);
           if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) {
             interval->AddHighInterval(/* is_temp */ true);
             LiveInterval* high = interval->GetHighInterval();
-            temp_intervals_.Add(high);
-            unhandled_fp_intervals_.Add(high);
+            temp_intervals_.push_back(high);
+            unhandled_fp_intervals_.push_back(high);
           }
-          unhandled_fp_intervals_.Add(interval);
+          unhandled_fp_intervals_.push_back(interval);
           break;
         }
 
@@ -276,7 +282,7 @@
       instruction->GetBlock()->RemoveInstruction(instruction);
       return;
     }
-    safepoints_.Add(instruction);
+    safepoints_.push_back(instruction);
     if (locations->OnlyCallsOnSlowPath()) {
       // We add a synthesized range at this position to record the live registers
       // at this position. Ideally, we could just update the safepoints when locations
@@ -310,28 +316,28 @@
   LiveInterval* current = instruction->GetLiveInterval();
   if (current == nullptr) return;
 
-  GrowableArray<LiveInterval*>& unhandled = core_register
+  ArenaVector<LiveInterval*>& unhandled = core_register
       ? unhandled_core_intervals_
       : unhandled_fp_intervals_;
 
-  DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek()));
+  DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back()));
 
   if (codegen_->NeedsTwoRegisters(current->GetType())) {
     current->AddHighInterval();
   }
 
-  for (size_t safepoint_index = safepoints_.Size(); safepoint_index > 0; --safepoint_index) {
-    HInstruction* safepoint = safepoints_.Get(safepoint_index - 1);
+  for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
+    HInstruction* safepoint = safepoints_[safepoint_index - 1u];
     size_t safepoint_position = safepoint->GetLifetimePosition();
 
     // Test that safepoints are ordered in the optimal way.
-    DCHECK(safepoint_index == safepoints_.Size()
-           || safepoints_.Get(safepoint_index)->GetLifetimePosition() < safepoint_position);
+    DCHECK(safepoint_index == safepoints_.size() ||
+           safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
 
     if (safepoint_position == current->GetStart()) {
       // The safepoint is for this instruction, so the location of the instruction
       // does not need to be saved.
-      DCHECK_EQ(safepoint_index, safepoints_.Size());
+      DCHECK_EQ(safepoint_index, safepoints_.size());
       DCHECK_EQ(safepoint, instruction);
       continue;
     } else if (current->IsDeadAt(safepoint_position)) {
@@ -437,34 +443,26 @@
 bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const {
   // To simplify unit testing, we eagerly create the array of intervals, and
   // call the helper method.
-  GrowableArray<LiveInterval*> intervals(allocator_, 0);
+  ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
   for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
     HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
     if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) {
-      intervals.Add(instruction->GetLiveInterval());
+      intervals.push_back(instruction->GetLiveInterval());
     }
   }
 
-  if (processing_core_registers_) {
-    for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) {
-      LiveInterval* fixed = physical_core_register_intervals_.Get(i);
-      if (fixed != nullptr) {
-        intervals.Add(fixed);
-      }
-    }
-  } else {
-    for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) {
-      LiveInterval* fixed = physical_fp_register_intervals_.Get(i);
-      if (fixed != nullptr) {
-        intervals.Add(fixed);
-      }
+  const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_
+      ? &physical_core_register_intervals_
+      : &physical_fp_register_intervals_;
+  for (LiveInterval* fixed : *physical_register_intervals) {
+    if (fixed != nullptr) {
+      intervals.push_back(fixed);
     }
   }
 
-  for (size_t i = 0, e = temp_intervals_.Size(); i < e; ++i) {
-    LiveInterval* temp = temp_intervals_.Get(i);
+  for (LiveInterval* temp : temp_intervals_) {
     if (ShouldProcess(processing_core_registers_, temp)) {
-      intervals.Add(temp);
+      intervals.push_back(temp);
     }
   }
 
@@ -472,7 +470,7 @@
                            allocator_, processing_core_registers_, log_fatal_on_failure);
 }
 
-bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& intervals,
+bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
                                           size_t number_of_spill_slots,
                                           size_t number_of_out_slots,
                                           const CodeGenerator& codegen,
@@ -482,26 +480,27 @@
   size_t number_of_registers = processing_core_registers
       ? codegen.GetNumberOfCoreRegisters()
       : codegen.GetNumberOfFloatingPointRegisters();
-  GrowableArray<ArenaBitVector*> liveness_of_values(
-      allocator, number_of_registers + number_of_spill_slots);
+  ArenaVector<ArenaBitVector*> liveness_of_values(
+      allocator->Adapter(kArenaAllocRegisterAllocator));
+  liveness_of_values.reserve(number_of_registers + number_of_spill_slots);
 
   // Allocate a bit vector per register. A live interval that has a register
   // allocated will populate the associated bit vector based on its live ranges.
   for (size_t i = 0; i < number_of_registers + number_of_spill_slots; ++i) {
-    liveness_of_values.Add(new (allocator) ArenaBitVector(allocator, 0, true));
+    liveness_of_values.push_back(new (allocator) ArenaBitVector(allocator, 0, true));
   }
 
-  for (size_t i = 0, e = intervals.Size(); i < e; ++i) {
-    for (AllRangesIterator it(intervals.Get(i)); !it.Done(); it.Advance()) {
+  for (LiveInterval* start_interval : intervals) {
+    for (AllRangesIterator it(start_interval); !it.Done(); it.Advance()) {
       LiveInterval* current = it.CurrentInterval();
       HInstruction* defined_by = current->GetParent()->GetDefinedBy();
       if (current->GetParent()->HasSpillSlot()
            // Parameters and current method have their own stack slot.
            && !(defined_by != nullptr && (defined_by->IsParameterValue()
                                           || defined_by->IsCurrentMethod()))) {
-        BitVector* liveness_of_spill_slot = liveness_of_values.Get(number_of_registers
+        BitVector* liveness_of_spill_slot = liveness_of_values[number_of_registers
             + current->GetParent()->GetSpillSlot() / kVRegSize
-            - number_of_out_slots);
+            - number_of_out_slots];
         for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
           if (liveness_of_spill_slot->IsBitSet(j)) {
             if (log_fatal_on_failure) {
@@ -523,7 +522,7 @@
           // and test code may not properly fill the right information to the code generator.
           CHECK(codegen.HasAllocatedRegister(processing_core_registers, current->GetRegister()));
         }
-        BitVector* liveness_of_register = liveness_of_values.Get(current->GetRegister());
+        BitVector* liveness_of_register = liveness_of_values[current->GetRegister()];
         for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
           if (liveness_of_register->IsBitSet(j)) {
             if (current->IsUsingInputRegister() && current->CanUseInputRegister()) {
@@ -572,93 +571,101 @@
 
 void RegisterAllocator::DumpAllIntervals(std::ostream& stream) const {
   stream << "inactive: " << std::endl;
-  for (size_t i = 0; i < inactive_.Size(); i ++) {
-    DumpInterval(stream, inactive_.Get(i));
+  for (LiveInterval* inactive_interval : inactive_) {
+    DumpInterval(stream, inactive_interval);
   }
   stream << "active: " << std::endl;
-  for (size_t i = 0; i < active_.Size(); i ++) {
-    DumpInterval(stream, active_.Get(i));
+  for (LiveInterval* active_interval : active_) {
+    DumpInterval(stream, active_interval);
   }
   stream << "unhandled: " << std::endl;
   auto unhandled = (unhandled_ != nullptr) ?
       unhandled_ : &unhandled_core_intervals_;
-  for (size_t i = 0; i < unhandled->Size(); i ++) {
-    DumpInterval(stream, unhandled->Get(i));
+  for (LiveInterval* unhandled_interval : *unhandled) {
+    DumpInterval(stream, unhandled_interval);
   }
   stream << "handled: " << std::endl;
-  for (size_t i = 0; i < handled_.Size(); i ++) {
-    DumpInterval(stream, handled_.Get(i));
+  for (LiveInterval* handled_interval : handled_) {
+    DumpInterval(stream, handled_interval);
   }
 }
 
 // By the book implementation of a linear scan register allocator.
 void RegisterAllocator::LinearScan() {
-  while (!unhandled_->IsEmpty()) {
+  while (!unhandled_->empty()) {
     // (1) Remove interval with the lowest start position from unhandled.
-    LiveInterval* current = unhandled_->Pop();
+    LiveInterval* current = unhandled_->back();
+    unhandled_->pop_back();
 
     // Make sure the interval is in an expected state.
     DCHECK(!current->IsFixed() && !current->HasSpillSlot());
     // Make sure we are going in the right order.
-    DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() >= current->GetStart());
+    DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart());
     // Make sure a low interval is always with a high.
-    DCHECK(!current->IsLowInterval() || unhandled_->Peek()->IsHighInterval());
+    DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval());
     // Make sure a high interval is always with a low.
     DCHECK(current->IsLowInterval() ||
-           unhandled_->IsEmpty() ||
-           !unhandled_->Peek()->IsHighInterval());
+           unhandled_->empty() ||
+           !unhandled_->back()->IsHighInterval());
 
     size_t position = current->GetStart();
 
     // Remember the inactive_ size here since the ones moved to inactive_ from
     // active_ below shouldn't need to be re-checked.
-    size_t inactive_intervals_to_handle = inactive_.Size();
+    size_t inactive_intervals_to_handle = inactive_.size();
 
     // (2) Remove currently active intervals that are dead at this position.
     //     Move active intervals that have a lifetime hole at this position
     //     to inactive.
-    for (size_t i = 0; i < active_.Size(); ++i) {
-      LiveInterval* interval = active_.Get(i);
+    // Note: Copy elements we keep to the beginning, just like
+    //     v.erase(std::remove(v.begin(), v.end(), value), v.end());
+    auto active_kept_end = active_.begin();
+    for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
+      LiveInterval* interval = *it;
       if (interval->IsDeadAt(position)) {
-        active_.Delete(interval);
-        --i;
-        handled_.Add(interval);
+        handled_.push_back(interval);
       } else if (!interval->Covers(position)) {
-        active_.Delete(interval);
-        --i;
-        inactive_.Add(interval);
+        inactive_.push_back(interval);
+      } else {
+        *active_kept_end++ = interval;  // Keep this interval.
       }
     }
+    // We have copied what we want to keep to [active_.begin(), active_kept_end),
+    // the rest of the data in active_ is junk - drop it.
+    active_.erase(active_kept_end, active_.end());
 
     // (3) Remove currently inactive intervals that are dead at this position.
     //     Move inactive intervals that cover this position to active.
-    for (size_t i = 0; i < inactive_intervals_to_handle; ++i) {
-      LiveInterval* interval = inactive_.Get(i);
+    // Note: Copy elements we keep to the beginning, just like
+    //     v.erase(std::remove(v.begin(), v.begin() + num, value), v.begin() + num);
+    auto inactive_kept_end = inactive_.begin();
+    auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle;
+    for (auto it = inactive_.begin(); it != inactive_to_handle_end; ++it) {
+      LiveInterval* interval = *it;
       DCHECK(interval->GetStart() < position || interval->IsFixed());
       if (interval->IsDeadAt(position)) {
-        inactive_.Delete(interval);
-        --i;
-        --inactive_intervals_to_handle;
-        handled_.Add(interval);
+        handled_.push_back(interval);
       } else if (interval->Covers(position)) {
-        inactive_.Delete(interval);
-        --i;
-        --inactive_intervals_to_handle;
-        active_.Add(interval);
+        active_.push_back(interval);
+      } else {
+        *inactive_kept_end++ = interval;  // Keep this interval.
       }
     }
+    // We have copied what we want to keep to [inactive_.begin(), inactive_kept_end),
+    // the rest of the data in the processed prefix of inactive_ is junk - drop it.
+    inactive_.erase(inactive_kept_end, inactive_to_handle_end);
 
     if (current->IsSlowPathSafepoint()) {
       // Synthesized interval to record the maximum number of live registers
       // at safepoints. No need to allocate a register for it.
       if (processing_core_registers_) {
         maximum_number_of_live_core_registers_ =
-          std::max(maximum_number_of_live_core_registers_, active_.Size());
+          std::max(maximum_number_of_live_core_registers_, active_.size());
       } else {
         maximum_number_of_live_fp_registers_ =
-          std::max(maximum_number_of_live_fp_registers_, active_.Size());
+          std::max(maximum_number_of_live_fp_registers_, active_.size());
       }
-      DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() > current->GetStart());
+      DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart());
       continue;
     }
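
The rewritten active_/inactive_ scans above replace delete-while-indexing with a single compaction pass: elements to keep are copied toward the front and the leftover tail is erased once, the same shape as the erase-remove idiom mentioned in the comments. A self-contained sketch of the idiom (illustrative predicate, not part of the patch):

#include <vector>

// Keep the elements matching a predicate (here: even numbers), preserving order,
// then erase the leftover tail in a single call.
void KeepEven(std::vector<int>* v) {
  auto kept_end = v->begin();
  for (auto it = v->begin(), end = v->end(); it != end; ++it) {
    if (*it % 2 == 0) {
      *kept_end++ = *it;  // Copy a kept element to the next free slot near the front.
    }
  }
  v->erase(kept_end, v->end());  // Everything past kept_end is junk now.
}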
 
@@ -683,7 +690,7 @@
       codegen_->AddAllocatedRegister(processing_core_registers_
           ? Location::RegisterLocation(current->GetRegister())
           : Location::FpuRegisterLocation(current->GetRegister()));
-      active_.Add(current);
+      active_.push_back(current);
       if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) {
         current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister()));
       }
@@ -726,8 +733,7 @@
   }
 
   // For each active interval, set its register to not free.
-  for (size_t i = 0, e = active_.Size(); i < e; ++i) {
-    LiveInterval* interval = active_.Get(i);
+  for (LiveInterval* interval : active_) {
     DCHECK(interval->HasRegister());
     free_until[interval->GetRegister()] = 0;
   }
@@ -762,8 +768,7 @@
 
   // For each inactive interval, set its register to be free until
   // the next intersection with `current`.
-  for (size_t i = 0, e = inactive_.Size(); i < e; ++i) {
-    LiveInterval* inactive = inactive_.Get(i);
+  for (LiveInterval* inactive : inactive_) {
     // Temp/Slow-path-safepoint interval has no holes.
     DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
     if (!current->IsSplit() && !inactive->IsFixed()) {
@@ -923,11 +928,29 @@
   return reg;
 }
 
+// Remove interval and its other half if any. Return iterator to the following element.
+static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf(
+    ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) {
+  DCHECK(intervals->begin() <= pos && pos < intervals->end());
+  LiveInterval* interval = *pos;
+  if (interval->IsLowInterval()) {
+    DCHECK(pos + 1 < intervals->end());
+    DCHECK_EQ(*(pos + 1), interval->GetHighInterval());
+    return intervals->erase(pos, pos + 2);
+  } else if (interval->IsHighInterval()) {
+    DCHECK(intervals->begin() < pos);
+    DCHECK_EQ(*(pos - 1), interval->GetLowInterval());
+    return intervals->erase(pos - 1, pos + 1);
+  } else {
+    return intervals->erase(pos);
+  }
+}
+
 bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position,
                                                                  size_t first_register_use,
                                                                  size_t* next_use) {
-  for (size_t i = 0, e = active_.Size(); i < e; ++i) {
-    LiveInterval* active = active_.Get(i);
+  for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
+    LiveInterval* active = *it;
     DCHECK(active->HasRegister());
     if (active->IsFixed()) continue;
     if (active->IsHighInterval()) continue;
@@ -941,11 +964,10 @@
         IsLowOfUnalignedPairInterval(active) ||
         !IsLowRegister(active->GetRegister())) {
       LiveInterval* split = Split(active, position);
-      active_.DeleteAt(i);
       if (split != active) {
-        handled_.Add(active);
+        handled_.push_back(active);
       }
-      PotentiallyRemoveOtherHalf(active, &active_, i);
+      RemoveIntervalAndPotentialOtherHalf(&active_, it);
       AddSorted(unhandled_, split);
       return true;
     }
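
`RemoveIntervalAndPotentialOtherHalf()` above relies on `std::vector::erase(first, last)` removing a contiguous range and returning an iterator to the element after it, which is what allows a low interval and its adjacent high interval to be dropped in one call. A tiny sketch of that behaviour (illustrative values):

#include <cassert>
#include <vector>

void ErasePairExample() {
  std::vector<int> v = {10, 20, 21, 30};  // 20/21 stand in for a low/high interval pair.
  auto pos = v.begin() + 1;               // Points at 20.
  auto next = v.erase(pos, pos + 2);      // Erases 20 and 21 in one call.
  assert(*next == 30);                    // erase() returns an iterator to the element after the range.
  assert((v == std::vector<int>{10, 30}));
}
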
@@ -953,23 +975,6 @@
   return false;
 }
 
-bool RegisterAllocator::PotentiallyRemoveOtherHalf(LiveInterval* interval,
-                                                   GrowableArray<LiveInterval*>* intervals,
-                                                   size_t index) {
-  if (interval->IsLowInterval()) {
-    DCHECK_EQ(intervals->Get(index), interval->GetHighInterval());
-    intervals->DeleteAt(index);
-    return true;
-  } else if (interval->IsHighInterval()) {
-    DCHECK_GT(index, 0u);
-    DCHECK_EQ(intervals->Get(index - 1), interval->GetLowInterval());
-    intervals->DeleteAt(index - 1);
-    return true;
-  } else {
-    return false;
-  }
-}
-
 // Find the register that is used the last, and spill the interval
 // that holds it. If the first use of `current` is after that register
 // we spill `current` instead.
@@ -1001,8 +1006,7 @@
 
   // For each active interval, find the next use of its register after the
   // start of current.
-  for (size_t i = 0, e = active_.Size(); i < e; ++i) {
-    LiveInterval* active = active_.Get(i);
+  for (LiveInterval* active : active_) {
     DCHECK(active->HasRegister());
     if (active->IsFixed()) {
       next_use[active->GetRegister()] = current->GetStart();
@@ -1016,8 +1020,7 @@
 
   // For each inactive interval, find the next use of its register after the
   // start of current.
-  for (size_t i = 0, e = inactive_.Size(); i < e; ++i) {
-    LiveInterval* inactive = inactive_.Get(i);
+  for (LiveInterval* inactive : inactive_) {
     // Temp/Slow-path-safepoint interval has no holes.
     DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint());
     if (!current->IsSplit() && !inactive->IsFixed()) {
@@ -1087,10 +1090,10 @@
                                                               first_register_use,
                                                               next_use);
       DCHECK(success);
-      LiveInterval* existing = unhandled_->Peek();
+      LiveInterval* existing = unhandled_->back();
       DCHECK(existing->IsHighInterval());
       DCHECK_EQ(existing->GetLowInterval(), current);
-      unhandled_->Add(current);
+      unhandled_->push_back(current);
     } else {
       // If the first use of that instruction is after the last use of the found
       // register, we split this interval just before its first register use.
@@ -1105,23 +1108,24 @@
     // have that register.
     current->SetRegister(reg);
 
-    for (size_t i = 0, e = active_.Size(); i < e; ++i) {
-      LiveInterval* active = active_.Get(i);
+    for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
+      LiveInterval* active = *it;
       if (active->GetRegister() == reg) {
         DCHECK(!active->IsFixed());
         LiveInterval* split = Split(active, current->GetStart());
         if (split != active) {
-          handled_.Add(active);
+          handled_.push_back(active);
         }
-        active_.DeleteAt(i);
-        PotentiallyRemoveOtherHalf(active, &active_, i);
+        RemoveIntervalAndPotentialOtherHalf(&active_, it);
         AddSorted(unhandled_, split);
         break;
       }
     }
 
-    for (size_t i = 0; i < inactive_.Size(); ++i) {
-      LiveInterval* inactive = inactive_.Get(i);
+    // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body.
+    for (auto it = inactive_.begin(); it != inactive_.end(); ) {
+      LiveInterval* inactive = *it;
+      bool erased = false;
       if (inactive->GetRegister() == reg) {
         if (!current->IsSplit() && !inactive->IsFixed()) {
           // Neither current nor inactive are fixed.
@@ -1129,43 +1133,43 @@
           // inactive interval should never intersect with that inactive interval.
           // Only if it's not fixed though, because fixed intervals don't come from SSA.
           DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime);
-          continue;
-        }
-        size_t next_intersection = inactive->FirstIntersectionWith(current);
-        if (next_intersection != kNoLifetime) {
-          if (inactive->IsFixed()) {
-            LiveInterval* split = Split(current, next_intersection);
-            DCHECK_NE(split, current);
-            AddSorted(unhandled_, split);
-          } else {
-            // Split at the start of `current`, which will lead to splitting
-            // at the end of the lifetime hole of `inactive`.
-            LiveInterval* split = Split(inactive, current->GetStart());
-            // If it's inactive, it must start before the current interval.
-            DCHECK_NE(split, inactive);
-            inactive_.DeleteAt(i);
-            if (PotentiallyRemoveOtherHalf(inactive, &inactive_, i) && inactive->IsHighInterval()) {
-              // We have removed an entry prior to `inactive`. So we need to decrement.
-              --i;
+        } else {
+          size_t next_intersection = inactive->FirstIntersectionWith(current);
+          if (next_intersection != kNoLifetime) {
+            if (inactive->IsFixed()) {
+              LiveInterval* split = Split(current, next_intersection);
+              DCHECK_NE(split, current);
+              AddSorted(unhandled_, split);
+            } else {
+              // Split at the start of `current`, which will lead to splitting
+              // at the end of the lifetime hole of `inactive`.
+              LiveInterval* split = Split(inactive, current->GetStart());
+              // If it's inactive, it must start before the current interval.
+              DCHECK_NE(split, inactive);
+              it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it);
+              erased = true;
+              handled_.push_back(inactive);
+              AddSorted(unhandled_, split);
             }
-            // Decrement because we have removed `inactive` from the list.
-            --i;
-            handled_.Add(inactive);
-            AddSorted(unhandled_, split);
           }
         }
       }
+      // If we have erased the element, `it` already points to the next element.
+      // Otherwise we need to move to the next element.
+      if (!erased) {
+        ++it;
+      }
     }
 
     return true;
   }
 }
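
The inactive_ loop above uses the standard erase-while-iterating pattern: when an element is erased, `erase()` already yields the iterator of the next element, so the loop only advances when nothing was removed (hence the `erased` flag and the re-evaluated `inactive_.end()`). A small sketch of the same pattern (illustrative only):

#include <vector>

// Remove zeros while iterating: advance only when nothing was erased,
// because erase() already yields the iterator of the next element.
void DropZeros(std::vector<int>* v) {
  for (auto it = v->begin(); it != v->end(); /* advanced in the body */) {
    if (*it == 0) {
      it = v->erase(it);
    } else {
      ++it;
    }
  }
}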
 
-void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInterval* interval) {
+void RegisterAllocator::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) {
   DCHECK(!interval->IsFixed() && !interval->HasSpillSlot());
   size_t insert_at = 0;
-  for (size_t i = array->Size(); i > 0; --i) {
-    LiveInterval* current = array->Get(i - 1);
+  for (size_t i = array->size(); i > 0; --i) {
+    LiveInterval* current = (*array)[i - 1u];
     // High intervals must be processed right after their low equivalent.
     if (current->StartsAfter(interval) && !current->IsHighInterval()) {
       insert_at = i;
@@ -1173,18 +1177,20 @@
     } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
       // Ensure the slow path interval is the last to be processed at its location: we want the
       // interval to know all live registers at this location.
-      DCHECK(i == 1 || array->Get(i - 2)->StartsAfter(current));
+      DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current));
       insert_at = i;
       break;
     }
   }
 
-  array->InsertAt(insert_at, interval);
   // Insert the high interval before the low, to ensure the low is processed before.
+  auto insert_pos = array->begin() + insert_at;
   if (interval->HasHighInterval()) {
-    array->InsertAt(insert_at, interval->GetHighInterval());
+    array->insert(insert_pos, { interval->GetHighInterval(), interval });
   } else if (interval->HasLowInterval()) {
-    array->InsertAt(insert_at + 1, interval->GetLowInterval());
+    array->insert(insert_pos, { interval, interval->GetLowInterval() });
+  } else {
+    array->insert(insert_pos, interval);
   }
 }
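
The `AddSorted()` change uses the initializer-list overload of `std::vector::insert()` so that a high/low interval pair lands at the computed position in one call, with the high interval placed immediately before the low as the old double `InsertAt()` did. A tiny sketch (illustrative values):

#include <cassert>
#include <vector>

void InsertPairExample() {
  std::vector<int> v = {1, 4};
  v.insert(v.begin() + 1, {2, 3});  // Inserts both elements, in order, before position 1.
  assert((v == std::vector<int>{1, 2, 3, 4}));
}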
 
@@ -1309,7 +1315,7 @@
     return;
   }
 
-  GrowableArray<size_t>* spill_slots = nullptr;
+  ArenaVector<size_t>* spill_slots = nullptr;
   switch (interval->GetType()) {
     case Primitive::kPrimDouble:
       spill_slots = &double_spill_slots_;
@@ -1334,32 +1340,27 @@
 
   // Find an available spill slot.
   size_t slot = 0;
-  for (size_t e = spill_slots->Size(); slot < e; ++slot) {
-    if (spill_slots->Get(slot) <= parent->GetStart()
-        && (slot == (e - 1) || spill_slots->Get(slot + 1) <= parent->GetStart())) {
+  for (size_t e = spill_slots->size(); slot < e; ++slot) {
+    if ((*spill_slots)[slot] <= parent->GetStart()
+        && (slot == (e - 1) || (*spill_slots)[slot + 1] <= parent->GetStart())) {
       break;
     }
   }
 
   size_t end = interval->GetLastSibling()->GetEnd();
   if (parent->NeedsTwoSpillSlots()) {
-    if (slot == spill_slots->Size()) {
+    if (slot + 2u > spill_slots->size()) {
       // We need a new spill slot.
-      spill_slots->Add(end);
-      spill_slots->Add(end);
-    } else if (slot == spill_slots->Size() - 1) {
-      spill_slots->Put(slot, end);
-      spill_slots->Add(end);
-    } else {
-      spill_slots->Put(slot, end);
-      spill_slots->Put(slot + 1, end);
+      spill_slots->resize(slot + 2u, end);
     }
+    (*spill_slots)[slot] = end;
+    (*spill_slots)[slot + 1] = end;
   } else {
-    if (slot == spill_slots->Size()) {
+    if (slot == spill_slots->size()) {
       // We need a new spill slot.
-      spill_slots->Add(end);
+      spill_slots->push_back(end);
     } else {
-      spill_slots->Put(slot, end);
+      (*spill_slots)[slot] = end;
     }
   }
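
The two-slot branch above leans on a `std::vector::resize(new_size, value)` detail: the fill value is applied only to newly created elements, so a pre-existing slot in the range keeps its old bound. That is presumably why both `(*spill_slots)[slot]` and `(*spill_slots)[slot + 1]` are still assigned explicitly after the resize. A tiny sketch of the behaviour (illustrative values):

#include <cassert>
#include <vector>

void ResizeFillExample() {
  std::vector<int> slots = {5};  // One existing slot.
  slots.resize(2, 99);           // Grows to size 2; only the newly created element gets 99.
  assert(slots[0] == 5);         // Pre-existing elements keep their old values...
  assert(slots[1] == 99);
  slots[0] = 42;                 // ...so the code above still assigns both entries explicitly.
  slots[1] = 42;
}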
 
@@ -1528,10 +1529,10 @@
   DCHECK_EQ(block->NumberOfNormalSuccessors(), 1u);
   HInstruction* last = block->GetLastInstruction();
   // We insert moves at exit for phi predecessors and connecting blocks.
-  // A block ending with an if cannot branch to a block with phis because
-  // we do not allow critical edges. It can also not connect
+  // A block ending with an if or a packed switch cannot branch to a block
+  // with phis because we do not allow critical edges. Nor can it connect
   // a split interval between two blocks: the move has to happen in the successor.
-  DCHECK(!last->IsIf());
+  DCHECK(!last->IsIf() && !last->IsPackedSwitch());
   HInstruction* previous = last->GetPrevious();
   HParallelMove* move;
   // This is a parallel move for connecting blocks. We need to differentiate
@@ -1817,13 +1818,13 @@
       size_t slot = current->GetSpillSlot();
       switch (current->GetType()) {
         case Primitive::kPrimDouble:
-          slot += long_spill_slots_.Size();
+          slot += long_spill_slots_.size();
           FALLTHROUGH_INTENDED;
         case Primitive::kPrimLong:
-          slot += float_spill_slots_.Size();
+          slot += float_spill_slots_.size();
           FALLTHROUGH_INTENDED;
         case Primitive::kPrimFloat:
-          slot += int_spill_slots_.Size();
+          slot += int_spill_slots_.size();
           FALLTHROUGH_INTENDED;
         case Primitive::kPrimNot:
         case Primitive::kPrimInt:
@@ -1906,8 +1907,7 @@
   }
 
   // Assign temp locations.
-  for (size_t i = 0; i < temp_intervals_.Size(); ++i) {
-    LiveInterval* temp = temp_intervals_.Get(i);
+  for (LiveInterval* temp : temp_intervals_) {
     if (temp->IsHighInterval()) {
       // High intervals can be skipped, they are already handled by the low interval.
       continue;
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index e030464..58600b7 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -18,9 +18,9 @@
 #define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_
 
 #include "arch/instruction_set.h"
+#include "base/arena_containers.h"
 #include "base/macros.h"
 #include "primitive.h"
-#include "utils/growable_array.h"
 
 namespace art {
 
@@ -59,7 +59,7 @@
   }
 
   // Helper method for validation. Used by unit testing.
-  static bool ValidateIntervals(const GrowableArray<LiveInterval*>& intervals,
+  static bool ValidateIntervals(const ArenaVector<LiveInterval*>& intervals,
                                 size_t number_of_spill_slots,
                                 size_t number_of_out_slots,
                                 const CodeGenerator& codegen,
@@ -70,10 +70,10 @@
   static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
 
   size_t GetNumberOfSpillSlots() const {
-    return int_spill_slots_.Size()
-        + long_spill_slots_.Size()
-        + float_spill_slots_.Size()
-        + double_spill_slots_.Size()
+    return int_spill_slots_.size()
+        + long_spill_slots_.size()
+        + float_spill_slots_.size()
+        + double_spill_slots_.size()
         + catch_phi_spill_slots_;
   }
 
@@ -87,7 +87,7 @@
   void Resolve();
 
   // Add `interval` in the given sorted list.
-  static void AddSorted(GrowableArray<LiveInterval*>* array, LiveInterval* interval);
+  static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval);
 
   // Split `interval` at the position `position`. The new interval starts at `position`.
   LiveInterval* Split(LiveInterval* interval, size_t position);
@@ -159,13 +159,6 @@
                                                 size_t first_register_use,
                                                 size_t* next_use);
 
-  // If `interval` has another half, remove it from the list of `intervals`.
-  // `index` holds the index at which `interval` is in `intervals`.
-  // Returns whether there is another half.
-  bool PotentiallyRemoveOtherHalf(LiveInterval* interval,
-                                  GrowableArray<LiveInterval*>* intervals,
-                                  size_t index);
-
   ArenaAllocator* const allocator_;
   CodeGenerator* const codegen_;
   const SsaLivenessAnalysis& liveness_;
@@ -173,43 +166,43 @@
   // List of intervals for core registers that must be processed, ordered by start
   // position. Last entry is the interval that has the lowest start position.
   // This list is initially populated before doing the linear scan.
-  GrowableArray<LiveInterval*> unhandled_core_intervals_;
+  ArenaVector<LiveInterval*> unhandled_core_intervals_;
 
   // List of intervals for floating-point registers. Same comments as above.
-  GrowableArray<LiveInterval*> unhandled_fp_intervals_;
+  ArenaVector<LiveInterval*> unhandled_fp_intervals_;
 
   // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_`
   // or `unhandled_fp_intervals_`.
-  GrowableArray<LiveInterval*>* unhandled_;
+  ArenaVector<LiveInterval*>* unhandled_;
 
   // List of intervals that have been processed.
-  GrowableArray<LiveInterval*> handled_;
+  ArenaVector<LiveInterval*> handled_;
 
   // List of intervals that are currently active when processing a new live interval.
   // That is, they have a live range that spans the start of the new interval.
-  GrowableArray<LiveInterval*> active_;
+  ArenaVector<LiveInterval*> active_;
 
   // List of intervals that are currently inactive when processing a new live interval.
   // That is, they have a lifetime hole that spans the start of the new interval.
-  GrowableArray<LiveInterval*> inactive_;
+  ArenaVector<LiveInterval*> inactive_;
 
   // Fixed intervals for physical registers. Such intervals cover the positions
   // where an instruction requires a specific register.
-  GrowableArray<LiveInterval*> physical_core_register_intervals_;
-  GrowableArray<LiveInterval*> physical_fp_register_intervals_;
+  ArenaVector<LiveInterval*> physical_core_register_intervals_;
+  ArenaVector<LiveInterval*> physical_fp_register_intervals_;
 
   // Intervals for temporaries. Such intervals cover the positions
   // where an instruction requires a temporary.
-  GrowableArray<LiveInterval*> temp_intervals_;
+  ArenaVector<LiveInterval*> temp_intervals_;
 
   // The spill slots allocated for live intervals. We ensure spill slots
   // are typed to avoid (1) doing moves and swaps between two different kinds
   // of registers, and (2) swapping between a single stack slot and a double
   // stack slot. This simplifies the parallel move resolver.
-  GrowableArray<size_t> int_spill_slots_;
-  GrowableArray<size_t> long_spill_slots_;
-  GrowableArray<size_t> float_spill_slots_;
-  GrowableArray<size_t> double_spill_slots_;
+  ArenaVector<size_t> int_spill_slots_;
+  ArenaVector<size_t> long_spill_slots_;
+  ArenaVector<size_t> float_spill_slots_;
+  ArenaVector<size_t> double_spill_slots_;
 
   // Spill slots allocated to catch phis. This category is special-cased because
   // (1) slots are allocated prior to linear scan and in reverse linear order,
@@ -217,7 +210,7 @@
   size_t catch_phi_spill_slots_;
 
   // Instructions that need a safepoint.
-  GrowableArray<HInstruction*> safepoints_;
+  ArenaVector<HInstruction*> safepoints_;
 
   // True if processing core registers. False if processing floating
   // point registers.
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index b72df86..2bb5a8b 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -64,83 +64,83 @@
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
-  GrowableArray<LiveInterval*> intervals(&allocator, 0);
+  ArenaVector<LiveInterval*> intervals(allocator.Adapter());
 
   // Test with two intervals of the same range.
   {
     static constexpr size_t ranges[][2] = {{0, 42}};
-    intervals.Add(BuildInterval(ranges, arraysize(ranges), &allocator, 0));
-    intervals.Add(BuildInterval(ranges, arraysize(ranges), &allocator, 1));
+    intervals.push_back(BuildInterval(ranges, arraysize(ranges), &allocator, 0));
+    intervals.push_back(BuildInterval(ranges, arraysize(ranges), &allocator, 1));
     ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
         intervals, 0, 0, codegen, &allocator, true, false));
 
-    intervals.Get(1)->SetRegister(0);
+    intervals[1]->SetRegister(0);
     ASSERT_FALSE(RegisterAllocator::ValidateIntervals(
         intervals, 0, 0, codegen, &allocator, true, false));
-    intervals.Reset();
+    intervals.clear();
   }
 
   // Test with two non-intersecting intervals.
   {
     static constexpr size_t ranges1[][2] = {{0, 42}};
-    intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0));
+    intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0));
     static constexpr size_t ranges2[][2] = {{42, 43}};
-    intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
+    intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
     ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
         intervals, 0, 0, codegen, &allocator, true, false));
 
-    intervals.Get(1)->SetRegister(0);
+    intervals[1]->SetRegister(0);
     ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
         intervals, 0, 0, codegen, &allocator, true, false));
-    intervals.Reset();
+    intervals.clear();
   }
 
   // Test with two non-intersecting intervals, with one with a lifetime hole.
   {
     static constexpr size_t ranges1[][2] = {{0, 42}, {45, 48}};
-    intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0));
+    intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0));
     static constexpr size_t ranges2[][2] = {{42, 43}};
-    intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
+    intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
     ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
         intervals, 0, 0, codegen, &allocator, true, false));
 
-    intervals.Get(1)->SetRegister(0);
+    intervals[1]->SetRegister(0);
     ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
         intervals, 0, 0, codegen, &allocator, true, false));
-    intervals.Reset();
+    intervals.clear();
   }
 
   // Test with intersecting intervals.
   {
     static constexpr size_t ranges1[][2] = {{0, 42}, {44, 48}};
-    intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0));
+    intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0));
     static constexpr size_t ranges2[][2] = {{42, 47}};
-    intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
+    intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
     ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
         intervals, 0, 0, codegen, &allocator, true, false));
 
-    intervals.Get(1)->SetRegister(0);
+    intervals[1]->SetRegister(0);
     ASSERT_FALSE(RegisterAllocator::ValidateIntervals(
         intervals, 0, 0, codegen, &allocator, true, false));
-    intervals.Reset();
+    intervals.clear();
   }
 
   // Test with siblings.
   {
     static constexpr size_t ranges1[][2] = {{0, 42}, {44, 48}};
-    intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0));
-    intervals.Get(0)->SplitAt(43);
+    intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0));
+    intervals[0]->SplitAt(43);
     static constexpr size_t ranges2[][2] = {{42, 47}};
-    intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
+    intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1));
     ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
         intervals, 0, 0, codegen, &allocator, true, false));
 
-    intervals.Get(1)->SetRegister(0);
+    intervals[1]->SetRegister(0);
     // Sibling of the first interval has no register allocated to it.
     ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
         intervals, 0, 0, codegen, &allocator, true, false));
 
-    intervals.Get(0)->GetNextSibling()->SetRegister(0);
+    intervals[0]->GetNextSibling()->SetRegister(0);
     ASSERT_FALSE(RegisterAllocator::ValidateIntervals(
         intervals, 0, 0, codegen, &allocator, true, false));
   }
@@ -429,7 +429,7 @@
 
   // Populate the instructions in the liveness object, to please the register allocator.
   for (size_t i = 0; i < 60; ++i) {
-    liveness.instructions_from_lifetime_position_.Add(
+    liveness.instructions_from_lifetime_position_.push_back(
         graph->GetEntryBlock()->GetFirstInstruction());
   }
 
@@ -442,15 +442,15 @@
   // we do not depend on an order.
   LiveInterval* interval = LiveInterval::MakeFixedInterval(&allocator, 0, Primitive::kPrimInt);
   interval->AddRange(40, 50);
-  register_allocator.inactive_.Add(interval);
+  register_allocator.inactive_.push_back(interval);
 
   interval = LiveInterval::MakeFixedInterval(&allocator, 0, Primitive::kPrimInt);
   interval->AddRange(20, 30);
-  register_allocator.inactive_.Add(interval);
+  register_allocator.inactive_.push_back(interval);
 
   interval = LiveInterval::MakeFixedInterval(&allocator, 0, Primitive::kPrimInt);
   interval->AddRange(60, 70);
-  register_allocator.inactive_.Add(interval);
+  register_allocator.inactive_.push_back(interval);
 
   register_allocator.number_of_registers_ = 1;
   register_allocator.registers_array_ = allocator.AllocArray<size_t>(1);
@@ -460,10 +460,10 @@
   ASSERT_TRUE(register_allocator.TryAllocateFreeReg(unhandled));
 
   // Check that we have split the interval.
-  ASSERT_EQ(1u, register_allocator.unhandled_->Size());
+  ASSERT_EQ(1u, register_allocator.unhandled_->size());
   // Check that we now need to find a new register where the next interval
   // that uses the register starts.
-  ASSERT_EQ(20u, register_allocator.unhandled_->Get(0)->GetStart());
+  ASSERT_EQ(20u, register_allocator.unhandled_->front()->GetStart());
 }
 
 static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator,
@@ -678,7 +678,7 @@
 
     // Check that the field gets put in the register expected by its use.
     // Don't use SetInAt because we are overriding an already allocated location.
-    ret->GetLocations()->inputs_.Put(0, Location::RegisterLocation(2));
+    ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2);
 
     RegisterAllocator register_allocator(&allocator, &codegen, liveness);
     register_allocator.AllocateRegisters();
@@ -885,14 +885,14 @@
   SsaLivenessAnalysis liveness(graph, &codegen);
   // Populate the instructions in the liveness object, to please the register allocator.
   for (size_t i = 0; i < 32; ++i) {
-    liveness.instructions_from_lifetime_position_.Add(user);
+    liveness.instructions_from_lifetime_position_.push_back(user);
   }
 
   RegisterAllocator register_allocator(&allocator, &codegen, liveness);
-  register_allocator.unhandled_core_intervals_.Add(fourth);
-  register_allocator.unhandled_core_intervals_.Add(third);
-  register_allocator.unhandled_core_intervals_.Add(second);
-  register_allocator.unhandled_core_intervals_.Add(first);
+  register_allocator.unhandled_core_intervals_.push_back(fourth);
+  register_allocator.unhandled_core_intervals_.push_back(third);
+  register_allocator.unhandled_core_intervals_.push_back(second);
+  register_allocator.unhandled_core_intervals_.push_back(first);
 
   // Set just one register available to make all intervals compete for the same.
   register_allocator.number_of_registers_ = 1;
@@ -902,11 +902,11 @@
   register_allocator.LinearScan();
 
   // Test that there are no conflicts between intervals.
-  GrowableArray<LiveInterval*> intervals(&allocator, 0);
-  intervals.Add(first);
-  intervals.Add(second);
-  intervals.Add(third);
-  intervals.Add(fourth);
+  ArenaVector<LiveInterval*> intervals(allocator.Adapter());
+  intervals.push_back(first);
+  intervals.push_back(second);
+  intervals.push_back(third);
+  intervals.push_back(fourth);
   ASSERT_TRUE(RegisterAllocator::ValidateIntervals(
       intervals, 0, 0, codegen, &allocator, true, false));
 }
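For reference, the GrowableArray calls removed throughout this change map onto std::vector-style operations as sketched below. This is a minimal standalone illustration using plain std::vector rather than ART's arena-backed ArenaVector, so treat it as an analogy rather than ART code.

#include <cassert>
#include <vector>

int main() {
  std::vector<int> values;

  values.push_back(1);              // was values.Add(1)
  values.push_back(2);
  assert(values[1] == 2);           // was values.Get(1)
  values[1] = 3;                    // was values.Put(1, 3)
  assert(values.size() == 2u);      // was values.Size()

  int top = values.back();          // was int top = values.Pop(), which also removed
  values.pop_back();                // the element; std::vector needs the extra pop_back()
  assert(top == 3);

  assert(!values.empty());          // was !values.IsEmpty()
  values.clear();                   // was values.Reset()
  assert(values.empty());
  return 0;
}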
diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc
index 1956781..338a3aa 100644
--- a/compiler/optimizing/side_effects_analysis.cc
+++ b/compiler/optimizing/side_effects_analysis.cc
@@ -21,8 +21,8 @@
 void SideEffectsAnalysis::Run() {
   // Inlining might have created more blocks, so we need to increase the size
   // if needed.
-  block_effects_.SetSize(graph_->GetBlocks().size());
-  loop_effects_.SetSize(graph_->GetBlocks().size());
+  block_effects_.resize(graph_->GetBlocks().size());
+  loop_effects_.resize(graph_->GetBlocks().size());
 
   // In DEBUG mode, ensure side effects are properly initialized to empty.
   if (kIsDebugBuild) {
@@ -54,7 +54,7 @@
       }
     }
 
-    block_effects_.Put(block->GetBlockId(), effects);
+    block_effects_[block->GetBlockId()] = effects;
 
     if (block->IsLoopHeader()) {
       // The side effects of the loop header are part of the loop.
@@ -76,16 +76,19 @@
 
 SideEffects SideEffectsAnalysis::GetLoopEffects(HBasicBlock* block) const {
   DCHECK(block->IsLoopHeader());
-  return loop_effects_.Get(block->GetBlockId());
+  DCHECK_LT(block->GetBlockId(), loop_effects_.size());
+  return loop_effects_[block->GetBlockId()];
 }
 
 SideEffects SideEffectsAnalysis::GetBlockEffects(HBasicBlock* block) const {
-  return block_effects_.Get(block->GetBlockId());
+  DCHECK_LT(block->GetBlockId(), block_effects_.size());
+  return block_effects_[block->GetBlockId()];
 }
 
 void SideEffectsAnalysis::UpdateLoopEffects(HLoopInformation* info, SideEffects effects) {
-  int id = info->GetHeader()->GetBlockId();
-  loop_effects_.Put(id, loop_effects_.Get(id).Union(effects));
+  uint32_t id = info->GetHeader()->GetBlockId();
+  DCHECK_LT(id, loop_effects_.size());
+  loop_effects_[id] = loop_effects_[id].Union(effects);
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h
index 9888140..bac6088 100644
--- a/compiler/optimizing/side_effects_analysis.h
+++ b/compiler/optimizing/side_effects_analysis.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_
 #define ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_
 
+#include "base/arena_containers.h"
 #include "nodes.h"
 #include "optimization.h"
 
@@ -27,8 +28,10 @@
   explicit SideEffectsAnalysis(HGraph* graph)
       : HOptimization(graph, kSideEffectsAnalysisPassName),
         graph_(graph),
-        block_effects_(graph->GetArena(), graph->GetBlocks().size(), SideEffects::None()),
-        loop_effects_(graph->GetArena(), graph->GetBlocks().size(), SideEffects::None()) {}
+        block_effects_(graph->GetBlocks().size(),
+                       graph->GetArena()->Adapter(kArenaAllocSideEffectsAnalysis)),
+        loop_effects_(graph->GetBlocks().size(),
+                      graph->GetArena()->Adapter(kArenaAllocSideEffectsAnalysis)) {}
 
   SideEffects GetLoopEffects(HBasicBlock* block) const;
   SideEffects GetBlockEffects(HBasicBlock* block) const;
@@ -51,11 +54,11 @@
 
   // Side effects of individual blocks, that is the union of the side effects
   // of the instructions in the block.
-  GrowableArray<SideEffects> block_effects_;
+  ArenaVector<SideEffects> block_effects_;
 
   // Side effects of loops, that is the union of the side effects of the
   // blocks contained in that loop.
-  GrowableArray<SideEffects> loop_effects_;
+  ArenaVector<SideEffects> loop_effects_;
 
   ART_FRIEND_TEST(GVNTest, LoopSideEffects);
   DISALLOW_COPY_AND_ASSIGN(SideEffectsAnalysis);
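The rewritten constructor builds both containers with an initial element count and an arena allocator adapter. The sketch below shows the same construction pattern with C++17 std::pmr types standing in for ART's ArenaAllocator and Adapter(); the SideEffects struct is a stand-in for the real class, so this is an analogue rather than the actual API.

#include <cassert>
#include <cstddef>
#include <memory_resource>
#include <vector>

struct SideEffects {    // stand-in for art::SideEffects
  unsigned flags = 0u;  // zero plays the role of SideEffects::None()
};

int main() {
  char buffer[4096];
  // Arena-style resource: allocations come out of `buffer` and are released together.
  std::pmr::monotonic_buffer_resource arena(buffer, sizeof(buffer));
  std::pmr::polymorphic_allocator<SideEffects> adapter(&arena);

  const std::size_t number_of_blocks = 8;
  // Sized construction with an allocator, mirroring
  // block_effects_(graph->GetBlocks().size(), graph->GetArena()->Adapter(...)).
  std::pmr::vector<SideEffects> block_effects(number_of_blocks, adapter);
  std::pmr::vector<SideEffects> loop_effects(number_of_blocks, adapter);

  assert(block_effects.size() == number_of_blocks);
  assert(block_effects[0].flags == 0u);  // default-constructed to zero, like SideEffects::None()

  // Bounds-checked accessor pattern from GetBlockEffects()/GetLoopEffects().
  std::size_t block_id = 3;
  assert(block_id < loop_effects.size());
  loop_effects[block_id].flags |= 1u;
  return 0;
}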
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 0ef86d8..40c75af 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -56,6 +56,24 @@
   DISALLOW_COPY_AND_ASSIGN(DeadPhiHandling);
 };
 
+static bool HasConflictingEquivalent(HPhi* phi) {
+  if (phi->GetNext() == nullptr) {
+    return false;
+  }
+  HPhi* next = phi->GetNext()->AsPhi();
+  if (next->GetRegNumber() == phi->GetRegNumber()) {
+    if (next->GetType() == Primitive::kPrimVoid) {
+      // We only get a void type for an equivalent phi we processed and found out
+      // it was conflicting.
+      return true;
+    } else {
+      // Go to the next phi, in case it is also an equivalent.
+      return HasConflictingEquivalent(next);
+    }
+  }
+  return false;
+}
+
 bool DeadPhiHandling::UpdateType(HPhi* phi) {
   if (phi->IsDead()) {
     // Phi was rendered dead while waiting in the worklist because it was replaced
@@ -87,21 +105,26 @@
     if (new_type == Primitive::kPrimVoid) {
       new_type = input_type;
     } else if (new_type == Primitive::kPrimNot && input_type == Primitive::kPrimInt) {
+      if (input->IsPhi() && HasConflictingEquivalent(input->AsPhi())) {
+        // If we already asked for an equivalent of the input phi, but that equivalent
+        // ended up conflicting, make this phi conflicting too.
+        conflict = true;
+        break;
+      }
       HInstruction* equivalent = SsaBuilder::GetReferenceTypeEquivalent(input);
       if (equivalent == nullptr) {
         conflict = true;
         break;
-      } else {
-        phi->ReplaceInput(equivalent, i);
-        if (equivalent->IsPhi()) {
-          DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot);
-          // We created a new phi, but that phi has the same inputs as the old phi. We
-          // add it to the worklist to ensure its inputs can also be converted to reference.
-          // If not, it will remain dead, and the algorithm will make the current phi dead
-          // as well.
-          equivalent->AsPhi()->SetLive();
-          AddToWorklist(equivalent->AsPhi());
-        }
+      }
+      phi->ReplaceInput(equivalent, i);
+      if (equivalent->IsPhi()) {
+        DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot);
+        // We created a new phi, but that phi has the same inputs as the old phi. We
+        // add it to the worklist to ensure its inputs can also be converted to reference.
+        // If not, it will remain dead, and the algorithm will make the current phi dead
+        // as well.
+        equivalent->AsPhi()->SetLive();
+        AddToWorklist(equivalent->AsPhi());
       }
     } else if (new_type == Primitive::kPrimInt && input_type == Primitive::kPrimNot) {
       new_type = Primitive::kPrimNot;
@@ -145,8 +168,14 @@
     if (phi->IsDead() && phi->HasEnvironmentUses()) {
       phi->SetLive();
       if (block->IsLoopHeader()) {
-        // Give a type to the loop phi, to guarantee convergence of the algorithm.
-        phi->SetType(phi->InputAt(0)->GetType());
+        // Give a type to the loop phi to guarantee convergence of the algorithm.
+        // Note that the dead phi may already have a type if it is an equivalent
+        // generated for a typed LoadLocal. In that case we do not change the
+        // type because it could lead to an unsupported PrimNot/Float/Double ->
+        // PrimInt/Long transition and create same type equivalents.
+        if (phi->GetType() == Primitive::kPrimVoid) {
+          phi->SetType(phi->InputAt(0)->GetType());
+        }
         AddToWorklist(phi);
       } else {
         // Because we are doing a reverse post order visit, all inputs of
@@ -191,12 +220,6 @@
   ProcessWorklist();
 }
 
-static bool IsPhiEquivalentOf(HInstruction* instruction, HPhi* phi) {
-  return instruction != nullptr
-      && instruction->IsPhi()
-      && instruction->AsPhi()->GetRegNumber() == phi->GetRegNumber();
-}
-
 void SsaBuilder::FixNullConstantType() {
   // The order doesn't matter here.
   for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
@@ -324,13 +347,13 @@
       // If the phi is not dead, or has no environment uses, there is nothing to do.
       if (!phi->IsDead() || !phi->HasEnvironmentUses()) continue;
       HInstruction* next = phi->GetNext();
-      if (!IsPhiEquivalentOf(next, phi)) continue;
+      if (!phi->IsVRegEquivalentOf(next)) continue;
       if (next->AsPhi()->IsDead()) {
         // If the phi equivalent is dead, check if there is another one.
         next = next->GetNext();
-        if (!IsPhiEquivalentOf(next, phi)) continue;
+        if (!phi->IsVRegEquivalentOf(next)) continue;
         // There can be at most two phi equivalents.
-        DCHECK(!IsPhiEquivalentOf(next->GetNext(), phi));
+        DCHECK(!phi->IsVRegEquivalentOf(next->GetNext()));
         if (next->AsPhi()->IsDead()) continue;
       }
       // We found a live phi equivalent. Update the environment uses of `phi` with it.
@@ -403,6 +426,24 @@
 
   if (block->IsCatchBlock()) {
     // Catch phis were already created and inputs collected from throwing sites.
+    if (kIsDebugBuild) {
+      // Make sure there was at least one throwing instruction which initialized
+      // locals (guaranteed by HGraphBuilder) and that all try blocks have been
+      // visited already (from HTryBoundary scoping and reverse post order).
+      bool throwing_instruction_found = false;
+      bool catch_block_visited = false;
+      for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+        HBasicBlock* current = it.Current();
+        if (current == block) {
+          catch_block_visited = true;
+        } else if (current->IsTryBlock() &&
+                   current->GetTryCatchInformation()->GetTryEntry().HasExceptionHandler(*block)) {
+          DCHECK(!catch_block_visited) << "Catch block visited before its try block.";
+          throwing_instruction_found |= current->HasThrowingInstructions();
+        }
+      }
+      DCHECK(throwing_instruction_found) << "No instructions throwing into a live catch block.";
+    }
   } else if (block->IsLoopHeader()) {
     // If the block is a loop header, we know we only have visited the pre header
     // because we are visiting in reverse post order. We create phis for all initialized
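The new HasConflictingEquivalent helper scans the phis chained after the given phi for an equivalent (same vreg number) whose type has been reset to void, the marker used for an equivalent already found to conflict. Below is a standalone sketch of the same scan, written iteratively over a simplified phi node; the Phi and Type names are invented for illustration and are not the ART types.

#include <cassert>

enum class Type { kVoid, kInt, kRef };  // stand-ins for Primitive::kPrim* values

struct Phi {
  int reg_number;  // dex virtual register this phi is for
  Type type;
  Phi* next;       // phis of a block are chained; equivalents are adjacent
};

// An equivalent that was processed and found conflicting has been given the
// void type; keep scanning while the vreg number still matches.
bool HasConflictingEquivalent(const Phi* phi) {
  for (const Phi* next = phi->next; next != nullptr; next = next->next) {
    if (next->reg_number != phi->reg_number) {
      return false;  // past the equivalents of this phi
    }
    if (next->type == Type::kVoid) {
      return true;   // an equivalent already marked as conflicting
    }
  }
  return false;
}

int main() {
  Phi conflicting{7, Type::kVoid, nullptr};
  Phi equivalent{7, Type::kRef, &conflicting};
  Phi phi{7, Type::kInt, &equivalent};
  Phi unrelated{3, Type::kInt, &phi};

  assert(HasConflictingEquivalent(&phi));
  assert(HasConflictingEquivalent(&equivalent));
  assert(!HasConflictingEquivalent(&conflicting));
  assert(!HasConflictingEquivalent(&unrelated));  // next phi has a different vreg
  return 0;
}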
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 1e9a813..b869d57 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -43,11 +43,11 @@
       && inner->IsIn(*outer);
 }
 
-static void AddToListForLinearization(GrowableArray<HBasicBlock*>* worklist, HBasicBlock* block) {
-  size_t insert_at = worklist->Size();
+static void AddToListForLinearization(ArenaVector<HBasicBlock*>* worklist, HBasicBlock* block) {
   HLoopInformation* block_loop = block->GetLoopInformation();
-  for (; insert_at > 0; --insert_at) {
-    HBasicBlock* current = worklist->Get(insert_at - 1);
+  auto insert_pos = worklist->rbegin();  // insert_pos.base() will be the actual position.
+  for (auto end = worklist->rend(); insert_pos != end; ++insert_pos) {
+    HBasicBlock* current = *insert_pos;
     HLoopInformation* current_loop = current->GetLoopInformation();
     if (InSameLoop(block_loop, current_loop)
         || !IsLoop(current_loop)
@@ -56,7 +56,7 @@
       break;
     }
   }
-  worklist->InsertAt(insert_at, block);
+  worklist->insert(insert_pos.base(), block);
 }
 
 void SsaLivenessAnalysis::LinearizeGraph() {
@@ -69,15 +69,15 @@
   //      current reverse post order in the graph, but it would require making
   //      order queries to a GrowableArray, which is not the best data structure
   //      for it.
-  GrowableArray<uint32_t> forward_predecessors(graph_->GetArena(), graph_->GetBlocks().size());
-  forward_predecessors.SetSize(graph_->GetBlocks().size());
+  ArenaVector<uint32_t> forward_predecessors(graph_->GetBlocks().size(),
+                                             graph_->GetArena()->Adapter(kArenaAllocSsaLiveness));
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     size_t number_of_forward_predecessors = block->GetPredecessors().size();
     if (block->IsLoopHeader()) {
       number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges();
     }
-    forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors);
+    forward_predecessors[block->GetBlockId()] = number_of_forward_predecessors;
   }
 
   // (2): Following a worklist approach, first start with the entry block, and
@@ -85,20 +85,21 @@
   //      successor block are visited, the successor block is added in the worklist
   //      following an order that satisfies the requirements to build our linear graph.
   graph_->linear_order_.reserve(graph_->GetReversePostOrder().size());
-  GrowableArray<HBasicBlock*> worklist(graph_->GetArena(), 1);
-  worklist.Add(graph_->GetEntryBlock());
+  ArenaVector<HBasicBlock*> worklist(graph_->GetArena()->Adapter(kArenaAllocSsaLiveness));
+  worklist.push_back(graph_->GetEntryBlock());
   do {
-    HBasicBlock* current = worklist.Pop();
+    HBasicBlock* current = worklist.back();
+    worklist.pop_back();
     graph_->linear_order_.push_back(current);
     for (HBasicBlock* successor : current->GetSuccessors()) {
       int block_id = successor->GetBlockId();
-      size_t number_of_remaining_predecessors = forward_predecessors.Get(block_id);
+      size_t number_of_remaining_predecessors = forward_predecessors[block_id];
       if (number_of_remaining_predecessors == 1) {
         AddToListForLinearization(&worklist, successor);
       }
-      forward_predecessors.Put(block_id, number_of_remaining_predecessors - 1);
+      forward_predecessors[block_id] = number_of_remaining_predecessors - 1;
     }
-  } while (!worklist.IsEmpty());
+  } while (!worklist.empty());
 }
 
 void SsaLivenessAnalysis::NumberInstructions() {
@@ -122,7 +123,7 @@
       codegen_->AllocateLocations(current);
       LocationSummary* locations = current->GetLocations();
       if (locations != nullptr && locations->Out().IsValid()) {
-        instructions_from_ssa_index_.Add(current);
+        instructions_from_ssa_index_.push_back(current);
         current->SetSsaIndex(ssa_index++);
         current->SetLiveInterval(
             LiveInterval::MakeInterval(graph_->GetArena(), current->GetType(), current));
@@ -132,7 +133,7 @@
     lifetime_position += 2;
 
     // Add a null marker to notify we are starting a block.
-    instructions_from_lifetime_position_.Add(nullptr);
+    instructions_from_lifetime_position_.push_back(nullptr);
 
     for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
          inst_it.Advance()) {
@@ -140,12 +141,12 @@
       codegen_->AllocateLocations(current);
       LocationSummary* locations = current->GetLocations();
       if (locations != nullptr && locations->Out().IsValid()) {
-        instructions_from_ssa_index_.Add(current);
+        instructions_from_ssa_index_.push_back(current);
         current->SetSsaIndex(ssa_index++);
         current->SetLiveInterval(
             LiveInterval::MakeInterval(graph_->GetArena(), current->GetType(), current));
       }
-      instructions_from_lifetime_position_.Add(current);
+      instructions_from_lifetime_position_.push_back(current);
       current->SetLifetimePosition(lifetime_position);
       lifetime_position += 2;
     }
@@ -158,9 +159,9 @@
 void SsaLivenessAnalysis::ComputeLiveness() {
   for (HLinearOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
-    block_infos_.Put(
-        block->GetBlockId(),
-        new (graph_->GetArena()) BlockInfo(graph_->GetArena(), *block, number_of_ssa_values_));
+    DCHECK_LT(block->GetBlockId(), block_infos_.size());
+    block_infos_[block->GetBlockId()] =
+        new (graph_->GetArena()) BlockInfo(graph_->GetArena(), *block, number_of_ssa_values_);
   }
 
   // Compute the live ranges, as well as the initial live_in, live_out, and kill sets.
@@ -212,7 +213,7 @@
     // Add a range that covers this block to all instructions live_in because of successors.
     // Instructions defined in this block will have the start of their range adjusted.
     for (uint32_t idx : live_in->Indexes()) {
-      HInstruction* current = instructions_from_ssa_index_.Get(idx);
+      HInstruction* current = GetInstructionFromSsaIndex(idx);
       current->GetLiveInterval()->AddRange(block->GetLifetimeStart(), block->GetLifetimeEnd());
     }
 
@@ -277,7 +278,7 @@
       // For all live_in instructions at the loop header, we need to create a range
       // that covers the full loop.
       for (uint32_t idx : live_in->Indexes()) {
-        HInstruction* current = instructions_from_ssa_index_.Get(idx);
+        HInstruction* current = GetInstructionFromSsaIndex(idx);
         current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), last_position);
       }
     }
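The rewritten AddToListForLinearization scans the worklist from the back with a reverse iterator and then inserts at insert_pos.base(). For a reverse iterator r, r.base() is the forward iterator one past the element *r, so the insertion lands immediately after the element where the scan stopped, and rend().base() is begin(). The standalone sketch below demonstrates that idiom; the ordering predicate is a stand-in for the loop-nesting test, not the real condition.

#include <cassert>
#include <vector>

// Scan from the back and insert `value` right after the first element that is
// not smaller than it, keeping the vector in descending order.
void InsertFromBack(std::vector<int>* worklist, int value) {
  auto insert_pos = worklist->rbegin();  // insert_pos.base() will be the actual position
  for (auto end = worklist->rend(); insert_pos != end; ++insert_pos) {
    if (*insert_pos >= value) {
      break;  // insert after this element
    }
  }
  // reverse_iterator::base() points one past *insert_pos in forward order,
  // so inserting there places `value` right after the element we stopped at.
  worklist->insert(insert_pos.base(), value);
}

int main() {
  std::vector<int> worklist = {9, 7, 3};
  InsertFromBack(&worklist, 5);   // scan stops at 7; 5 goes right after it
  assert((worklist == std::vector<int>{9, 7, 5, 3}));
  InsertFromBack(&worklist, 10);  // nothing qualifies: rend().base() == begin()
  assert((worklist == std::vector<int>{10, 9, 7, 5, 3}));
  return 0;
}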
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 3aedaa5..e4b0999 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -27,7 +27,7 @@
 
 static constexpr int kNoRegister = -1;
 
-class BlockInfo : public ArenaObject<kArenaAllocMisc> {
+class BlockInfo : public ArenaObject<kArenaAllocSsaLiveness> {
  public:
   BlockInfo(ArenaAllocator* allocator, const HBasicBlock& block, size_t number_of_ssa_values)
       : block_(block),
@@ -55,7 +55,7 @@
  * A live range contains the start and end of a range where an instruction or a temporary
  * is live.
  */
-class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> {
+class LiveRange FINAL : public ArenaObject<kArenaAllocSsaLiveness> {
  public:
   LiveRange(size_t start, size_t end, LiveRange* next) : start_(start), end_(end), next_(next) {
     DCHECK_LT(start, end);
@@ -101,7 +101,7 @@
 /**
  * A use position represents a live interval use at a given position.
  */
-class UsePosition : public ArenaObject<kArenaAllocMisc> {
+class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> {
  public:
   UsePosition(HInstruction* user,
               HEnvironment* environment,
@@ -169,7 +169,7 @@
   DISALLOW_COPY_AND_ASSIGN(UsePosition);
 };
 
-class SafepointPosition : public ArenaObject<kArenaAllocMisc> {
+class SafepointPosition : public ArenaObject<kArenaAllocSsaLiveness> {
  public:
   explicit SafepointPosition(HInstruction* instruction)
       : instruction_(instruction),
@@ -206,7 +206,7 @@
  * An interval is a list of disjoint live ranges where an instruction is live.
  * Each instruction that has uses gets an interval.
  */
-class LiveInterval : public ArenaObject<kArenaAllocMisc> {
+class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
  public:
   static LiveInterval* MakeInterval(ArenaAllocator* allocator,
                                     Primitive::Type type,
@@ -1106,33 +1106,39 @@
   SsaLivenessAnalysis(HGraph* graph, CodeGenerator* codegen)
       : graph_(graph),
         codegen_(codegen),
-        block_infos_(graph->GetArena(), graph->GetBlocks().size()),
-        instructions_from_ssa_index_(graph->GetArena(), 0),
-        instructions_from_lifetime_position_(graph->GetArena(), 0),
+        block_infos_(graph->GetBlocks().size(),
+                     nullptr,
+                     graph->GetArena()->Adapter(kArenaAllocSsaLiveness)),
+        instructions_from_ssa_index_(graph->GetArena()->Adapter(kArenaAllocSsaLiveness)),
+        instructions_from_lifetime_position_(graph->GetArena()->Adapter(kArenaAllocSsaLiveness)),
         number_of_ssa_values_(0) {
-    block_infos_.SetSize(graph->GetBlocks().size());
   }
 
   void Analyze();
 
   BitVector* GetLiveInSet(const HBasicBlock& block) const {
-    return &block_infos_.Get(block.GetBlockId())->live_in_;
+    DCHECK_LT(block.GetBlockId(), block_infos_.size());
+    return &block_infos_[block.GetBlockId()]->live_in_;
   }
 
   BitVector* GetLiveOutSet(const HBasicBlock& block) const {
-    return &block_infos_.Get(block.GetBlockId())->live_out_;
+    DCHECK_LT(block.GetBlockId(), block_infos_.size());
+    return &block_infos_[block.GetBlockId()]->live_out_;
   }
 
   BitVector* GetKillSet(const HBasicBlock& block) const {
-    return &block_infos_.Get(block.GetBlockId())->kill_;
+    DCHECK_LT(block.GetBlockId(), block_infos_.size());
+    return &block_infos_[block.GetBlockId()]->kill_;
   }
 
   HInstruction* GetInstructionFromSsaIndex(size_t index) const {
-    return instructions_from_ssa_index_.Get(index);
+    DCHECK_LT(index, instructions_from_ssa_index_.size());
+    return instructions_from_ssa_index_[index];
   }
 
   HInstruction* GetInstructionFromPosition(size_t index) const {
-    return instructions_from_lifetime_position_.Get(index);
+    DCHECK_LT(index, instructions_from_lifetime_position_.size());
+    return instructions_from_lifetime_position_[index];
   }
 
   HBasicBlock* GetBlockFromPosition(size_t index) const {
@@ -1163,7 +1169,7 @@
   }
 
   size_t GetMaxLifetimePosition() const {
-    return instructions_from_lifetime_position_.Size() * 2 - 1;
+    return instructions_from_lifetime_position_.size() * 2 - 1;
   }
 
   size_t GetNumberOfSsaValues() const {
@@ -1218,13 +1224,13 @@
 
   HGraph* const graph_;
   CodeGenerator* const codegen_;
-  GrowableArray<BlockInfo*> block_infos_;
+  ArenaVector<BlockInfo*> block_infos_;
 
   // Temporary array used when computing live_in, live_out, and kill sets.
-  GrowableArray<HInstruction*> instructions_from_ssa_index_;
+  ArenaVector<HInstruction*> instructions_from_ssa_index_;
 
   // Temporary array used when inserting moves in the graph.
-  GrowableArray<HInstruction*> instructions_from_lifetime_position_;
+  ArenaVector<HInstruction*> instructions_from_lifetime_position_;
   size_t number_of_ssa_values_;
 
   ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive);
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index a9f04cd..72f9ddd 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -35,7 +35,7 @@
         HUseListNode<HInstruction*>* current = use_it.Current();
         HInstruction* user = current->GetUser();
         if (!user->IsPhi()) {
-          worklist_.Add(phi);
+          worklist_.push_back(phi);
           phi->SetLive();
           break;
         }
@@ -44,12 +44,13 @@
   }
 
   // Process the worklist by propagating liveness to phi inputs.
-  while (!worklist_.IsEmpty()) {
-    HPhi* phi = worklist_.Pop();
+  while (!worklist_.empty()) {
+    HPhi* phi = worklist_.back();
+    worklist_.pop_back();
     for (HInputIterator it(phi); !it.Done(); it.Advance()) {
       HInstruction* input = it.Current();
       if (input->IsPhi() && input->AsPhi()->IsDead()) {
-        worklist_.Add(input->AsPhi());
+        worklist_.push_back(input->AsPhi());
         input->AsPhi()->SetLive();
       }
     }
@@ -103,12 +104,13 @@
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
-      worklist_.Add(inst_it.Current()->AsPhi());
+      worklist_.push_back(inst_it.Current()->AsPhi());
     }
   }
 
-  while (!worklist_.IsEmpty()) {
-    HPhi* phi = worklist_.Pop();
+  while (!worklist_.empty()) {
+    HPhi* phi = worklist_.back();
+    worklist_.pop_back();
 
     // If the phi has already been processed, continue.
     if (!phi->IsInBlock()) {
@@ -155,7 +157,7 @@
       HUseListNode<HInstruction*>* current = it.Current();
       HInstruction* user = current->GetUser();
       if (user->IsPhi()) {
-        worklist_.Add(user->AsPhi());
+        worklist_.push_back(user->AsPhi());
       }
     }
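Both phi elimination passes now drive their worklists with the empty()/back()/pop_back() triple instead of GrowableArray's IsEmpty()/Pop(). The standalone sketch below shows the same worklist-driven liveness propagation over a toy node graph; the Node type and MarkLive name are invented for illustration.

#include <cassert>
#include <vector>

struct Node {
  bool live = false;
  std::vector<Node*> inputs;
};

// Mark the seeds live, then propagate liveness backwards through inputs,
// mirroring the SsaDeadPhiElimination worklist loop above.
void MarkLive(const std::vector<Node*>& seeds) {
  std::vector<Node*> worklist;
  worklist.reserve(8);             // counterpart of kDefaultWorklistSize
  for (Node* seed : seeds) {
    seed->live = true;
    worklist.push_back(seed);      // was worklist_.Add(phi)
  }
  while (!worklist.empty()) {      // was !worklist_.IsEmpty()
    Node* node = worklist.back();  // was worklist_.Pop(), now split into back() ...
    worklist.pop_back();           // ... and pop_back()
    for (Node* input : node->inputs) {
      if (!input->live) {
        input->live = true;
        worklist.push_back(input);
      }
    }
  }
}

int main() {
  Node a, b, c, d;
  b.inputs = {&a};
  c.inputs = {&b};
  MarkLive({&c});  // d is not reachable from the seed, so it stays dead
  assert(a.live && b.live && c.live && !d.live);
  return 0;
}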
 
diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h
index 67351f2..b48e820 100644
--- a/compiler/optimizing/ssa_phi_elimination.h
+++ b/compiler/optimizing/ssa_phi_elimination.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_
 #define ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_
 
+#include "base/arena_containers.h"
 #include "nodes.h"
 #include "optimization.h"
 
@@ -30,7 +31,9 @@
  public:
   explicit SsaDeadPhiElimination(HGraph* graph)
       : HOptimization(graph, kSsaDeadPhiEliminationPassName),
-        worklist_(graph->GetArena(), kDefaultWorklistSize) {}
+        worklist_(graph->GetArena()->Adapter(kArenaAllocSsaPhiElimination)) {
+    worklist_.reserve(kDefaultWorklistSize);
+  }
 
   void Run() OVERRIDE;
 
@@ -40,7 +43,7 @@
   static constexpr const char* kSsaDeadPhiEliminationPassName = "dead_phi_elimination";
 
  private:
-  GrowableArray<HPhi*> worklist_;
+  ArenaVector<HPhi*> worklist_;
 
   static constexpr size_t kDefaultWorklistSize = 8;
 
@@ -57,14 +60,16 @@
  public:
   explicit SsaRedundantPhiElimination(HGraph* graph)
       : HOptimization(graph, kSsaRedundantPhiEliminationPassName),
-        worklist_(graph->GetArena(), kDefaultWorklistSize) {}
+        worklist_(graph->GetArena()->Adapter(kArenaAllocSsaPhiElimination)) {
+    worklist_.reserve(kDefaultWorklistSize);
+  }
 
   void Run() OVERRIDE;
 
   static constexpr const char* kSsaRedundantPhiEliminationPassName = "redundant_phi_elimination";
 
  private:
-  GrowableArray<HPhi*> worklist_;
+  ArenaVector<HPhi*> worklist_;
 
   static constexpr size_t kDefaultWorklistSize = 8;
 
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 1f0bac5..f27cecc 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -30,8 +30,8 @@
   current_entry_.sp_mask = sp_mask;
   current_entry_.num_dex_registers = num_dex_registers;
   current_entry_.inlining_depth = inlining_depth;
-  current_entry_.dex_register_locations_start_index = dex_register_locations_.Size();
-  current_entry_.inline_infos_start_index = inline_infos_.Size();
+  current_entry_.dex_register_locations_start_index = dex_register_locations_.size();
+  current_entry_.inline_infos_start_index = inline_infos_.size();
   current_entry_.dex_register_map_hash = 0;
   current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound;
   if (num_dex_registers != 0) {
@@ -55,7 +55,7 @@
 
 void StackMapStream::EndStackMapEntry() {
   current_entry_.same_dex_register_map_as_ = FindEntryWithTheSameDexMap();
-  stack_maps_.Add(current_entry_);
+  stack_maps_.push_back(current_entry_);
   current_entry_ = StackMapEntry();
 }
 
@@ -73,12 +73,12 @@
     auto it = location_catalog_entries_indices_.Find(location);
     if (it != location_catalog_entries_indices_.end()) {
       // Retrieve the index from the hash map.
-      dex_register_locations_.Add(it->second);
+      dex_register_locations_.push_back(it->second);
     } else {
       // Create a new entry in the location catalog and the hash map.
-      size_t index = location_catalog_entries_.Size();
-      location_catalog_entries_.Add(location);
-      dex_register_locations_.Add(index);
+      size_t index = location_catalog_entries_.size();
+      location_catalog_entries_.push_back(location);
+      dex_register_locations_.push_back(index);
       location_catalog_entries_indices_.Insert(std::make_pair(location, index));
     }
 
@@ -108,7 +108,7 @@
   current_inline_info_.dex_pc = dex_pc;
   current_inline_info_.invoke_type = invoke_type;
   current_inline_info_.num_dex_registers = num_dex_registers;
-  current_inline_info_.dex_register_locations_start_index = dex_register_locations_.Size();
+  current_inline_info_.dex_register_locations_start_index = dex_register_locations_.size();
   if (num_dex_registers != 0) {
     current_inline_info_.live_dex_registers_mask =
         new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true);
@@ -123,14 +123,14 @@
   DCHECK_EQ(current_dex_register_, current_inline_info_.num_dex_registers)
       << "Inline information contains less registers than expected";
   in_inline_frame_ = false;
-  inline_infos_.Add(current_inline_info_);
+  inline_infos_.push_back(current_inline_info_);
   current_inline_info_ = InlineInfoEntry();
 }
 
 uint32_t StackMapStream::ComputeMaxNativePcOffset() const {
   uint32_t max_native_pc_offset = 0u;
-  for (size_t i = 0, size = stack_maps_.Size(); i != size; ++i) {
-    max_native_pc_offset = std::max(max_native_pc_offset, stack_maps_.Get(i).native_pc_offset);
+  for (const StackMapEntry& entry : stack_maps_) {
+    max_native_pc_offset = std::max(max_native_pc_offset, entry.native_pc_offset);
   }
   return max_native_pc_offset;
 }
@@ -147,7 +147,7 @@
                                                           dex_pc_max_,
                                                           max_native_pc_offset,
                                                           register_mask_max_);
-  stack_maps_size_ = stack_maps_.Size() * stack_map_encoding_.ComputeStackMapSize();
+  stack_maps_size_ = stack_maps_.size() * stack_map_encoding_.ComputeStackMapSize();
   dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
 
   // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned.
@@ -170,33 +170,28 @@
 
 size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const {
   size_t size = DexRegisterLocationCatalog::kFixedSize;
-  for (size_t location_catalog_entry_index = 0;
-       location_catalog_entry_index < location_catalog_entries_.Size();
-       ++location_catalog_entry_index) {
-    DexRegisterLocation dex_register_location =
-        location_catalog_entries_.Get(location_catalog_entry_index);
+  for (const DexRegisterLocation& dex_register_location : location_catalog_entries_) {
     size += DexRegisterLocationCatalog::EntrySize(dex_register_location);
   }
   return size;
 }
 
 size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers,
-                                                 const BitVector& live_dex_registers_mask) const {
+                                                 const BitVector* live_dex_registers_mask) const {
+  // For num_dex_registers == 0u, live_dex_registers_mask may be null.
+  if (num_dex_registers == 0u) {
+    return 0u;  // No register map will be emitted.
+  }
+  DCHECK(live_dex_registers_mask != nullptr);
+
   // Size of the map in bytes.
   size_t size = DexRegisterMap::kFixedSize;
   // Add the live bit mask for the Dex register liveness.
   size += DexRegisterMap::GetLiveBitMaskSize(num_dex_registers);
   // Compute the size of the set of live Dex register entries.
-  size_t number_of_live_dex_registers = 0;
-  for (size_t dex_register_number = 0;
-       dex_register_number < num_dex_registers;
-       ++dex_register_number) {
-    if (live_dex_registers_mask.IsBitSet(dex_register_number)) {
-      ++number_of_live_dex_registers;
-    }
-  }
+  size_t number_of_live_dex_registers = live_dex_registers_mask->NumSetBits();
   size_t map_entries_size_in_bits =
-      DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size())
+      DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.size())
       * number_of_live_dex_registers;
   size_t map_entries_size_in_bytes =
       RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte;
@@ -207,24 +202,24 @@
 size_t StackMapStream::ComputeDexRegisterMapsSize() const {
   size_t size = 0;
   size_t inline_info_index = 0;
-  for (size_t i = 0; i < stack_maps_.Size(); ++i) {
-    StackMapEntry entry = stack_maps_.Get(i);
+  for (const StackMapEntry& entry : stack_maps_) {
     if (entry.same_dex_register_map_as_ == kNoSameDexMapFound) {
-      size += ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask);
+      size += ComputeDexRegisterMapSize(entry.num_dex_registers, entry.live_dex_registers_mask);
     } else {
       // Entries with the same dex map will have the same offset.
     }
     for (size_t j = 0; j < entry.inlining_depth; ++j) {
-      InlineInfoEntry inline_entry = inline_infos_.Get(inline_info_index++);
+      DCHECK_LT(inline_info_index, inline_infos_.size());
+      InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
       size += ComputeDexRegisterMapSize(inline_entry.num_dex_registers,
-                                        *inline_entry.live_dex_registers_mask);
+                                        inline_entry.live_dex_registers_mask);
     }
   }
   return size;
 }
 
 size_t StackMapStream::ComputeInlineInfoSize() const {
-  return inline_infos_.Size() * InlineInfo::SingleEntrySize()
+  return inline_infos_.size() * InlineInfo::SingleEntrySize()
     // For encoding the depth.
     + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize);
 }
@@ -244,19 +239,18 @@
       inline_infos_start_, inline_info_size_);
 
   code_info.SetEncoding(stack_map_encoding_);
-  code_info.SetNumberOfStackMaps(stack_maps_.Size());
+  code_info.SetNumberOfStackMaps(stack_maps_.size());
   DCHECK_EQ(code_info.GetStackMapsSize(code_info.ExtractEncoding()), stack_maps_size_);
 
   // Set the Dex register location catalog.
-  code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.Size());
+  code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.size());
   MemoryRegion dex_register_location_catalog_region = region.Subregion(
       dex_register_location_catalog_start_, dex_register_location_catalog_size_);
   DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region);
   // Offset in `dex_register_location_catalog` where to store the next
   // register location.
   size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize;
-  for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) {
-    DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i);
+  for (DexRegisterLocation dex_register_location : location_catalog_entries_) {
     dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location);
     location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location);
   }
@@ -265,9 +259,9 @@
 
   uintptr_t next_dex_register_map_offset = 0;
   uintptr_t next_inline_info_offset = 0;
-  for (size_t i = 0, e = stack_maps_.Size(); i < e; ++i) {
+  for (size_t i = 0, e = stack_maps_.size(); i < e; ++i) {
     StackMap stack_map = code_info.GetStackMapAt(i, stack_map_encoding_);
-    StackMapEntry entry = stack_maps_.Get(i);
+    StackMapEntry entry = stack_maps_[i];
 
     stack_map.SetDexPc(stack_map_encoding_, entry.dex_pc);
     stack_map.SetNativePcOffset(stack_map_encoding_, entry.native_pc_offset);
@@ -291,7 +285,7 @@
         // New dex register maps should be added to the stack map.
         MemoryRegion register_region = dex_register_locations_region.Subregion(
             next_dex_register_map_offset,
-            ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask));
+            ComputeDexRegisterMapSize(entry.num_dex_registers, entry.live_dex_registers_mask));
         next_dex_register_map_offset += register_region.size();
         DexRegisterMap dex_register_map(register_region);
         stack_map.SetDexRegisterMapOffset(
@@ -318,8 +312,9 @@
           stack_map_encoding_, inline_region.start() - dex_register_locations_region.start());
 
       inline_info.SetDepth(entry.inlining_depth);
+      DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size());
       for (size_t depth = 0; depth < entry.inlining_depth; ++depth) {
-        InlineInfoEntry inline_entry = inline_infos_.Get(depth + entry.inline_infos_start_index);
+        InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index];
         inline_info.SetMethodIndexAtDepth(depth, inline_entry.method_index);
         inline_info.SetDexPcAtDepth(depth, inline_entry.dex_pc);
         inline_info.SetInvokeTypeAtDepth(depth, inline_entry.invoke_type);
@@ -331,7 +326,7 @@
           MemoryRegion register_region = dex_register_locations_region.Subregion(
               next_dex_register_map_offset,
               ComputeDexRegisterMapSize(inline_entry.num_dex_registers,
-                                        *inline_entry.live_dex_registers_mask));
+                                        inline_entry.live_dex_registers_mask));
           next_dex_register_map_offset += register_region.size();
           DexRegisterMap dex_register_map(register_region);
           inline_info.SetDexRegisterMapOffsetAtDepth(
@@ -357,42 +352,43 @@
                                           uint32_t start_index_in_dex_register_locations) const {
   dex_register_map.SetLiveBitMask(num_dex_registers, live_dex_registers_mask);
   // Set the dex register location mapping data.
-  for (size_t dex_register_number = 0, index_in_dex_register_locations = 0;
-       dex_register_number < num_dex_registers;
-       ++dex_register_number) {
-    if (live_dex_registers_mask.IsBitSet(dex_register_number)) {
-      size_t location_catalog_entry_index = dex_register_locations_.Get(
-          start_index_in_dex_register_locations + index_in_dex_register_locations);
-      dex_register_map.SetLocationCatalogEntryIndex(
-          index_in_dex_register_locations,
-          location_catalog_entry_index,
-          num_dex_registers,
-          location_catalog_entries_.Size());
-      ++index_in_dex_register_locations;
-    }
+  size_t number_of_live_dex_registers = live_dex_registers_mask.NumSetBits();
+  DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size());
+  DCHECK_LE(start_index_in_dex_register_locations,
+            dex_register_locations_.size() - number_of_live_dex_registers);
+  for (size_t index_in_dex_register_locations = 0;
+       index_in_dex_register_locations != number_of_live_dex_registers;
+       ++index_in_dex_register_locations) {
+    size_t location_catalog_entry_index = dex_register_locations_[
+        start_index_in_dex_register_locations + index_in_dex_register_locations];
+    dex_register_map.SetLocationCatalogEntryIndex(
+        index_in_dex_register_locations,
+        location_catalog_entry_index,
+        num_dex_registers,
+        location_catalog_entries_.size());
   }
 }
 
 size_t StackMapStream::FindEntryWithTheSameDexMap() {
-  size_t current_entry_index = stack_maps_.Size();
+  size_t current_entry_index = stack_maps_.size();
   auto entries_it = dex_map_hash_to_stack_map_indices_.find(current_entry_.dex_register_map_hash);
   if (entries_it == dex_map_hash_to_stack_map_indices_.end()) {
     // We don't have a perfect hash function, so we need a list to collect all stack maps
     // which might have the same dex register map.
-    GrowableArray<uint32_t> stack_map_indices(allocator_, 1);
-    stack_map_indices.Add(current_entry_index);
-    dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, stack_map_indices);
+    ArenaVector<uint32_t> stack_map_indices(allocator_->Adapter(kArenaAllocStackMapStream));
+    stack_map_indices.push_back(current_entry_index);
+    dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash,
+                                           std::move(stack_map_indices));
     return kNoSameDexMapFound;
   }
 
   // We might have collisions, so we need to check whether or not we really have a match.
-  for (size_t i = 0; i < entries_it->second.Size(); i++) {
-    size_t test_entry_index = entries_it->second.Get(i);
-    if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), current_entry_)) {
+  for (uint32_t test_entry_index : entries_it->second) {
+    if (HaveTheSameDexMaps(GetStackMap(test_entry_index), current_entry_)) {
       return test_entry_index;
     }
   }
-  entries_it->second.Add(current_entry_index);
+  entries_it->second.push_back(current_entry_index);
   return kNoSameDexMapFound;
 }
 
@@ -406,21 +402,22 @@
   if (a.num_dex_registers != b.num_dex_registers) {
     return false;
   }
-
-  int index_in_dex_register_locations = 0;
-  for (uint32_t i = 0; i < a.num_dex_registers; i++) {
-    if (a.live_dex_registers_mask->IsBitSet(i) != b.live_dex_registers_mask->IsBitSet(i)) {
+  if (a.num_dex_registers != 0u) {
+    DCHECK(a.live_dex_registers_mask != nullptr);
+    DCHECK(b.live_dex_registers_mask != nullptr);
+    if (!a.live_dex_registers_mask->Equal(b.live_dex_registers_mask)) {
       return false;
     }
-    if (a.live_dex_registers_mask->IsBitSet(i)) {
-      size_t a_loc = dex_register_locations_.Get(
-          a.dex_register_locations_start_index + index_in_dex_register_locations);
-      size_t b_loc = dex_register_locations_.Get(
-          b.dex_register_locations_start_index + index_in_dex_register_locations);
-      if (a_loc != b_loc) {
-        return false;
-      }
-      ++index_in_dex_register_locations;
+    size_t number_of_live_dex_registers = a.live_dex_registers_mask->NumSetBits();
+    DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size());
+    DCHECK_LE(a.dex_register_locations_start_index,
+              dex_register_locations_.size() - number_of_live_dex_registers);
+    DCHECK_LE(b.dex_register_locations_start_index,
+              dex_register_locations_.size() - number_of_live_dex_registers);
+    auto a_begin = dex_register_locations_.begin() + a.dex_register_locations_start_index;
+    auto b_begin = dex_register_locations_.begin() + b.dex_register_locations_start_index;
+    if (!std::equal(a_begin, a_begin + number_of_live_dex_registers, b_begin)) {
+      return false;
     }
   }
   return true;
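HaveTheSameDexMaps now derives the slice length from NumSetBits() and compares the two runs of catalog indices with std::equal instead of re-walking the liveness mask bit by bit. The sketch below shows that slice comparison on a plain std::vector; the data and the SameLocationSlice name are invented for illustration.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Do the `count` catalog indices starting at a_start and b_start describe the
// same dex register map?
bool SameLocationSlice(const std::vector<std::size_t>& locations,
                       std::size_t a_start, std::size_t b_start, std::size_t count) {
  assert(count <= locations.size());            // mirrors the DCHECK_LE checks above
  assert(a_start <= locations.size() - count);
  assert(b_start <= locations.size() - count);
  auto a_begin = locations.begin() + a_start;
  auto b_begin = locations.begin() + b_start;
  return std::equal(a_begin, a_begin + count, b_begin);
}

int main() {
  // Concatenated per-stack-map catalog indices, as in dex_register_locations_.
  std::vector<std::size_t> locations = {0, 2, 5, 0, 2, 5, 1, 2, 5};
  assert(SameLocationSlice(locations, 0, 3, 3));   // {0,2,5} == {0,2,5}
  assert(!SameLocationSlice(locations, 0, 6, 3));  // {0,2,5} != {1,2,5}
  return 0;
}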
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 703b6f7..4783e28 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -24,7 +24,6 @@
 #include "memory_region.h"
 #include "nodes.h"
 #include "stack_map.h"
-#include "utils/growable_array.h"
 
 namespace art {
 
@@ -62,15 +61,16 @@
  public:
   explicit StackMapStream(ArenaAllocator* allocator)
       : allocator_(allocator),
-        stack_maps_(allocator, 10),
-        location_catalog_entries_(allocator, 4),
-        dex_register_locations_(allocator, 10 * 4),
-        inline_infos_(allocator, 2),
+        stack_maps_(allocator->Adapter(kArenaAllocStackMapStream)),
+        location_catalog_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
+        dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
+        inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
         stack_mask_max_(-1),
         dex_pc_max_(0),
         register_mask_max_(0),
         number_of_stack_maps_with_inline_info_(0),
-        dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()),
+        dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(),
+                                           allocator->Adapter(kArenaAllocStackMapStream)),
         current_entry_(),
         current_inline_info_(),
         stack_mask_size_(0),
@@ -84,7 +84,12 @@
         inline_infos_start_(0),
         needed_size_(0),
         current_dex_register_(0),
-        in_inline_frame_(false) {}
+        in_inline_frame_(false) {
+    stack_maps_.reserve(10);
+    location_catalog_entries_.reserve(4);
+    dex_register_locations_.reserve(10 * 4);
+    inline_infos_.reserve(2);
+  }
 
   // See runtime/stack_map.h to know what these fields contain.
   struct StackMapEntry {
@@ -127,17 +132,17 @@
   void EndInlineInfoEntry();
 
   size_t GetNumberOfStackMaps() const {
-    return stack_maps_.Size();
+    return stack_maps_.size();
   }
 
   const StackMapEntry& GetStackMap(size_t i) const {
-    DCHECK_LT(i, stack_maps_.Size());
-    return stack_maps_.GetRawStorage()[i];
+    DCHECK_LT(i, stack_maps_.size());
+    return stack_maps_[i];
   }
 
   void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) {
-    DCHECK_LT(i, stack_maps_.Size());
-    stack_maps_.GetRawStorage()[i].native_pc_offset = native_pc_offset;
+    DCHECK_LT(i, stack_maps_.size());
+    stack_maps_[i].native_pc_offset = native_pc_offset;
   }
 
   uint32_t ComputeMaxNativePcOffset() const;
@@ -150,7 +155,7 @@
  private:
   size_t ComputeDexRegisterLocationCatalogSize() const;
   size_t ComputeDexRegisterMapSize(uint32_t num_dex_registers,
-                                   const BitVector& live_dex_registers_mask) const;
+                                   const BitVector* live_dex_registers_mask) const;
   size_t ComputeDexRegisterMapsSize() const;
   size_t ComputeInlineInfoSize() const;
 
@@ -164,10 +169,10 @@
                             uint32_t start_index_in_dex_register_locations) const;
 
   ArenaAllocator* allocator_;
-  GrowableArray<StackMapEntry> stack_maps_;
+  ArenaVector<StackMapEntry> stack_maps_;
 
   // A catalog of unique [location_kind, register_value] pairs (per method).
-  GrowableArray<DexRegisterLocation> location_catalog_entries_;
+  ArenaVector<DexRegisterLocation> location_catalog_entries_;
   // Map from Dex register location catalog entries to their indices in the
   // location catalog.
   typedef HashMap<DexRegisterLocation, size_t, LocationCatalogEntriesIndicesEmptyFn,
@@ -175,14 +180,14 @@
   LocationCatalogEntriesIndices location_catalog_entries_indices_;
 
   // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`.
-  GrowableArray<size_t> dex_register_locations_;
-  GrowableArray<InlineInfoEntry> inline_infos_;
+  ArenaVector<size_t> dex_register_locations_;
+  ArenaVector<InlineInfoEntry> inline_infos_;
   int stack_mask_max_;
   uint32_t dex_pc_max_;
   uint32_t register_mask_max_;
   size_t number_of_stack_maps_with_inline_info_;
 
-  ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_;
+  ArenaSafeMap<uint32_t, ArenaVector<uint32_t>> dex_map_hash_to_stack_map_indices_;
 
   StackMapEntry current_entry_;
   InlineInfoEntry current_inline_info_;
diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h
index 303e0d5..48f0328 100644
--- a/compiler/utils/array_ref.h
+++ b/compiler/utils/array_ref.h
@@ -161,6 +161,15 @@
   value_type* data() { return array_; }
   const value_type* data() const { return array_; }
 
+  ArrayRef SubArray(size_type pos) const {
+    return SubArray(pos, size_ - pos);
+  }
+  ArrayRef SubArray(size_type pos, size_type length) const {
+    DCHECK_LE(pos, size());
+    DCHECK_LE(length, size() - pos);
+    return ArrayRef(array_ + pos, length);
+  }
+
  private:
   T* array_;
   size_t size_;
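The new SubArray methods return a non-owning view of either the suffix starting at pos or a slice of the given length. Their behaviour closely matches C++20 std::span::subspan, which the standalone sketch below uses in place of the ART type (assuming a C++20 compiler).

#include <cassert>
#include <span>

int main() {
  int data[] = {10, 20, 30, 40, 50};
  std::span<int> all(data);                // counterpart of an ArrayRef<int> over `data`

  std::span<int> tail = all.subspan(2);    // like SubArray(pos): everything from pos on
  assert(tail.size() == 3 && tail[0] == 30);

  std::span<int> mid = all.subspan(1, 3);  // like SubArray(pos, length)
  assert(mid.size() == 3 && mid[0] == 20 && mid[2] == 40);

  // Both are views: writing through the slice writes the underlying storage.
  mid[0] = 99;
  assert(data[1] == 99);
  return 0;
}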
diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h
deleted file mode 100644
index f85e026..0000000
--- a/compiler/utils/growable_array.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_GROWABLE_ARRAY_H_
-#define ART_COMPILER_UTILS_GROWABLE_ARRAY_H_
-
-#include <stdint.h>
-#include <stddef.h>
-
-#include "base/arena_object.h"
-
-namespace art {
-
-// Deprecated
-// TODO: Replace all uses with ArenaVector<T>.
-template<typename T>
-class GrowableArray : public ArenaObject<kArenaAllocGrowableArray> {
-  public:
-    GrowableArray(ArenaAllocator* arena, size_t init_length)
-      : arena_(arena),
-        num_allocated_(init_length),
-        num_used_(0) {
-      elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray);
-    }
-
-    GrowableArray(ArenaAllocator* arena, size_t init_length, T initial_data)
-      : arena_(arena),
-        num_allocated_(init_length),
-        num_used_(init_length) {
-      elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray);
-      for (size_t i = 0; i < init_length; ++i) {
-        elem_list_[i] = initial_data;
-      }
-    }
-
-    bool Contains(T value, size_t start_from = 0) const {
-      for (size_t i = start_from; i < num_used_; ++i) {
-        if (elem_list_[i] == value) {
-          return true;
-        }
-      }
-      return false;
-    }
-
-    // Expand the list size to at least new length.
-    void Resize(size_t new_length) {
-      if (new_length <= num_allocated_) return;
-      // If it's a small list double the size, else grow 1.5x.
-      size_t target_length =
-          (num_allocated_ < 128) ? num_allocated_ << 1 : num_allocated_ + (num_allocated_ >> 1);
-      if (new_length > target_length) {
-         target_length = new_length;
-      }
-      T* new_array = arena_->AllocArray<T>(target_length, kArenaAllocGrowableArray);
-      memcpy(new_array, elem_list_, sizeof(T) * num_allocated_);
-      num_allocated_ = target_length;
-      elem_list_ = new_array;
-    }
-
-    // NOTE: does not return storage, just resets use count.
-    void Reset() {
-      num_used_ = 0;
-    }
-
-    // Insert an element to the end of a list, resizing if necessary.
-    void Insert(T elem) {
-      if (num_used_ == num_allocated_) {
-        Resize(num_used_ + 1);
-      }
-      elem_list_[num_used_++] = elem;
-    }
-
-    void InsertAt(size_t index, T elem) {
-      DCHECK(index <= Size());
-      Insert(elem);
-      for (size_t i = Size() - 1; i > index; --i) {
-        elem_list_[i] = elem_list_[i - 1];
-      }
-      elem_list_[index] = elem;
-    }
-
-    void Add(T elem) {
-      Insert(elem);
-    }
-
-    T Get(size_t index) const {
-      DCHECK_LT(index, num_used_);
-      return elem_list_[index];
-    }
-
-    // Overwrite existing element at position index.  List must be large enough.
-    void Put(size_t index, T elem) {
-      DCHECK_LT(index, num_used_);
-      elem_list_[index] = elem;
-    }
-
-    void Increment(size_t index) {
-      DCHECK_LT(index, num_used_);
-      elem_list_[index]++;
-    }
-
-    /*
-     * Remove an existing element from list.  If there are more than one copy
-     * of the element, only the first one encountered will be deleted.
-     */
-    // TODO: consider renaming this.
-    void Delete(T element) {
-      bool found = false;
-      for (size_t i = 0; i < num_used_ - 1; i++) {
-        if (!found && elem_list_[i] == element) {
-          found = true;
-        }
-        if (found) {
-          elem_list_[i] = elem_list_[i+1];
-        }
-      }
-      // We should either have found the element, or it was the last (unscanned) element.
-      DCHECK(found || (element == elem_list_[num_used_ - 1]));
-      num_used_--;
-    }
-
-    void DeleteAt(size_t index) {
-      for (size_t i = index; i < num_used_ - 1; i++) {
-        elem_list_[i] = elem_list_[i + 1];
-      }
-      num_used_--;
-    }
-
-    size_t GetNumAllocated() const { return num_allocated_; }
-
-    size_t Size() const { return num_used_; }
-
-    bool IsEmpty() const { return num_used_ == 0; }
-
-    T Pop() {
-      DCHECK_GE(num_used_, (size_t)0);
-      return elem_list_[--num_used_];
-    }
-
-    T Peek() const {
-      DCHECK_GE(num_used_, (size_t)0);
-      return elem_list_[num_used_ - 1];
-    }
-
-    void SetSize(size_t new_size) {
-      Resize(new_size);
-      num_used_ = new_size;
-    }
-
-    T* GetRawStorage() const { return elem_list_; }
-
-  private:
-    ArenaAllocator* const arena_;
-    size_t num_allocated_;
-    size_t num_used_;
-    T* elem_list_;
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_UTILS_GROWABLE_ARRAY_H_
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index 3cfdc4c..e252765 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -58,14 +58,16 @@
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libsigchain libziparchive-host,art/compiler,host,ndebug,$(dex2oat_host_arch)))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixl liblog libz libbacktrace libcutils libunwindbacktrace libutils libbase,art/compiler,host,ndebug,$(dex2oat_host_arch),static))
+    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixl liblog libz \
+        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase,art/compiler,host,ndebug,$(dex2oat_host_arch),static))
   endif
 endif
 
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain libziparchive-host,art/compiler,host,debug,$(dex2oat_host_arch)))
   ifeq ($(ART_BUILD_HOST_STATIC),true)
-    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixld liblog libz libbacktrace libcutils libunwindbacktrace libutils libbase,art/compiler,host,debug,$(dex2oat_host_arch),static))
+    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixld liblog libz \
+        libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase,art/compiler,host,debug,$(dex2oat_host_arch),static))
   endif
 endif
 
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index cc32da1..680e2d7 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -235,11 +235,7 @@
   UsageError("  --compiler-backend=(Quick|Optimizing): select compiler backend");
   UsageError("      set.");
   UsageError("      Example: --compiler-backend=Optimizing");
-  if (kUseOptimizingCompiler) {
-    UsageError("      Default: Optimizing");
-  } else {
-    UsageError("      Default: Quick");
-  }
+  UsageError("      Default: Optimizing");
   UsageError("");
   UsageError("  --compiler-filter="
                 "(verify-none"
@@ -503,7 +499,7 @@
 class Dex2Oat FINAL {
  public:
   explicit Dex2Oat(TimingLogger* timings) :
-      compiler_kind_(kUseOptimizingCompiler ? Compiler::kOptimizing : Compiler::kQuick),
+      compiler_kind_(Compiler::kOptimizing),
       instruction_set_(kRuntimeISA),
       // Take the default set of instruction features from the build.
       verification_results_(nullptr),
@@ -752,10 +748,9 @@
 
   void ProcessOptions(ParserOptions* parser_options) {
     image_ = (!image_filename_.empty());
-    if (!parser_options->requested_specific_compiler && !kUseOptimizingCompiler) {
-      // If no specific compiler is requested, the current behavior is
-      // to compile the boot image with Quick, and the rest with Optimizing.
-      compiler_kind_ = image_ ? Compiler::kQuick : Compiler::kOptimizing;
+    if (image_) {
+      // We need the boot image to always be debuggable.
+      parser_options->debuggable = true;
     }
 
     if (oat_filename_.empty() && oat_fd_ == -1) {
diff --git a/dexdump/Android.mk b/dexdump/Android.mk
index a208ccf..ec2529e 100755
--- a/dexdump/Android.mk
+++ b/dexdump/Android.mk
@@ -34,8 +34,6 @@
 LOCAL_CFLAGS += -Wall
 LOCAL_SHARED_LIBRARIES += $(dexdump_libraries)
 LOCAL_MODULE := dexdump2
-LOCAL_MODULE_TAGS := optional
-LOCAL_MODULE_PATH := $(TARGET_OUT_OPTIONAL_EXECUTABLES)
 include $(BUILD_EXECUTABLE)
 endif # !SDK_ONLY
 
diff --git a/dexdump/dexdump_test.cc b/dexdump/dexdump_test.cc
index d9b210d..4230cb2 100644
--- a/dexdump/dexdump_test.cc
+++ b/dexdump/dexdump_test.cc
@@ -43,12 +43,7 @@
   // Runs test with given arguments.
   bool Exec(const std::vector<std::string>& args, std::string* error_msg) {
     // TODO(ajcbik): dexdump2 -> dexdump
-    std::string file_path = GetTestAndroidRoot();
-    if (IsHost()) {
-      file_path += "/bin/dexdump2";
-    } else {
-      file_path += "/xbin/dexdump2";
-    }
+    std::string file_path = GetTestAndroidRoot() + "/bin/dexdump2";
     EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path";
     std::vector<std::string> exec_argv = { file_path };
     exec_argv.insert(exec_argv.end(), args.begin(), args.end());
diff --git a/imgdiag/imgdiag_test.cc b/imgdiag/imgdiag_test.cc
index 1ac7930..82bc8b9 100644
--- a/imgdiag/imgdiag_test.cc
+++ b/imgdiag/imgdiag_test.cc
@@ -109,11 +109,12 @@
   std::string boot_image_location_;
 };
 
-#if defined (ART_TARGET)
+#if defined (ART_TARGET) && !defined(__mips__)
 TEST_F(ImgDiagTest, ImageDiffPidSelf) {
 #else
 // Can't run this test on the host, it will fail when trying to open /proc/kpagestats
 // because it's root read-only.
+// Also test fails on mips. b/24596015.
 TEST_F(ImgDiagTest, DISABLED_ImageDiffPidSelf) {
 #endif
   // Invoke 'img_diag' against the current process.
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 995a1d5..059c4cd 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -104,6 +104,7 @@
   lambda/box_table.cc \
   lambda/closure.cc \
   lambda/closure_builder.cc \
+  lambda/leaking_allocator.cc \
   jni_internal.cc \
   jobject_comparator.cc \
   linear_alloc.cc \
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 403d348..8f6b1ff 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -30,9 +30,11 @@
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[SP] = &sp_;
   gprs_[PC] = &pc_;
+  gprs_[R0] = &arg0_;
   // Initialize registers with easy to spot debug values.
   sp_ = ArmContext::kBadGprBase + SP;
   pc_ = ArmContext::kBadGprBase + PC;
+  arg0_ = 0;
 }
 
 void ArmContext::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h
index 77bb5c8..ea31055 100644
--- a/runtime/arch/arm/context_arm.h
+++ b/runtime/arch/arm/context_arm.h
@@ -45,6 +45,10 @@
     SetGPR(PC, new_pc);
   }
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(R0, new_arg0_value);
+  }
+
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
     return gprs_[reg] != nullptr;
@@ -84,7 +88,7 @@
   uintptr_t* gprs_[kNumberOfCoreRegisters];
   uint32_t* fprs_[kNumberOfSRegisters];
   // Hold values for sp and pc if they are not located within a stack frame.
-  uintptr_t sp_, pc_;
+  uintptr_t sp_, pc_, arg0_;
 };
 
 }  // namespace arm
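The ARM context now maps R0 onto a dedicated arg0_ field so that a restored context can hand a value to the code it long-jumps to. The underlying pattern is a table of per-register pointers that either point into a saved frame or into the context's own backing fields; a minimal sketch of that pattern (simplified names and register numbers, not the ART class):

    #include <cstddef>
    #include <cstdint>

    // Illustrative register indices only; a real backend defines the full set.
    enum Reg { R0 = 0, SP = 13, PC = 15, kNumRegs = 16 };

    class SketchContext {
     public:
      SketchContext() {
        for (std::uintptr_t*& slot : gprs_) slot = nullptr;
        gprs_[SP] = &sp_;
        gprs_[PC] = &pc_;
        gprs_[R0] = &arg0_;  // The first argument register now has its own backing field.
        sp_ = pc_ = arg0_ = 0;
      }
      void SetGPR(std::size_t reg, std::uintptr_t value) {
        if (gprs_[reg] != nullptr) {
          *gprs_[reg] = value;
        }
      }
      void SetArg0(std::uintptr_t value) { SetGPR(R0, value); }  // Mirrors the hook added above.

     private:
      std::uintptr_t* gprs_[kNumRegs];  // Each entry points into a frame slot or a field below.
      std::uintptr_t sp_, pc_, arg0_;   // Backing storage when no frame slot exists.
    };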
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index e45d828..d09631b 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -437,8 +437,8 @@
     ldr  r14, [r0, #56]   @ (LR from gprs_ 56=4*14)
     add  r0, r0, #12      @ increment r0 to skip gprs_[0..2] 12=4*3
     ldm  r0, {r3-r13}     @ load remaining gprs from argument gprs_
-    mov  r0, #0           @ clear result registers r0 and r1
-    mov  r1, #0
+    ldr  r0, [r0, #-12]   @ load r0 value
+    mov  r1, #0           @ clear result register r1
     bx   r2               @ do long jump
 END art_quick_do_long_jump
 
@@ -839,13 +839,12 @@
 TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
     /*
      * Called by managed code to resolve a static field and store a 64-bit primitive value.
-     * On entry r0 holds field index, r1:r2 hold new_val
+     * On entry r0 holds field index, r2:r3 hold new_val
      */
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3, r12   @ save callee saves in case of GC
-    mov    r3, r2                        @ pass one half of wide argument
-    mov    r2, r1                        @ pass other half of wide argument
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r12   @ save callee saves in case of GC
+                                         @ r2:r3 contain the wide argument
     ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
     str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 16
@@ -870,6 +869,7 @@
     .extern artSet64InstanceFromCode
 ENTRY art_quick_set64_instance
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r12, lr  @ save callee saves in case of GC
+                                         @ r2:r3 contain the wide argument
     ldr    r12, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
     str    r9, [sp, #-12]!               @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 12
@@ -1142,7 +1142,7 @@
 
     /*
      * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
-     * will long jump to the upcall with a special exception of -1.
+     * will long jump to the interpreter bridge.
      */
     .extern artDeoptimizeFromCompiledCode
 ENTRY art_quick_deoptimize_from_compiled_code
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index 60becc6..4477631 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -31,10 +31,12 @@
   std::fill_n(gprs_, arraysize(gprs_), nullptr);
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[SP] = &sp_;
-  gprs_[LR] = &pc_;
+  gprs_[kPC] = &pc_;
+  gprs_[X0] = &arg0_;
   // Initialize registers with easy to spot debug values.
   sp_ = Arm64Context::kBadGprBase + SP;
-  pc_ = Arm64Context::kBadGprBase + LR;
+  pc_ = Arm64Context::kBadGprBase + kPC;
+  arg0_ = 0;
 }
 
 void Arm64Context::FillCalleeSaves(const StackVisitor& fr) {
@@ -58,8 +60,8 @@
 }
 
 void Arm64Context::SetGPR(uint32_t reg, uintptr_t value) {
-  DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
-  DCHECK_NE(reg, static_cast<uint32_t>(XZR));
+  DCHECK_LT(reg, arraysize(gprs_));
+  // Note: we use kPC == XZR, so do not ensure that reg != XZR.
   DCHECK(IsAccessibleGPR(reg));
   DCHECK_NE(gprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
   *gprs_[reg] = value;
@@ -124,13 +126,13 @@
 extern "C" NO_RETURN void art_quick_do_long_jump(uint64_t*, uint64_t*);
 
 void Arm64Context::DoLongJump() {
-  uint64_t gprs[kNumberOfXRegisters];
+  uint64_t gprs[arraysize(gprs_)];
   uint64_t fprs[kNumberOfDRegisters];
 
   // The long jump routine called below expects to find the value for SP at index 31.
   DCHECK_EQ(SP, 31);
 
-  for (size_t i = 0; i < kNumberOfXRegisters; ++i) {
+  for (size_t i = 0; i < arraysize(gprs_); ++i) {
     gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : Arm64Context::kBadGprBase + i;
   }
   for (size_t i = 0; i < kNumberOfDRegisters; ++i) {
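Before handing control to the assembly routine, DoLongJump flattens the pointer table into plain arrays, substituting an easy-to-spot debug value for any register that was never mapped. Reduced to its essentials (the sentinel constant is illustrative):

    #include <cstddef>
    #include <cstdint>

    constexpr std::uintptr_t kBadBase = 0xebad6070;  // Illustrative sentinel, easy to spot in a crash dump.

    // Copies mapped registers into a flat array for the long-jump routine and fills unmapped
    // slots with the sentinel plus the register index.
    inline void FlattenRegisters(std::uintptr_t* const* mapped, std::size_t count, std::uint64_t* out) {
      for (std::size_t i = 0; i < count; ++i) {
        out[i] = (mapped[i] != nullptr) ? *mapped[i] : static_cast<std::uint64_t>(kBadBase + i);
      }
    }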
diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h
index 1c99f3c..11314e0 100644
--- a/runtime/arch/arm64/context_arm64.h
+++ b/runtime/arch/arm64/context_arm64.h
@@ -42,20 +42,25 @@
   }
 
   void SetPC(uintptr_t new_lr) OVERRIDE {
-    SetGPR(LR, new_lr);
+    SetGPR(kPC, new_lr);
+  }
+
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(X0, new_arg0_value);
   }
 
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
-    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
+    DCHECK_LT(reg, arraysize(gprs_));
     return gprs_[reg] != nullptr;
   }
 
   uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
-    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
+    DCHECK_LT(reg, arraysize(gprs_));
     return gprs_[reg];
   }
 
   uintptr_t GetGPR(uint32_t reg) OVERRIDE {
+    // Note: PC isn't an available GPR (outside of internals), so don't allow retrieving the value.
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
     DCHECK(IsAccessibleGPR(reg));
     return *gprs_[reg];
@@ -79,12 +84,15 @@
   void SmashCallerSaves() OVERRIDE;
   NO_RETURN void DoLongJump() OVERRIDE;
 
+  static constexpr size_t kPC = kNumberOfXRegisters;
+
  private:
-  // Pointers to register locations, initialized to null or the specific registers below.
-  uintptr_t* gprs_[kNumberOfXRegisters];
+  // Pointers to register locations, initialized to null or the specific registers below. We need
+  // an additional one for the PC.
+  uintptr_t* gprs_[kNumberOfXRegisters + 1];
   uint64_t * fprs_[kNumberOfDRegisters];
-  // Hold values for sp and pc if they are not located within a stack frame.
-  uintptr_t sp_, pc_;
+  // Hold values for sp, pc and arg0 if they are not located within a stack frame.
+  uintptr_t sp_, pc_, arg0_;
 };
 
 }  // namespace arm64
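Because the PC is not addressable as a general-purpose register on arm64, the patch appends one extra slot to the pointer table and indexes it through a kPC constant equal to the number of real entries. The sizing trick in isolation (the register count here is illustrative, not ART's actual constant):

    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t kNumberOfXRegs = 31;   // Illustrative count of real X registers.
    constexpr std::size_t kPC = kNumberOfXRegs;  // Pseudo-register index, one past the real ones.

    struct SketchArm64Context {
      std::uintptr_t* gprs[kNumberOfXRegs + 1];  // The extra slot backs the PC.
    };

    static_assert(kPC < sizeof(SketchArm64Context::gprs) / sizeof(std::uintptr_t*),
                  "kPC must address the extra slot inside the table");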
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 169bc38..be5a15e 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -941,7 +941,7 @@
     // Load GPRs
     // TODO: lots of those are smashed, could optimize.
     add x0, x0, #30*8
-    ldp x30, x1, [x0], #-16
+    ldp x30, x1, [x0], #-16          // LR & SP
     ldp x28, x29, [x0], #-16
     ldp x26, x27, [x0], #-16
     ldp x24, x25, [x0], #-16
@@ -958,10 +958,12 @@
     ldp x2, x3, [x0], #-16
     mov sp, x1
 
-    // TODO: Is it really OK to use LR for the target PC?
-    mov x0, #0
-    mov x1, #0
-    br  xLR
+    // Need to load PC, it's at the end (after the space for the unused XZR). Use x1.
+    // Need to load the PC; it's at the end (after the space for the unused XZR). Use x1.
+    ldr x1, [x0, #33*8]
+    // And the value of x0.
+    ldr x0, [x0]
+
+    br  x1
 END art_quick_do_long_jump
 
     /*
@@ -1419,9 +1421,8 @@
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    mov    x3, x1                     // Store value
     ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
-    mov    x2, x3                     // Put value param
+                                      // x2 contains the parameter
     mov    x3, xSELF                  // pass Thread::Current
     bl     artSet64StaticFromCode
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
diff --git a/runtime/arch/context.h b/runtime/arch/context.h
index 9ef761e..9af7c04 100644
--- a/runtime/arch/context.h
+++ b/runtime/arch/context.h
@@ -50,6 +50,9 @@
   // Sets the program counter value.
   virtual void SetPC(uintptr_t new_pc) = 0;
 
+  // Sets the first argument register.
+  virtual void SetArg0(uintptr_t new_arg0_value) = 0;
+
   // Returns whether the given GPR is accessible (read or write).
   virtual bool IsAccessibleGPR(uint32_t reg) = 0;
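The new pure virtual rounds out a small resume interface: a caller fills in SP/PC and now also the first argument register before performing the long jump. A hypothetical caller against a stand-in interface (the concrete wiring is not shown in this patch):

    #include <cstdint>

    // Minimal stand-in for the interface above; not the real header.
    struct ContextLike {
      virtual void SetPC(std::uintptr_t new_pc) = 0;
      virtual void SetArg0(std::uintptr_t new_arg0) = 0;
      virtual void DoLongJump() = 0;  // Real implementations never return.
      virtual ~ContextLike() = default;
    };

    // How a resume path can seed the first argument register before jumping.
    void ResumeWithArgument(ContextLike* context, std::uintptr_t new_pc, std::uintptr_t new_arg0) {
      context->SetPC(new_pc);      // Where execution continues after the jump.
      context->SetArg0(new_arg0);  // Lands in r0/x0/a0/eax/rdi per the overrides above.
      context->DoLongJump();       // Does not return.
    }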
 
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index bc2bf68..08ab356 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -30,9 +30,11 @@
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[SP] = &sp_;
   gprs_[RA] = &ra_;
+  gprs_[A0] = &arg0_;
   // Initialize registers with easy to spot debug values.
   sp_ = MipsContext::kBadGprBase + SP;
   ra_ = MipsContext::kBadGprBase + RA;
+  arg0_ = 0;
 }
 
 void MipsContext::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h
index 38cf29a..0affe53 100644
--- a/runtime/arch/mips/context_mips.h
+++ b/runtime/arch/mips/context_mips.h
@@ -78,12 +78,17 @@
   void SmashCallerSaves() OVERRIDE;
   NO_RETURN void DoLongJump() OVERRIDE;
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(A0, new_arg0_value);
+  }
+
  private:
   // Pointers to registers in the stack, initialized to null except for the special cases below.
   uintptr_t* gprs_[kNumberOfCoreRegisters];
   uint32_t* fprs_[kNumberOfFRegisters];
-  // Hold values for sp and ra (return address) if they are not located within a stack frame.
-  uintptr_t sp_, ra_;
+  // Hold values for sp and ra (return address) if they are not located within a stack frame, as
+  // well as the first argument.
+  uintptr_t sp_, ra_, arg0_;
 };
 }  // namespace mips
 }  // namespace art
diff --git a/runtime/arch/mips/memcmp16_mips.S b/runtime/arch/mips/memcmp16_mips.S
index aef81af..c8eac9b 100644
--- a/runtime/arch/mips/memcmp16_mips.S
+++ b/runtime/arch/mips/memcmp16_mips.S
@@ -26,7 +26,7 @@
   beqz  $a2,done   /* 0 length string */
   beq $a0,$a1,done    /* strings are identical */
 
-  /* Unoptimised... */
+  /* Unoptimized... */
 1:  lhu $t0,0($a0)
   lhu $t1,0($a1)
   addu  $a1,2
diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc
index 6637c37..2c17f1c 100644
--- a/runtime/arch/mips64/context_mips64.cc
+++ b/runtime/arch/mips64/context_mips64.cc
@@ -29,10 +29,12 @@
   std::fill_n(gprs_, arraysize(gprs_), nullptr);
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[SP] = &sp_;
-  gprs_[RA] = &ra_;
+  gprs_[T9] = &t9_;
+  gprs_[A0] = &arg0_;
   // Initialize registers with easy to spot debug values.
   sp_ = Mips64Context::kBadGprBase + SP;
-  ra_ = Mips64Context::kBadGprBase + RA;
+  t9_ = Mips64Context::kBadGprBase + T9;
+  arg0_ = 0;
 }
 
 void Mips64Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/mips64/context_mips64.h b/runtime/arch/mips64/context_mips64.h
index e4a144f..84b1c9b 100644
--- a/runtime/arch/mips64/context_mips64.h
+++ b/runtime/arch/mips64/context_mips64.h
@@ -41,7 +41,7 @@
   }
 
   void SetPC(uintptr_t new_pc) OVERRIDE {
-    SetGPR(RA, new_pc);
+    SetGPR(T9, new_pc);
   }
 
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
@@ -78,13 +78,20 @@
   void SmashCallerSaves() OVERRIDE;
   NO_RETURN void DoLongJump() OVERRIDE;
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(A0, new_arg0_value);
+  }
+
  private:
   // Pointers to registers in the stack, initialized to null except for the special cases below.
   uintptr_t* gprs_[kNumberOfGpuRegisters];
   uint64_t* fprs_[kNumberOfFpuRegisters];
-  // Hold values for sp and ra (return address) if they are not located within a stack frame.
-  uintptr_t sp_, ra_;
+  // Hold values for sp and t9 if they are not located within a stack frame. We use t9 for the
+  // PC (as ra is required to be valid for single-frame deopt and must not be clobbered). We
+  // also need the first argument for single-frame deopt.
+  uintptr_t sp_, t9_, arg0_;
 };
+
 }  // namespace mips64
 }  // namespace art
 
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 1b50b2e..68156ae 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -431,7 +431,7 @@
     ld      $ra, 248($a0)
     ld      $a0, 32($a0)
     move    $v0, $zero          # clear result registers v0 and v1
-    jalr    $zero, $ra          # do long jump
+    jalr    $zero, $t9          # do long jump (do not use ra, it must not be clobbered)
     move    $v1, $zero
 END art_quick_do_long_jump
 
@@ -1244,7 +1244,7 @@
     .extern artSet64StaticFromCode
 ENTRY art_quick_set64_static
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    # save callee saves in case of GC
-    move   $a2, $a1                      # pass new_val
+                                         # a2 contains the new val
     ld     $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp)  # pass referrer's Method*
     jal    artSet64StaticFromCode        # (field_idx, referrer, new_val, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
diff --git a/runtime/arch/mips64/registers_mips64.h b/runtime/arch/mips64/registers_mips64.h
index 38bc8f2..1d07d47 100644
--- a/runtime/arch/mips64/registers_mips64.h
+++ b/runtime/arch/mips64/registers_mips64.h
@@ -59,6 +59,8 @@
   SP   = 29,  // Stack pointer.
   S8   = 30,  // Saved value/frame pointer.
   RA   = 31,  // Return address.
+  TR   = S1,  // ART Thread Register
+  TMP  = T8,  // scratch register (in addition to AT)
   kNumberOfGpuRegisters = 32,
   kNoGpuRegister = -1  // Signals an illegal register.
 };
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index f10799c..016c664 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -418,6 +418,48 @@
     return result;
   }
 
+  // 64bit static field sets use a slightly different register order than Invoke3WithReferrer.
+  // TODO: implement for other architectures
+  // TODO: try merge with Invoke3WithReferrer
+  size_t Invoke64StaticSet(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self,
+                           ArtMethod* referrer) {
+    // Push a transition back into managed code onto the linked list in thread.
+    ManagedStack fragment;
+    self->PushManagedStackFragment(&fragment);
+
+    size_t result;
+    size_t fpr_result = 0;
+#if defined(__x86_64__) && !defined(__APPLE__) && defined(__clang__)
+    // Note: Uses the native convention
+    // TODO: Set the thread?
+    __asm__ __volatile__(
+        "pushq %[referrer]\n\t"        // Push referrer
+        "pushq (%%rsp)\n\t"             // & 16B alignment padding
+        ".cfi_adjust_cfa_offset 16\n\t"
+        "call *%%rax\n\t"              // Call the stub
+        "addq $16, %%rsp\n\t"          // Pop null and padding
+        ".cfi_adjust_cfa_offset -16\n\t"
+        : "=a" (result)
+          // Use the result from rax
+        : "D"(arg0), "d"(arg1), "S"(arg2), "a"(code), [referrer] "c"(referrer)
+          // This places arg0 into rdi, arg1 into rdx, arg2 into rsi, and code into rax
+        : "rbx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+          "memory");  // clobber all
+    // TODO: Should we clobber the other registers?
+#else
+    UNUSED(arg0, arg1, arg2, code, referrer);
+    LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
+    result = 0;
+#endif
+    // Pop transition.
+    self->PopManagedStackFragment(fragment);
+
+    fp_result = fpr_result;
+    EXPECT_EQ(0U, fp_result);
+
+    return result;
+  }
+
   // TODO: Set up a frame according to referrer's specs.
   size_t Invoke3WithReferrerAndHidden(size_t arg0, size_t arg1, size_t arg2, uintptr_t code,
                                       Thread* self, ArtMethod* referrer, size_t hidden) {
@@ -774,22 +816,6 @@
     return result;
   }
 
-  // Method with 32b arg0, 64b arg1
-  size_t Invoke3UWithReferrer(size_t arg0, uint64_t arg1, uintptr_t code, Thread* self,
-                              ArtMethod* referrer) {
-#if (defined(__x86_64__) && !defined(__APPLE__)) || (defined(__mips__) && defined(__LP64__)) || \
-    defined(__aarch64__)
-    // Just pass through.
-    return Invoke3WithReferrer(arg0, arg1, 0U, code, self, referrer);
-#else
-    // Need to split up arguments.
-    uint32_t lower = static_cast<uint32_t>(arg1 & 0xFFFFFFFF);
-    uint32_t upper = static_cast<uint32_t>((arg1 >> 32) & 0xFFFFFFFF);
-
-    return Invoke3WithReferrer(arg0, lower, upper, code, self, referrer);
-#endif
-  }
-
   static uintptr_t GetEntrypoint(Thread* self, QuickEntrypointEnum entrypoint) {
     int32_t offset;
 #ifdef __LP64__
@@ -1974,21 +2000,22 @@
 }
 
 
-// TODO: Complete these tests for 32b architectures.
+// TODO: Complete these tests for 32b architectures
 
 static void GetSet64Static(ArtField* f, Thread* self, ArtMethod* referrer,
                            StubTest* test)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-#if (defined(__x86_64__) && !defined(__APPLE__)) || (defined(__mips__) && defined(__LP64__)) || \
-    defined(__aarch64__)
+// TODO: (defined(__mips__) && defined(__LP64__)) || defined(__aarch64__)
+#if (defined(__x86_64__) && !defined(__APPLE__))
   uint64_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
-    test->Invoke3UWithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
-                               values[i],
-                               StubTest::GetEntrypoint(self, kQuickSet64Static),
-                               self,
-                               referrer);
+    test->Invoke64StaticSet(static_cast<size_t>(f->GetDexFieldIndex()),
+                            values[i],
+                            0U,
+                            StubTest::GetEntrypoint(self, kQuickSet64Static),
+                            self,
+                            referrer);
 
     size_t res = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                            0U, 0U,
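The Invoke64StaticSet harness above leans on GCC/Clang machine constraints to pin each operand to the register the stub expects: "D" is rdi, "S" is rsi, "d" is rdx, "a" is rax and "c" is rcx. A tiny standalone x86-64 example of the same mechanism (unrelated to ART, just the constraint letters):

    #include <cstdint>
    #include <cstdio>

    // Adds two values with an inline-asm statement whose operands are forced into specific
    // registers by the constraints ("D" = rdi, "S" = rsi, "a" = rax). x86-64 only.
    static std::uint64_t add_in_registers(std::uint64_t a, std::uint64_t b) {
      std::uint64_t result;
      __asm__ __volatile__(
          "leaq (%%rdi,%%rsi), %%rax\n\t"
          : "=a"(result)       // Output comes back in rax.
          : "D"(a), "S"(b));   // Inputs pinned to rdi and rsi.
      return result;
    }

    int main() {
      std::printf("%llu\n", static_cast<unsigned long long>(add_in_registers(40, 2)));
      return 0;
    }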
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 7096c82..987ad60 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -29,9 +29,11 @@
   std::fill_n(gprs_, arraysize(gprs_), nullptr);
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[ESP] = &esp_;
+  gprs_[EAX] = &arg0_;
   // Initialize registers with easy to spot debug values.
   esp_ = X86Context::kBadGprBase + ESP;
   eip_ = X86Context::kBadGprBase + kNumberOfCpuRegisters;
+  arg0_ = 0;
 }
 
 void X86Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h
index c4a11d8..59beb12 100644
--- a/runtime/arch/x86/context_x86.h
+++ b/runtime/arch/x86/context_x86.h
@@ -44,6 +44,10 @@
     eip_ = new_pc;
   }
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(EAX, new_arg0_value);
+  }
+
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
     return gprs_[reg] != nullptr;
@@ -95,10 +99,10 @@
   // Pointers to register locations. Values are initialized to null or the special registers below.
   uintptr_t* gprs_[kNumberOfCpuRegisters];
   uint32_t* fprs_[kNumberOfFloatRegisters];
-  // Hold values for esp and eip if they are not located within a stack frame. EIP is somewhat
+  // Hold values for esp, eip and arg0 if they are not located within a stack frame. EIP is somewhat
   // special in that it cannot be encoded normally as a register operand to an instruction (except
   // in 64bit addressing modes).
-  uintptr_t esp_, eip_;
+  uintptr_t esp_, eip_, arg0_;
 };
 }  // namespace x86
 }  // namespace art
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 029a296..3afc4d5 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1434,15 +1434,18 @@
 // Call artSet64StaticFromCode with 3 word size arguments plus with the referrer in the 2nd position
 // so that new_val is aligned on even registers were we passing arguments in registers.
 DEFINE_FUNCTION art_quick_set64_static
+    // TODO: Implement SETUP_GOT_NOSAVE for got_reg = ecx to avoid moving around the registers.
+    movd %ebx, %xmm0
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
-    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ebx  // get referrer
+    movd %xmm0, %ebx
+    mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ecx  // get referrer
     subl LITERAL(12), %esp        // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH edx                      // pass high half of new_val
-    PUSH ecx                      // pass low half of new_val
-    PUSH ebx                      // pass referrer
+    PUSH ebx                      // pass high half of new_val
+    PUSH edx                      // pass low half of new_val
+    PUSH ecx                      // pass referrer
     PUSH eax                      // pass field_idx
     call SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*)
     addl LITERAL(32), %esp        // pop arguments
@@ -1695,7 +1698,7 @@
 
     /*
      * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
-     * will long jump to the upcall with a special exception of -1.
+     * will long jump to the interpreter bridge.
      */
 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 1fe2ef8..3dc7d71 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -29,9 +29,11 @@
   std::fill_n(gprs_, arraysize(gprs_), nullptr);
   std::fill_n(fprs_, arraysize(fprs_), nullptr);
   gprs_[RSP] = &rsp_;
+  gprs_[RDI] = &arg0_;
   // Initialize registers with easy to spot debug values.
   rsp_ = X86_64Context::kBadGprBase + RSP;
   rip_ = X86_64Context::kBadGprBase + kNumberOfCpuRegisters;
+  arg0_ = 0;
 }
 
 void X86_64Context::FillCalleeSaves(const StackVisitor& fr) {
diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h
index 30bb9ec..f05b7f0 100644
--- a/runtime/arch/x86_64/context_x86_64.h
+++ b/runtime/arch/x86_64/context_x86_64.h
@@ -44,6 +44,10 @@
     rip_ = new_pc;
   }
 
+  void SetArg0(uintptr_t new_arg0_value) OVERRIDE {
+    SetGPR(RDI, new_arg0_value);
+  }
+
   bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
     return gprs_[reg] != nullptr;
@@ -82,10 +86,10 @@
   // Pointers to register locations. Values are initialized to null or the special registers below.
   uintptr_t* gprs_[kNumberOfCpuRegisters];
   uint64_t* fprs_[kNumberOfFloatRegisters];
-  // Hold values for rsp and rip if they are not located within a stack frame. RIP is somewhat
+  // Hold values for rsp, rip and arg0 if they are not located within a stack frame. RIP is somewhat
   // special in that it cannot be encoded normally as a register operand to an instruction (except
   // in 64bit addressing modes).
-  uintptr_t rsp_, rip_;
+  uintptr_t rsp_, rip_, arg0_;
 };
 }  // namespace x86_64
 }  // namespace art
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 861f802..1133203 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1383,7 +1383,7 @@
 
 // This is singled out as the argument order is different.
 DEFINE_FUNCTION art_quick_set64_static
-    movq %rsi, %rdx                      // pass new_val
+                                         // new_val is already in %rdx
     movq 8(%rsp), %rsi                   // pass referrer
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
                                          // field_idx is in rdi
@@ -1724,18 +1724,18 @@
      * will long jump to the upcall with a special exception of -1.
      */
 DEFINE_FUNCTION art_quick_deoptimize
-    pushq %rsi                     // Entry point for a jump. Fake that we were called.
-                                   // Use hidden arg.
+    pushq %rsi                         // Entry point for a jump. Fake that we were called.
+                                       // Use hidden arg.
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
-                                   // Stack should be aligned now.
-    movq %gs:THREAD_SELF_OFFSET, %rdi         // Pass Thread.
-    call SYMBOL(artDeoptimize) // artDeoptimize(Thread*)
+                                       // Stack should be aligned now.
+    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
+    call SYMBOL(artDeoptimize)         // artDeoptimize(Thread*)
     UNREACHABLE
 END_FUNCTION art_quick_deoptimize
 
     /*
      * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
-     * will long jump to the upcall with a special exception of -1.
+     * will long jump to the interpreter bridge.
      */
 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index d6b2b7e..632a50f 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -35,6 +35,8 @@
 #include "quick/quick_method_frame_info.h"
 #include "read_barrier-inl.h"
 #include "runtime-inl.h"
+#include "scoped_thread_state_change.h"
+#include "thread-inl.h"
 #include "utils.h"
 
 namespace art {
@@ -75,9 +77,28 @@
           expected_root, desired_root);
 }
 
+// AssertSharedHeld doesn't work in GetAccessFlags, so use a NO_THREAD_SAFETY_ANALYSIS helper.
+// TODO: Figure out why ASSERT_SHARED_CAPABILITY doesn't work.
+ALWAYS_INLINE
+static inline void DoGetAccessFlagsHelper(ArtMethod* method) NO_THREAD_SAFETY_ANALYSIS {
+  CHECK(method->IsRuntimeMethod() || method->GetDeclaringClass()->IsIdxLoaded() ||
+        method->GetDeclaringClass()->IsErroneous());
+}
+
 inline uint32_t ArtMethod::GetAccessFlags() {
-  DCHECK(IsRuntimeMethod() || GetDeclaringClass()->IsIdxLoaded() ||
-         GetDeclaringClass()->IsErroneous());
+  if (kIsDebugBuild) {
+    Thread* self = Thread::Current();
+    if (!Locks::mutator_lock_->IsSharedHeld(self)) {
+      ScopedObjectAccess soa(self);
+      CHECK(IsRuntimeMethod() || GetDeclaringClass()->IsIdxLoaded() ||
+            GetDeclaringClass()->IsErroneous());
+    } else {
+      // We cannot use SOA in this case. We might be holding the lock, but may not be in the
+      // runnable state (e.g., during GC).
+      Locks::mutator_lock_->AssertSharedHeld(self);
+      DoGetAccessFlagsHelper(this);
+    }
+  }
   return access_flags_;
 }
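The rewritten GetAccessFlags only does the expensive validation in debug builds, and it branches on whether the mutator lock is already shared-held: if not, it can safely transition to runnable and take it for the check; if it is, it merely asserts and runs the check through a helper that the thread-safety analysis ignores. The same shape reduced to standard C++ (a sketch: ART can ask the current thread whether it holds the lock, standard mutexes cannot, so the caller supplies that knowledge as a flag):

    #include <cstdlib>
    #include <shared_mutex>

    void CheckInvariant(std::shared_mutex& mu, bool holds_reader_lock, bool invariant_holds) {
      if (!holds_reader_lock) {
        std::shared_lock<std::shared_mutex> lock(mu);  // Take the reader lock just for the check.
        if (!invariant_holds) std::abort();
      } else {
        if (!invariant_holds) std::abort();            // Caller already provides the protection.
      }
    }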
 
diff --git a/runtime/art_method.h b/runtime/art_method.h
index f78c827..0315c3a 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -75,7 +75,9 @@
     return MemberOffset(OFFSETOF_MEMBER(ArtMethod, declaring_class_));
   }
 
-  ALWAYS_INLINE uint32_t GetAccessFlags() SHARED_REQUIRES(Locks::mutator_lock_);
+  // Note: GetAccessFlags acquires the mutator lock in debug mode to check that it is not called for
+  // a proxy method.
+  ALWAYS_INLINE uint32_t GetAccessFlags();
 
   void SetAccessFlags(uint32_t new_access_flags) {
     // Not called within a transaction.
@@ -86,77 +88,78 @@
   InvokeType GetInvokeType() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns true if the method is declared public.
-  bool IsPublic() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsPublic() {
     return (GetAccessFlags() & kAccPublic) != 0;
   }
 
   // Returns true if the method is declared private.
-  bool IsPrivate() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsPrivate() {
     return (GetAccessFlags() & kAccPrivate) != 0;
   }
 
   // Returns true if the method is declared static.
-  bool IsStatic() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsStatic() {
     return (GetAccessFlags() & kAccStatic) != 0;
   }
 
   // Returns true if the method is a constructor.
-  bool IsConstructor() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsConstructor() {
     return (GetAccessFlags() & kAccConstructor) != 0;
   }
 
   // Returns true if the method is a class initializer.
-  bool IsClassInitializer() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsClassInitializer() {
     return IsConstructor() && IsStatic();
   }
 
   // Returns true if the method is static, private, or a constructor.
-  bool IsDirect() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsDirect() {
     return IsDirect(GetAccessFlags());
   }
 
   static bool IsDirect(uint32_t access_flags) {
-    return (access_flags & (kAccStatic | kAccPrivate | kAccConstructor)) != 0;
+    constexpr uint32_t direct = kAccStatic | kAccPrivate | kAccConstructor;
+    return (access_flags & direct) != 0;
   }
 
   // Returns true if the method is declared synchronized.
-  bool IsSynchronized() SHARED_REQUIRES(Locks::mutator_lock_) {
-    uint32_t synchonized = kAccSynchronized | kAccDeclaredSynchronized;
+  bool IsSynchronized() {
+    constexpr uint32_t synchonized = kAccSynchronized | kAccDeclaredSynchronized;
     return (GetAccessFlags() & synchonized) != 0;
   }
 
-  bool IsFinal() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsFinal() {
     return (GetAccessFlags() & kAccFinal) != 0;
   }
 
-  bool IsMiranda() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsMiranda() {
     return (GetAccessFlags() & kAccMiranda) != 0;
   }
 
-  bool IsNative() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsNative() {
     return (GetAccessFlags() & kAccNative) != 0;
   }
 
-  bool IsFastNative() SHARED_REQUIRES(Locks::mutator_lock_) {
-    uint32_t mask = kAccFastNative | kAccNative;
+  bool IsFastNative() {
+    constexpr uint32_t mask = kAccFastNative | kAccNative;
     return (GetAccessFlags() & mask) == mask;
   }
 
-  bool IsAbstract() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsAbstract() {
     return (GetAccessFlags() & kAccAbstract) != 0;
   }
 
-  bool IsSynthetic() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsSynthetic() {
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
   bool IsProxyMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool IsPreverified() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool IsPreverified() {
     return (GetAccessFlags() & kAccPreverified) != 0;
   }
 
-  void SetPreverified() SHARED_REQUIRES(Locks::mutator_lock_) {
+  void SetPreverified() {
     DCHECK(!IsPreverified());
     SetAccessFlags(GetAccessFlags() | kAccPreverified);
   }
@@ -404,7 +407,7 @@
     return GetNativePointer<void*>(EntryPointFromJniOffset(pointer_size), pointer_size);
   }
 
-  void SetEntryPointFromJni(const void* entrypoint) SHARED_REQUIRES(Locks::mutator_lock_) {
+  void SetEntryPointFromJni(const void* entrypoint) {
     DCHECK(IsNative());
     SetEntryPointFromJniPtrSize(entrypoint, sizeof(void*));
   }
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 4e51f55..345428c 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -55,6 +55,7 @@
   "RegAlloc     ",
   "Data         ",
   "STL          ",
+  "GraphBuilder ",
   "Graph        ",
   "BasicBlock   ",
   "BlockList    ",
@@ -74,12 +75,25 @@
   "Environment  ",
   "EnvVRegs     ",
   "EnvLocations ",
+  "LocSummary   ",
   "SsaBuilder   ",
   "MoveOperands ",
   "CodeBuffer   ",
   "StackMaps    ",
   "BaselineMaps ",
   "Optimization ",
+  "GVN          ",
+  "InductionVar ",
+  "BCE          ",
+  "SsaLiveness  ",
+  "SsaPhiElim   ",
+  "RefTypeProp  ",
+  "PrimTypeProp ",
+  "SideEffects  ",
+  "RegAllocator ",
+  "StackMapStm  ",
+  "CodeGen      ",
+  "ParallelMove ",
 };
 
 template <bool kCount>
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index c5eb741..b4f19ee 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -65,6 +65,7 @@
   kArenaAllocRegAlloc,
   kArenaAllocData,
   kArenaAllocSTL,
+  kArenaAllocGraphBuilder,
   kArenaAllocGraph,
   kArenaAllocBasicBlock,
   kArenaAllocBlockList,
@@ -84,12 +85,25 @@
   kArenaAllocEnvironment,
   kArenaAllocEnvironmentVRegs,
   kArenaAllocEnvironmentLocations,
+  kArenaAllocLocationSummary,
   kArenaAllocSsaBuilder,
   kArenaAllocMoveOperands,
   kArenaAllocCodeBuffer,
   kArenaAllocStackMaps,
   kArenaAllocBaselineMaps,
   kArenaAllocOptimization,
+  kArenaAllocGvn,
+  kArenaAllocInductionVarAnalysis,
+  kArenaAllocBoundsCheckElimination,
+  kArenaAllocSsaLiveness,
+  kArenaAllocSsaPhiElimination,
+  kArenaAllocReferenceTypePropagation,
+  kArenaAllocPrimitiveTypePropagation,
+  kArenaAllocSideEffectsAnalysis,
+  kArenaAllocRegisterAllocator,
+  kArenaAllocStackMapStream,
+  kArenaAllocCodeGenerator,
+  kArenaAllocParallelMoveResolver,
   kNumArenaAllocKinds
 };
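The new allocation kinds have to stay in the same order as the name strings added to arena_allocator.cc above, because the per-kind byte counters are reported by indexing the name table with the enum value. One common way to keep the two in lockstep (illustrative only, not necessarily what ART does):

    #include <cstddef>

    enum ArenaAllocKind { kArenaAllocMisc, kArenaAllocGvn, kArenaAllocCodeGenerator, kNumArenaAllocKinds };

    static const char* const kAllocNames[] = { "Misc         ", "GVN          ", "CodeGen      " };

    static_assert(sizeof(kAllocNames) / sizeof(kAllocNames[0]) ==
                      static_cast<std::size_t>(kNumArenaAllocKinds),
                  "arena alloc-kind name table out of sync with the enum");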
 
diff --git a/runtime/base/arena_object.h b/runtime/base/arena_object.h
index ab97d0c..56e35d8 100644
--- a/runtime/base/arena_object.h
+++ b/runtime/base/arena_object.h
@@ -40,6 +40,10 @@
     LOG(FATAL) << "UNREACHABLE";
     UNREACHABLE();
   }
+
+  // NOTE: Providing placement new (and matching delete) for constructing container elements.
+  ALWAYS_INLINE void* operator new(size_t, void* ptr) noexcept { return ptr; }
+  ALWAYS_INLINE void operator delete(void*, void*) noexcept { }
 };
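Declaring any operator new at class scope hides the global placement form for that type, so arena-allocated objects stop being constructible in place (for example as container elements) unless the class re-declares placement new and its matching delete, which is what the hunk above adds. A standalone illustration of the language rule:

    #include <cstddef>
    #include <cstdlib>
    #include <new>

    struct ArenaThing {
      // Any class-scope operator new hides ::operator new(size_t, void*) for this type...
      static void* operator new(std::size_t size) { return std::malloc(size); }
      static void operator delete(void* p) { std::free(p); }
      // ...so placement new (and its matching delete) must be re-declared to allow
      // constructing elements into storage allocated elsewhere.
      static void* operator new(std::size_t, void* ptr) noexcept { return ptr; }
      static void operator delete(void*, void*) noexcept {}
      int value = 0;
    };

    int main() {
      alignas(ArenaThing) unsigned char buffer[sizeof(ArenaThing)];
      ArenaThing* t = new (buffer) ArenaThing();  // Resolves to the class placement new.
      t->~ArenaThing();
      return 0;
    }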
 
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index bc8a9f4..b0590e2 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -128,7 +128,11 @@
   // the previous error.
   Runtime* const runtime = Runtime::Current();
   if (!runtime->IsAotCompiler()) {  // Give info if this occurs at runtime.
-    LOG(INFO) << "Rejecting re-init on previously-failed class " << PrettyClass(c);
+    std::string extra;
+    if (c->GetVerifyErrorClass() != nullptr) {
+      extra = PrettyDescriptor(c->GetVerifyErrorClass());
+    }
+    LOG(INFO) << "Rejecting re-init on previously-failed class " << PrettyClass(c) << ": " << extra;
   }
 
   CHECK(c->IsErroneous()) << PrettyClass(c) << " " << c->GetStatus();
@@ -1316,14 +1320,6 @@
     // Need to make sure to not copy ArtMethods without doing read barriers since the roots are
     // marked concurrently and we don't hold the classlinker_classes_lock_ when we do the copy.
     boot_class_table_.VisitRoots(buffered_visitor);
-    // TODO: Avoid marking these to enable class unloading.
-    JavaVMExt* const vm = Runtime::Current()->GetJavaVM();
-    for (jweak weak_root : class_loaders_) {
-      mirror::Object* class_loader =
-          down_cast<mirror::ClassLoader*>(vm->DecodeWeakGlobal(self, weak_root));
-      // Don't need to update anything since the class loaders will be updated by SweepSystemWeaks.
-      visitor->VisitRootIfNonNull(&class_loader, RootInfo(kRootVMInternal));
-    }
   } else if ((flags & kVisitRootFlagNewRoots) != 0) {
     for (auto& root : new_class_roots_) {
       mirror::Class* old_ref = root.Read<kWithoutReadBarrier>();
@@ -1503,13 +1499,10 @@
   STLDeleteElements(&oat_files_);
   Thread* const self = Thread::Current();
   JavaVMExt* const vm = Runtime::Current()->GetJavaVM();
-  for (jweak weak_root : class_loaders_) {
-    auto* const class_loader = down_cast<mirror::ClassLoader*>(
-        vm->DecodeWeakGlobalDuringShutdown(self, weak_root));
-    if (class_loader != nullptr) {
-      delete class_loader->GetClassTable();
-    }
-    vm->DeleteWeakGlobalRef(self, weak_root);
+  for (const ClassLoaderData& data : class_loaders_) {
+    vm->DeleteWeakGlobalRef(self, data.weak_root);
+    delete data.allocator;
+    delete data.class_table;
   }
   class_loaders_.clear();
 }
@@ -2375,21 +2368,25 @@
   }
 }
 
-LengthPrefixedArray<ArtField>* ClassLinker::AllocArtFieldArray(Thread* self, size_t length) {
+LengthPrefixedArray<ArtField>* ClassLinker::AllocArtFieldArray(Thread* self,
+                                                               LinearAlloc* allocator,
+                                                               size_t length) {
   if (length == 0) {
     return nullptr;
   }
   // If the ArtField alignment changes, review all uses of LengthPrefixedArray<ArtField>.
   static_assert(alignof(ArtField) == 4, "ArtField alignment is expected to be 4.");
   size_t storage_size = LengthPrefixedArray<ArtField>::ComputeSize(length);
-  void* array_storage = Runtime::Current()->GetLinearAlloc()->Alloc(self, storage_size);
+  void* array_storage = allocator->Alloc(self, storage_size);
   auto* ret = new(array_storage) LengthPrefixedArray<ArtField>(length);
   CHECK(ret != nullptr);
   std::uninitialized_fill_n(&ret->At(0), length, ArtField());
   return ret;
 }
 
-LengthPrefixedArray<ArtMethod>* ClassLinker::AllocArtMethodArray(Thread* self, size_t length) {
+LengthPrefixedArray<ArtMethod>* ClassLinker::AllocArtMethodArray(Thread* self,
+                                                                 LinearAlloc* allocator,
+                                                                 size_t length) {
   if (length == 0) {
     return nullptr;
   }
@@ -2397,7 +2394,7 @@
   const size_t method_size = ArtMethod::Size(image_pointer_size_);
   const size_t storage_size =
       LengthPrefixedArray<ArtMethod>::ComputeSize(length, method_size, method_alignment);
-  void* array_storage = Runtime::Current()->GetLinearAlloc()->Alloc(self, storage_size);
+  void* array_storage = allocator->Alloc(self, storage_size);
   auto* ret = new (array_storage) LengthPrefixedArray<ArtMethod>(length);
   CHECK(ret != nullptr);
   for (size_t i = 0; i < length; ++i) {
@@ -2406,6 +2403,15 @@
   return ret;
 }
 
+LinearAlloc* ClassLinker::GetAllocatorForClassLoader(mirror::ClassLoader* class_loader) {
+  if (class_loader == nullptr) {
+    return Runtime::Current()->GetLinearAlloc();
+  }
+  LinearAlloc* allocator = class_loader->GetAllocator();
+  DCHECK(allocator != nullptr);
+  return allocator;
+}
+
 void ClassLinker::LoadClassMembers(Thread* self,
                                    const DexFile& dex_file,
                                    const uint8_t* class_data,
@@ -2418,8 +2424,11 @@
     // Load static fields.
     // We allow duplicate definitions of the same field in a class_data_item
     // but ignore the repeated indexes here, b/21868015.
+    LinearAlloc* const allocator = GetAllocatorForClassLoader(klass->GetClassLoader());
     ClassDataItemIterator it(dex_file, class_data);
-    LengthPrefixedArray<ArtField>* sfields = AllocArtFieldArray(self, it.NumStaticFields());
+    LengthPrefixedArray<ArtField>* sfields = AllocArtFieldArray(self,
+                                                                allocator,
+                                                                it.NumStaticFields());
     size_t num_sfields = 0;
     uint32_t last_field_idx = 0u;
     for (; it.HasNextStaticField(); it.Next()) {
@@ -2435,7 +2444,9 @@
     klass->SetSFieldsPtr(sfields);
     DCHECK_EQ(klass->NumStaticFields(), num_sfields);
     // Load instance fields.
-    LengthPrefixedArray<ArtField>* ifields = AllocArtFieldArray(self, it.NumInstanceFields());
+    LengthPrefixedArray<ArtField>* ifields = AllocArtFieldArray(self,
+                                                                allocator,
+                                                                it.NumInstanceFields());
     size_t num_ifields = 0u;
     last_field_idx = 0u;
     for (; it.HasNextInstanceField(); it.Next()) {
@@ -2458,8 +2469,8 @@
     klass->SetIFieldsPtr(ifields);
     DCHECK_EQ(klass->NumInstanceFields(), num_ifields);
     // Load methods.
-    klass->SetDirectMethodsPtr(AllocArtMethodArray(self, it.NumDirectMethods()));
-    klass->SetVirtualMethodsPtr(AllocArtMethodArray(self, it.NumVirtualMethods()));
+    klass->SetDirectMethodsPtr(AllocArtMethodArray(self, allocator, it.NumDirectMethods()));
+    klass->SetVirtualMethodsPtr(AllocArtMethodArray(self, allocator, it.NumVirtualMethods()));
     size_t class_def_method_index = 0;
     uint32_t last_dex_method_index = DexFile::kDexNoIndex;
     size_t last_class_def_method_index = 0;
@@ -2639,10 +2650,8 @@
                                                   const DexFile& dex_file,
                                                   bool allow_failure) {
   // Search assuming unique-ness of dex file.
-  JavaVMExt* const vm = self->GetJniEnv()->vm;
   for (jweak weak_root : dex_caches_) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(
-        vm->DecodeWeakGlobal(self, weak_root));
+    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
     if (dex_cache != nullptr && dex_cache->GetDexFile() == &dex_file) {
       return dex_cache;
     }
@@ -3031,7 +3040,7 @@
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
   boot_class_table_.FreezeSnapshot();
   MoveClassTableToPreZygoteVisitor visitor;
-  VisitClassLoadersAndRemoveClearedLoaders(&visitor);
+  VisitClassLoaders(&visitor);
 }
 
 mirror::Class* ClassLinker::LookupClassFromImage(const char* descriptor) {
@@ -3414,9 +3423,12 @@
   mirror::Class* existing = InsertClass(descriptor.c_str(), klass.Get(), hash);
   CHECK(existing == nullptr);
 
+  // Needs to be after we insert the class so that the allocator field is set.
+  LinearAlloc* const allocator = GetAllocatorForClassLoader(klass->GetClassLoader());
+
   // Instance fields are inherited, but we add a couple of static fields...
   const size_t num_fields = 2;
-  LengthPrefixedArray<ArtField>* sfields = AllocArtFieldArray(self, num_fields);
+  LengthPrefixedArray<ArtField>* sfields = AllocArtFieldArray(self, allocator, num_fields);
   klass->SetSFieldsPtr(sfields);
 
   // 1. Create a static field 'interfaces' that holds the _declared_ interfaces implemented by
@@ -3433,7 +3445,7 @@
   throws_sfield.SetAccessFlags(kAccStatic | kAccPublic | kAccFinal);
 
   // Proxies have 1 direct method, the constructor
-  LengthPrefixedArray<ArtMethod>* directs = AllocArtMethodArray(self, 1);
+  LengthPrefixedArray<ArtMethod>* directs = AllocArtMethodArray(self, allocator, 1);
   // Currently AllocArtMethodArray cannot return null, but the OOM logic is left there in case we
   // want to throw OOM in the future.
   if (UNLIKELY(directs == nullptr)) {
@@ -3448,7 +3460,7 @@
   DCHECK_EQ(h_methods->GetClass(), mirror::Method::ArrayClass())
       << PrettyClass(h_methods->GetClass());
   const size_t num_virtual_methods = h_methods->GetLength();
-  auto* virtuals = AllocArtMethodArray(self, num_virtual_methods);
+  auto* virtuals = AllocArtMethodArray(self, allocator, num_virtual_methods);
   // Currently AllocArtMethodArray cannot return null, but the OOM logic is left there in case we
   // want to throw OOM in the future.
   if (UNLIKELY(virtuals == nullptr)) {
@@ -4166,9 +4178,16 @@
   if (class_table == nullptr) {
     class_table = new ClassTable;
     Thread* const self = Thread::Current();
-    class_loaders_.push_back(self->GetJniEnv()->vm->AddWeakGlobalRef(self, class_loader));
+    ClassLoaderData data;
+    data.weak_root = self->GetJniEnv()->vm->AddWeakGlobalRef(self, class_loader);
+    data.class_table = class_table;
+    data.allocator = Runtime::Current()->CreateLinearAlloc();
+    class_loaders_.push_back(data);
     // Don't already have a class table, add it to the class loader.
-    class_loader->SetClassTable(class_table);
+    CHECK(class_loader->GetClassTable() == nullptr);
+    CHECK(class_loader->GetAllocator() == nullptr);
+    class_loader->SetClassTable(data.class_table);
+    class_loader->SetAllocator(data.allocator);
   }
   return class_table;
 }
@@ -4244,6 +4263,11 @@
       ClassTable* const table = InsertClassTableForClassLoader(class_loader);
       mirror::Class* existing = table->UpdateClass(descriptor, h_new_class.Get(),
                                                    ComputeModifiedUtf8Hash(descriptor));
+      if (class_loader != nullptr) {
+        // We updated the class in the class table, perform the write barrier so that the GC knows
+        // about the change.
+        Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
+      }
       CHECK_EQ(existing, klass.Get());
       if (kIsDebugBuild && class_loader == nullptr && dex_cache_image_class_lookup_required_) {
         // Check a class loaded with the system class loader matches one in the image if the class
@@ -6158,7 +6182,10 @@
 ArtMethod* ClassLinker::CreateRuntimeMethod() {
   const size_t method_alignment = ArtMethod::Alignment(image_pointer_size_);
   const size_t method_size = ArtMethod::Size(image_pointer_size_);
-  LengthPrefixedArray<ArtMethod>* method_array = AllocArtMethodArray(Thread::Current(), 1);
+  LengthPrefixedArray<ArtMethod>* method_array = AllocArtMethodArray(
+      Thread::Current(),
+      Runtime::Current()->GetLinearAlloc(),
+      1);
   ArtMethod* method = &method_array->At(0, method_size, method_alignment);
   CHECK(method != nullptr);
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
@@ -6171,33 +6198,33 @@
   find_array_class_cache_next_victim_ = 0;
 }
 
-void ClassLinker::VisitClassLoadersAndRemoveClearedLoaders(ClassLoaderVisitor* visitor) {
+void ClassLinker::VisitClassLoaders(ClassLoaderVisitor* visitor) const {
   Thread* const self = Thread::Current();
-  Locks::classlinker_classes_lock_->AssertExclusiveHeld(self);
-  JavaVMExt* const vm = self->GetJniEnv()->vm;
-  for (auto it = class_loaders_.begin(); it != class_loaders_.end();) {
-    const jweak weak_root = *it;
-    mirror::ClassLoader* const class_loader = down_cast<mirror::ClassLoader*>(
-        vm->DecodeWeakGlobal(self, weak_root));
+  for (const ClassLoaderData& data : class_loaders_) {
+    // Need to use DecodeJObject so that we get null for cleared JNI weak globals.
+    auto* const class_loader = down_cast<mirror::ClassLoader*>(self->DecodeJObject(data.weak_root));
     if (class_loader != nullptr) {
       visitor->Visit(class_loader);
-      ++it;
-    } else {
-      // Remove the cleared weak reference from the array.
-      vm->DeleteWeakGlobalRef(self, weak_root);
-      it = class_loaders_.erase(it);
     }
   }
 }
 
-void ClassLinker::VisitClassLoaders(ClassLoaderVisitor* visitor) const {
+void ClassLinker::CleanupClassLoaders() {
   Thread* const self = Thread::Current();
-  JavaVMExt* const vm = self->GetJniEnv()->vm;
-  for (jweak weak_root : class_loaders_) {
-    mirror::ClassLoader* const class_loader = down_cast<mirror::ClassLoader*>(
-        vm->DecodeWeakGlobal(self, weak_root));
+  WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+  JavaVMExt* const vm = Runtime::Current()->GetJavaVM();
+  for (auto it = class_loaders_.begin(); it != class_loaders_.end(); ) {
+    const ClassLoaderData& data = *it;
+    // Need to use DecodeJObject so that we get null for cleared JNI weak globals.
+    auto* const class_loader = down_cast<mirror::ClassLoader*>(self->DecodeJObject(data.weak_root));
     if (class_loader != nullptr) {
-      visitor->Visit(class_loader);
+      ++it;
+    } else {
+      // Weak reference was cleared, delete the data associated with this class loader.
+      delete data.class_table;
+      delete data.allocator;
+      vm->DeleteWeakGlobalRef(self, data.weak_root);
+      it = class_loaders_.erase(it);
     }
   }
 }
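The UpdateClass path above now calls WriteBarrierEveryFieldOf on the class loader: the class table hangs off the loader object but is mutated outside the normal reference-field write path, so the GC has to be told the loader may now reference a new class. Card marking is the usual mechanism; a schematic version with illustrative constants only (not ART's card table layout):

    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t kCardShift = 10;   // One card per 1 KiB of heap (illustrative).
    constexpr std::uint8_t kCardDirty = 0x70;

    // Marks the card covering `obj` so the next GC pass re-scans every field of that object.
    inline void MarkCardFor(std::uint8_t* card_table, const void* obj) {
      card_table[reinterpret_cast<std::uintptr_t>(obj) >> kCardShift] = kCardDirty;
    }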
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index fee7066..739403f 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -403,9 +403,13 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
-  LengthPrefixedArray<ArtField>* AllocArtFieldArray(Thread* self, size_t length);
+  LengthPrefixedArray<ArtField>* AllocArtFieldArray(Thread* self,
+                                                    LinearAlloc* allocator,
+                                                    size_t length);
 
-  LengthPrefixedArray<ArtMethod>* AllocArtMethodArray(Thread* self, size_t length);
+  LengthPrefixedArray<ArtMethod>* AllocArtMethodArray(Thread* self,
+                                                      LinearAlloc* allocator,
+                                                      size_t length);
 
   mirror::PointerArray* AllocPointerArray(Thread* self, size_t length)
       SHARED_REQUIRES(Locks::mutator_lock_)
@@ -546,17 +550,24 @@
   // entries are roots, but potentially not image classes.
   void DropFindArrayClassCache() SHARED_REQUIRES(Locks::mutator_lock_);
 
- private:
-  // The RemoveClearedLoaders version removes cleared weak global class loaders and frees their
-  // class tables. This version can only be called with reader access to the
-  // classlinker_classes_lock_ since it modifies the class_loaders_ list.
-  void VisitClassLoadersAndRemoveClearedLoaders(ClassLoaderVisitor* visitor)
-      REQUIRES(Locks::classlinker_classes_lock_)
+  // Clean up class loaders, this needs to happen after JNI weak globals are cleared.
+  void CleanupClassLoaders()
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!Locks::classlinker_classes_lock_);
+
+  static LinearAlloc* GetAllocatorForClassLoader(mirror::ClassLoader* class_loader)
       SHARED_REQUIRES(Locks::mutator_lock_);
+
+ private:
+  struct ClassLoaderData {
+    jweak weak_root;  // Weak root to enable class unloading.
+    ClassTable* class_table;
+    LinearAlloc* allocator;
+  };
+
   void VisitClassLoaders(ClassLoaderVisitor* visitor) const
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
-
   void VisitClassesInternal(ClassVisitor* visitor)
       SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
 
@@ -826,8 +837,8 @@
   std::vector<const OatFile*> oat_files_ GUARDED_BY(dex_lock_);
 
   // This contains the class loaders which have class tables. It is populated by
-  // InsertClassTableForClassLoader. Weak roots to enable class unloading.
-  std::list<jweak> class_loaders_
+  // InsertClassTableForClassLoader.
+  std::list<ClassLoaderData> class_loaders_
       GUARDED_BY(Locks::classlinker_classes_lock_);
 
   // Boot class path table. Since the class loader for this is null.
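class_loaders_ now stores a small ClassLoaderData record per loader: a weak root plus the natively owned class table and linear allocator. CleanupClassLoaders runs after the GC has cleared dead weak references, deleting the side data and unlinking the entry. A reduced sketch of that sweep using standard-library stand-ins for the weak root and the owned payloads:

    #include <list>
    #include <memory>

    struct LoaderData {
      std::weak_ptr<int> weak_root;      // Stand-in for the JNI weak global root.
      std::unique_ptr<int> class_table;  // Owned side data.
      std::unique_ptr<int> allocator;
    };

    void CleanupLoaders(std::list<LoaderData>& loaders) {
      for (auto it = loaders.begin(); it != loaders.end(); ) {
        if (it->weak_root.expired()) {
          it = loaders.erase(it);        // unique_ptr members free the side data.
        } else {
          ++it;
        }
      }
    }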
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index b4ea3b3..0926ce3 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -550,6 +550,7 @@
 
 struct ClassLoaderOffsets : public CheckOffsets<mirror::ClassLoader> {
   ClassLoaderOffsets() : CheckOffsets<mirror::ClassLoader>(false, "Ljava/lang/ClassLoader;") {
+    addOffset(OFFSETOF_MEMBER(mirror::ClassLoader, allocator_), "allocator");
     addOffset(OFFSETOF_MEMBER(mirror::ClassLoader, class_table_), "classTable");
     addOffset(OFFSETOF_MEMBER(mirror::ClassLoader, packages_), "packages");
     addOffset(OFFSETOF_MEMBER(mirror::ClassLoader, parent_), "parent");
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 450031a..d24b4fb 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -579,15 +579,11 @@
 
   Runtime* runtime = Runtime::Current();
   ScopedSuspendAll ssa(__FUNCTION__);
-  Thread* self = Thread::Current();
-  ThreadState old_state = self->SetStateUnsafe(kRunnable);
-  CHECK_NE(old_state, kRunnable);
   if (RequiresDeoptimization()) {
     runtime->GetInstrumentation()->EnableDeoptimization();
   }
   instrumentation_events_ = 0;
   gDebuggerActive = true;
-  CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
   LOG(INFO) << "Debugger is active";
 }
 
diff --git a/runtime/dex_instruction-inl.h b/runtime/dex_instruction-inl.h
index 7344d13..e160a10 100644
--- a/runtime/dex_instruction-inl.h
+++ b/runtime/dex_instruction-inl.h
@@ -454,8 +454,8 @@
   return FormatOf(Opcode()) == k25x;
 }
 
-// Copies all of the parameter registers into the arg array. Check the length with VRegB_25x()+1.
-inline void Instruction::GetAllArgs25x(uint32_t arg[kMaxVarArgRegs]) const {
+// Copies all of the parameter registers into the arg array. Check the length with VRegB_25x()+2.
+inline void Instruction::GetAllArgs25x(uint32_t (&arg)[kMaxVarArgRegs25x]) const {
   DCHECK_EQ(FormatOf(Opcode()), k25x);
 
   /*
@@ -500,19 +500,21 @@
    */
   switch (count) {
     case 4:
-      arg[4] = (Fetch16(0) >> 8) & 0x0f;  // vG
+      arg[5] = (Fetch16(0) >> 8) & 0x0f;  // vG
       FALLTHROUGH_INTENDED;
     case 3:
-      arg[3] = (reg_list >> 12) & 0x0f;  // vF
+      arg[4] = (reg_list >> 12) & 0x0f;  // vF
       FALLTHROUGH_INTENDED;
     case 2:
-      arg[2] = (reg_list >> 8) & 0x0f;  // vE
+      arg[3] = (reg_list >> 8) & 0x0f;  // vE
       FALLTHROUGH_INTENDED;
     case 1:
-      arg[1] = (reg_list >> 4) & 0x0f;  // vD
+      arg[2] = (reg_list >> 4) & 0x0f;  // vD
       FALLTHROUGH_INTENDED;
     default:  // case 0
+      // The required lambda 'this' is actually a pair, but the pair is implicit.
       arg[0] = VRegC_25x();  // vC
+      arg[1] = arg[0] + 1;   // vC + 1
       break;
   }
 }
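
To make the new arg indexing concrete: after GetAllArgs25x, arg[0] and arg[1] hold the implicit closure register pair (vC and vC+1), and arg[2]..arg[5] hold the up-to-four explicit arguments vD..vG. The helper below is a hypothetical, self-contained stand-in (it takes the registers as plain parameters rather than decoding the 25x code units) meant only to illustrate that layout.

    #include <cstdint>
    #include <cstdio>

    constexpr uint32_t kMaxVarArgRegs25x = 6;  // closure pair + up to 4 args

    // Fill 'arg' the way GetAllArgs25x lays it out: the closure pair first,
    // then 'count' extra argument registers (vD..vG).
    void FillArgs25x(uint32_t count, uint32_t vC,
                     uint32_t vD, uint32_t vE, uint32_t vF, uint32_t vG,
                     uint32_t (&arg)[kMaxVarArgRegs25x]) {
      arg[0] = vC;      // first register of the implicit closure pair
      arg[1] = vC + 1;  // second register of the pair
      const uint32_t extras[4] = {vD, vE, vF, vG};
      for (uint32_t i = 0; i < count && i < 4; ++i) {
        arg[i + 2] = extras[i];
      }
    }

    int main() {
      uint32_t arg[kMaxVarArgRegs25x] = {};
      // e.g. invoke-lambda v2, {v5, v7}: count == VRegB_25x() == 2.
      FillArgs25x(2, /*vC=*/2, /*vD=*/5, /*vE=*/7, /*vF=*/0, /*vG=*/0, arg);
      for (uint32_t r : arg) {
        std::printf("%u ", r);  // prints: 2 3 5 7 0 0
      }
      std::printf("\n");
    }

This is also why the length check becomes VRegB_25x()+2 and why the disassembler indexes arg[i+2] when printing the explicit arguments.
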
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index fc4df14..5250b0d 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -322,10 +322,10 @@
     }
     case k25x: {
       if (Opcode() == INVOKE_LAMBDA) {
-        uint32_t arg[kMaxVarArgRegs];
+        uint32_t arg[kMaxVarArgRegs25x];
         GetAllArgs25x(arg);
         const size_t num_extra_var_args = VRegB_25x();
-        DCHECK_LE(num_extra_var_args + 1, kMaxVarArgRegs);
+        DCHECK_LE(num_extra_var_args + 2, arraysize(arg));
 
         // invoke-lambda vC, {vD, vE, vF, vG}
         os << opcode << " v" << arg[0] << ", {";
@@ -333,7 +333,7 @@
           if (i != 0) {
             os << ", ";
           }
-          os << "v" << arg[i+1];
+          os << "v" << arg[i+2];  // Don't print the pair of vC registers. Pair is implicit.
         }
         os << "}";
         break;
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index df2d379..48a12e5 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -180,9 +180,11 @@
     kVerifyVarArgRangeNonZero = 0x100000,
     kVerifyRuntimeOnly        = 0x200000,
     kVerifyError              = 0x400000,
+    kVerifyRegCString         = 0x800000,
   };
 
   static constexpr uint32_t kMaxVarArgRegs = 5;
+  static constexpr uint32_t kMaxVarArgRegs25x = 6;  // a lambda closure occupies 2 registers.
 
   // Returns the size (in 2 byte code units) of this instruction.
   size_t SizeInCodeUnits() const {
@@ -408,7 +410,7 @@
   void GetVarArgs(uint32_t args[kMaxVarArgRegs]) const {
     return GetVarArgs(args, Fetch16(0));
   }
-  void GetAllArgs25x(uint32_t args[kMaxVarArgRegs]) const;
+  void GetAllArgs25x(uint32_t (&args)[kMaxVarArgRegs25x]) const;
 
   // Returns the opcode field of the instruction. The given "inst_data" parameter must be the first
   // 16 bits of instruction.
@@ -536,7 +538,7 @@
 
   int GetVerifyTypeArgumentC() const {
     return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegC | kVerifyRegCField |
-        kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide));
+        kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide | kVerifyRegCString));
   }
 
   int GetVerifyExtraFlags() const {
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index a176772..9d7e0c4 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -263,10 +263,10 @@
   V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
   V(0xF3, INVOKE_LAMBDA, "invoke-lambda", k25x, false, kIndexNone, kContinue | kThrow | kInvoke | kExperimental, kVerifyRegC /*TODO: | kVerifyVarArg*/) \
   V(0xF4, UNUSED_F4, "unused-f4", k10x, false, kIndexUnknown, 0, kVerifyError) \
-  V(0xF5, UNUSED_F5, "unused-f5", k10x, false, kIndexUnknown, 0, kVerifyError) \
+  V(0xF5, CAPTURE_VARIABLE, "capture-variable", k21c, false, kIndexStringRef, kExperimental, kVerifyRegA | kVerifyRegBString) \
   /* TODO(iam): get rid of the unused 'false' column */ \
   V(0xF6, CREATE_LAMBDA, "create-lambda", k21c, false_UNUSED, kIndexMethodRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegBMethod) \
-  V(0xF7, UNUSED_F7, "unused-f7", k10x, false, kIndexUnknown, 0, kVerifyError) \
+  V(0xF7, LIBERATE_VARIABLE, "liberate-variable", k22c, false, kIndexStringRef, kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCString) \
   V(0xF8, BOX_LAMBDA, "box-lambda", k22x, true, kIndexNone, kContinue | kExperimental, kVerifyRegA | kVerifyRegB) \
   V(0xF9, UNBOX_LAMBDA, "unbox-lambda", k22c, true, kIndexTypeRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
   V(0xFA, UNUSED_FA, "unused-fa", k10x, false, kIndexUnknown, 0, kVerifyError) \
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index cc3eefe..8ae0b07 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -71,44 +71,6 @@
       *outer_method->GetDexFile(), method_index, dex_cache, class_loader, nullptr, invoke_type);
 }
 
-inline ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp,
-                                            Runtime::CalleeSaveType type,
-                                            bool do_caller_check = false)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(type));
-
-  const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, type);
-  auto** caller_sp = reinterpret_cast<ArtMethod**>(
-      reinterpret_cast<uintptr_t>(sp) + callee_frame_size);
-  ArtMethod* outer_method = *caller_sp;
-  ArtMethod* caller = outer_method;
-
-  if ((outer_method != nullptr) && outer_method->IsOptimized(sizeof(void*))) {
-    const size_t callee_return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, type);
-    uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>(
-        (reinterpret_cast<uint8_t*>(sp) + callee_return_pc_offset));
-    uintptr_t native_pc_offset = outer_method->NativeQuickPcOffset(caller_pc);
-    CodeInfo code_info = outer_method->GetOptimizedCodeInfo();
-    StackMapEncoding encoding = code_info.ExtractEncoding();
-    StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
-    DCHECK(stack_map.IsValid());
-    if (stack_map.HasInlineInfo(encoding)) {
-      InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-      caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
-    }
-  }
-
-  if (kIsDebugBuild && do_caller_check) {
-    // Note that do_caller_check is optional, as this method can be called by
-    // stubs, and tests without a proper call stack.
-    NthCallerVisitor visitor(Thread::Current(), 1, true);
-    visitor.WalkStack();
-    CHECK_EQ(caller, visitor.caller);
-  }
-
-  return caller;
-}
-
 inline ArtMethod* GetCalleeSaveMethodCaller(Thread* self, Runtime::CalleeSaveType type)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   return GetCalleeSaveMethodCaller(
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 94aced2..f193999 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -21,11 +21,15 @@
 #include "base/mutex.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
+#include "entrypoints/entrypoint_utils-inl.h"
+#include "entrypoints/quick/callee_save_frame.h"
+#include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/card_table-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "nth_caller_visitor.h"
 #include "reflection.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -345,4 +349,54 @@
   return true;
 }
 
+ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp,
+                                     Runtime::CalleeSaveType type,
+                                     bool do_caller_check)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(type));
+
+  const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, type);
+  auto** caller_sp = reinterpret_cast<ArtMethod**>(
+      reinterpret_cast<uintptr_t>(sp) + callee_frame_size);
+  ArtMethod* outer_method = *caller_sp;
+  ArtMethod* caller = outer_method;
+
+  if ((outer_method != nullptr) && outer_method->IsOptimized(sizeof(void*))) {
+    const size_t callee_return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, type);
+    uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>(
+        (reinterpret_cast<uint8_t*>(sp) + callee_return_pc_offset));
+    if (LIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()))) {
+      uintptr_t native_pc_offset = outer_method->NativeQuickPcOffset(caller_pc);
+      CodeInfo code_info = outer_method->GetOptimizedCodeInfo();
+      StackMapEncoding encoding = code_info.ExtractEncoding();
+      StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
+      DCHECK(stack_map.IsValid());
+      if (stack_map.HasInlineInfo(encoding)) {
+        InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
+        caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
+      }
+    } else {
+      // We're instrumenting; just use the StackVisitor, which knows how to
+      // handle instrumented frames.
+      NthCallerVisitor visitor(Thread::Current(), 1, true);
+      visitor.WalkStack();
+      caller = visitor.caller;
+      if (kIsDebugBuild) {
+        // Avoid doing the check below.
+        do_caller_check = false;
+      }
+    }
+  }
+
+  if (kIsDebugBuild && do_caller_check) {
+    // Note that do_caller_check is optional, as this method can be called by
+    // stubs and tests without a proper call stack.
+    NthCallerVisitor visitor(Thread::Current(), 1, true);
+    visitor.WalkStack();
+    CHECK_EQ(caller, visitor.caller);
+  }
+
+  return caller;
+}
+
 }  // namespace art
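
The frame arithmetic in the relocated GetCalleeSaveMethodCaller can be hard to picture: sp points at the slot holding the callee-save method, and the caller's method slot sits exactly callee_frame_size bytes above it. The toy program below illustrates only that pointer arithmetic; the Method type, the 96-byte frame size, and the byte-buffer stack are made-up stand-ins.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <iostream>

    struct Method {};  // stand-in for ArtMethod

    int main() {
      constexpr size_t kCalleeFrameSize = 96;  // illustrative frame size

      // Fake stack: slot 0 holds the callee-save method pointer, and the slot
      // kCalleeFrameSize bytes above it holds the caller's method pointer.
      alignas(void*) uint8_t stack[kCalleeFrameSize + sizeof(Method*)] = {};
      Method callee_save_method, caller_method;
      Method* p = &callee_save_method;
      std::memcpy(stack, &p, sizeof(p));
      p = &caller_method;
      std::memcpy(stack + kCalleeFrameSize, &p, sizeof(p));

      // The same arithmetic as GetCalleeSaveMethodCaller: step over the frame.
      Method** sp = reinterpret_cast<Method**>(stack);
      Method** caller_sp = reinterpret_cast<Method**>(
          reinterpret_cast<uintptr_t>(sp) + kCalleeFrameSize);
      assert(*sp == &callee_save_method);
      assert(*caller_sp == &caller_method);
      std::cout << "caller slot is " << kCalleeFrameSize << " bytes above sp\n";
    }
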
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 53f2677..4217cab 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -26,6 +26,7 @@
 #include "gc/allocator_type.h"
 #include "invoke_type.h"
 #include "jvalue.h"
+#include "runtime.h"
 
 namespace art {
 
@@ -179,6 +180,10 @@
 template <typename INT_TYPE, typename FLOAT_TYPE>
 inline INT_TYPE art_float_to_integral(FLOAT_TYPE f);
 
+ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp,
+                                     Runtime::CalleeSaveType type,
+                                     bool do_caller_check = false);
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_ENTRYPOINT_UTILS_H_
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index d749664..dfd9fcd 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -22,13 +22,16 @@
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
+#include "quick_exception_handler.h"
 #include "stack.h"
 #include "thread.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
 
-NO_RETURN static void artDeoptimizeImpl(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+
   if (VLOG_IS_ON(deopt)) {
     LOG(INFO) << "Deopting:";
     self->Dump(LOG(INFO));
@@ -39,19 +42,26 @@
   self->QuickDeliverException();
 }
 
-extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
-  ScopedQuickEntrypointChecks sqec(self);
-  artDeoptimizeImpl(self);
-}
-
 extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
+
+  // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
+  // specialized visitor that will show whether a method is Quick or Shadow.
+
   // Before deoptimizing to interpreter, we must push the deoptimization context.
   JValue return_value;
   return_value.SetJ(0);  // we never deoptimize from compiled code with an invoke result.
   self->PushDeoptimizationContext(return_value, false, self->GetException());
-  artDeoptimizeImpl(self);
+
+  QuickExceptionHandler exception_handler(self, true);
+  exception_handler.DeoptimizeSingleFrame();
+  exception_handler.UpdateInstrumentationStack();
+  exception_handler.DeoptimizeSingleFrameArchDependentFixup();
+  // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would
+  // be caller-saved. This has the downside that we cannot track incorrect register usage down the
+  // line.
+  exception_handler.DoLongJump(false);
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index f69c39e..fc5c52e 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -112,4 +112,61 @@
   return JniMethodEndWithReferenceHandleResult(result, saved_local_ref_cookie, self);
 }
 
+extern uint64_t GenericJniMethodEnd(Thread* self,
+                                    uint32_t saved_local_ref_cookie,
+                                    jvalue result,
+                                    uint64_t result_f,
+                                    ArtMethod* called,
+                                    HandleScope* handle_scope)
+    // TODO: NO_THREAD_SAFETY_ANALYSIS as GoToRunnable() is NO_THREAD_SAFETY_ANALYSIS
+    NO_THREAD_SAFETY_ANALYSIS {
+  GoToRunnable(self);
+  // We need the mutator lock (i.e., calling GoToRunnable()) before accessing the shorty or the
+  // locked object.
+  jobject locked = called->IsSynchronized() ? handle_scope->GetHandle(0).ToJObject() : nullptr;
+  char return_shorty_char = called->GetShorty()[0];
+  if (return_shorty_char == 'L') {
+    if (locked != nullptr) {
+      UnlockJniSynchronizedMethod(locked, self);
+    }
+    return reinterpret_cast<uint64_t>(JniMethodEndWithReferenceHandleResult(
+        result.l, saved_local_ref_cookie, self));
+  } else {
+    if (locked != nullptr) {
+      UnlockJniSynchronizedMethod(locked, self);  // Must decode before pop.
+    }
+    PopLocalReferences(saved_local_ref_cookie, self);
+    switch (return_shorty_char) {
+      case 'F': {
+        if (kRuntimeISA == kX86) {
+          // Convert back the result to float.
+          double d = bit_cast<double, uint64_t>(result_f);
+          return bit_cast<uint32_t, float>(static_cast<float>(d));
+        } else {
+          return result_f;
+        }
+      }
+      case 'D':
+        return result_f;
+      case 'Z':
+        return result.z;
+      case 'B':
+        return result.b;
+      case 'C':
+        return result.c;
+      case 'S':
+        return result.s;
+      case 'I':
+        return result.i;
+      case 'J':
+        return result.j;
+      case 'V':
+        return 0;
+      default:
+        LOG(FATAL) << "Unexpected return shorty character " << return_shorty_char;
+        return 0;
+    }
+  }
+}
+
 }  // namespace art
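
One non-obvious branch in GenericJniMethodEnd above is the 'F' case: as the original comment notes, on x86 the float result arrives widened to a double in result_f, so it has to be narrowed back to float and returned as its 32-bit pattern; on the other ISAs the raw result_f bits are returned unchanged. A self-contained sketch of just that conversion, using a local memcpy-based BitCast rather than ART's bit_cast:

    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    // Local stand-in for ART's bit_cast<Dst, Src>.
    template <typename Dst, typename Src>
    Dst BitCast(const Src& src) {
      static_assert(sizeof(Dst) == sizeof(Src), "size mismatch");
      Dst dst;
      std::memcpy(&dst, &src, sizeof(Dst));
      return dst;
    }

    // Mirrors the 'F' case of GenericJniMethodEnd on x86: the raw 64 bits hold
    // a double, which is narrowed to float and returned as its 32-bit pattern.
    uint64_t PackFloatReturn(uint64_t result_f_bits) {
      double d = BitCast<double>(result_f_bits);
      return BitCast<uint32_t>(static_cast<float>(d));
    }

    int main() {
      double d = 3.5;  // what the x86 stub would have left in result_f
      uint64_t packed = PackFloatReturn(BitCast<uint64_t>(d));
      float f = BitCast<float>(static_cast<uint32_t>(packed));
      std::printf("%f\n", f);  // 3.500000
    }
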
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1302c5f..5d3ac73 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -29,8 +29,10 @@
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "quick_exception_handler.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
+#include "stack.h"
 #include "debugger.h"
 
 namespace art {
@@ -646,27 +648,86 @@
   if (method->IsAbstract()) {
     ThrowAbstractMethodError(method);
     return 0;
+  }
+
+  JValue tmp_value;
+  ShadowFrame* deopt_frame = self->PopStackedShadowFrame(
+      StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame, false);
+  const DexFile::CodeItem* code_item = method->GetCodeItem();
+  DCHECK(code_item != nullptr) << PrettyMethod(method);
+  ManagedStack fragment;
+
+  DCHECK(!method->IsNative()) << PrettyMethod(method);
+  uint32_t shorty_len = 0;
+  auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*));
+  const char* shorty = non_proxy_method->GetShorty(&shorty_len);
+
+  JValue result;
+
+  if (deopt_frame != nullptr) {
+    // Coming from single-frame deopt.
+
+    if (kIsDebugBuild) {
+      // Sanity-check: are the methods as expected? We check that the last shadow frame (the bottom
+      // of the call-stack) corresponds to the called method.
+      ShadowFrame* linked = deopt_frame;
+      while (linked->GetLink() != nullptr) {
+        linked = linked->GetLink();
+      }
+      CHECK_EQ(method, linked->GetMethod()) << PrettyMethod(method) << " "
+          << PrettyMethod(linked->GetMethod());
+    }
+
+    if (VLOG_IS_ON(deopt)) {
+      // Print out the stack to verify that it was a single-frame deopt.
+      LOG(INFO) << "Continue-ing from deopt. Stack is:";
+      QuickExceptionHandler::DumpFramesWithType(self, true);
+    }
+
+    mirror::Throwable* pending_exception = nullptr;
+    self->PopDeoptimizationContext(&result, &pending_exception);
+
+    // Push a transition back into managed code onto the linked list in thread.
+    self->PushManagedStackFragment(&fragment);
+
+    // Ensure that the stack is still in order.
+    if (kIsDebugBuild) {
+      class DummyStackVisitor : public StackVisitor {
+       public:
+        explicit DummyStackVisitor(Thread* self_in) SHARED_REQUIRES(Locks::mutator_lock_)
+            : StackVisitor(self_in, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {}
+
+        bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+          // Nothing to do here. In a debug build, SanityCheckFrame will do the work in the walking
+          // logic. Just always say we want to continue.
+          return true;
+        }
+      };
+      DummyStackVisitor dsv(self);
+      dsv.WalkStack();
+    }
+
+    // Restore the exception that was pending before deoptimization then interpret the
+    // deoptimized frames.
+    if (pending_exception != nullptr) {
+      self->SetException(pending_exception);
+    }
+    interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, &result);
   } else {
-    DCHECK(!method->IsNative()) << PrettyMethod(method);
     const char* old_cause = self->StartAssertNoThreadSuspension(
         "Building interpreter shadow frame");
-    const DexFile::CodeItem* code_item = method->GetCodeItem();
-    DCHECK(code_item != nullptr) << PrettyMethod(method);
     uint16_t num_regs = code_item->registers_size_;
-    void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
     // No last shadow coming from quick.
-    ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, nullptr, method, 0, memory));
+    ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
+        CREATE_SHADOW_FRAME(num_regs, nullptr, method, 0);
+    ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get();
     size_t first_arg_reg = code_item->registers_size_ - code_item->ins_size_;
-    uint32_t shorty_len = 0;
-    auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*));
-    const char* shorty = non_proxy_method->GetShorty(&shorty_len);
     BuildQuickShadowFrameVisitor shadow_frame_builder(sp, method->IsStatic(), shorty, shorty_len,
                                                       shadow_frame, first_arg_reg);
     shadow_frame_builder.VisitArguments();
     const bool needs_initialization =
         method->IsStatic() && !method->GetDeclaringClass()->IsInitialized();
     // Push a transition back into managed code onto the linked list in thread.
-    ManagedStack fragment;
     self->PushManagedStackFragment(&fragment);
     self->PushShadowFrame(shadow_frame);
     self->EndAssertNoThreadSuspension(old_cause);
@@ -681,24 +742,26 @@
         return 0;
       }
     }
-    JValue result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame);
-    // Pop transition.
-    self->PopManagedStackFragment(fragment);
 
-    // Request a stack deoptimization if needed
-    ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
-    if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
-      // Push the context of the deoptimization stack so we can restore the return value and the
-      // exception before executing the deoptimized frames.
-      self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
-
-      // Set special exception to cause deoptimization.
-      self->SetException(Thread::GetDeoptimizationException());
-    }
-
-    // No need to restore the args since the method has already been run by the interpreter.
-    return result.GetJ();
+    result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame);
   }
+
+  // Pop transition.
+  self->PopManagedStackFragment(fragment);
+
+  // Request a stack deoptimization if needed
+  ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
+  if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
+    // Push the context of the deoptimization stack so we can restore the return value and the
+    // exception before executing the deoptimized frames.
+    self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
+
+    // Set special exception to cause deoptimization.
+    self->SetException(Thread::GetDeoptimizationException());
+  }
+
+  // No need to restore the args since the method has already been run by the interpreter.
+  return result.GetJ();
 }
 
 // Visits arguments on the stack placing them into the args vector, Object* arguments are converted
@@ -1926,62 +1989,27 @@
                                 reinterpret_cast<uintptr_t>(nativeCode));
 }
 
+// Defined in quick_jni_entrypoints.cc.
+extern uint64_t GenericJniMethodEnd(Thread* self, uint32_t saved_local_ref_cookie,
+                                    jvalue result, uint64_t result_f, ArtMethod* called,
+                                    HandleScope* handle_scope);
 /*
  * Is called after the native JNI code. Responsible for cleanup (handle scope, saved state) and
  * unlocking.
  */
-extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self, jvalue result, uint64_t result_f)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self,
+                                                    jvalue result,
+                                                    uint64_t result_f) {
+  // We're here just back from a native call. We don't hold the shared mutator lock at this point
+  // until we call GoToRunnable() later in GenericJniMethodEnd(). Accessing objects or doing
+  // anything that requires the mutator lock before that would cause problems, as the GC may hold
+  // the exclusive mutator lock and may be moving objects, etc.
   ArtMethod** sp = self->GetManagedStack()->GetTopQuickFrame();
   uint32_t* sp32 = reinterpret_cast<uint32_t*>(sp);
   ArtMethod* called = *sp;
   uint32_t cookie = *(sp32 - 1);
-
-  jobject lock = nullptr;
-  if (called->IsSynchronized()) {
-    HandleScope* table = reinterpret_cast<HandleScope*>(reinterpret_cast<uint8_t*>(sp)
-        + sizeof(*sp));
-    lock = table->GetHandle(0).ToJObject();
-  }
-
-  char return_shorty_char = called->GetShorty()[0];
-
-  if (return_shorty_char == 'L') {
-    return artQuickGenericJniEndJNIRef(self, cookie, result.l, lock);
-  } else {
-    artQuickGenericJniEndJNINonRef(self, cookie, lock);
-
-    switch (return_shorty_char) {
-      case 'F': {
-        if (kRuntimeISA == kX86) {
-          // Convert back the result to float.
-          double d = bit_cast<double, uint64_t>(result_f);
-          return bit_cast<uint32_t, float>(static_cast<float>(d));
-        } else {
-          return result_f;
-        }
-      }
-      case 'D':
-        return result_f;
-      case 'Z':
-        return result.z;
-      case 'B':
-        return result.b;
-      case 'C':
-        return result.c;
-      case 'S':
-        return result.s;
-      case 'I':
-        return result.i;
-      case 'J':
-        return result.j;
-      case 'V':
-        return 0;
-      default:
-        LOG(FATAL) << "Unexpected return shorty character " << return_shorty_char;
-        return 0;
-    }
-  }
+  HandleScope* table = reinterpret_cast<HandleScope*>(reinterpret_cast<uint8_t*>(sp) + sizeof(*sp));
+  return GenericJniMethodEnd(self, cookie, result, result_f, called, table);
 }
 
 // We use TwoWordReturn to optimize scalar returns. We use the hi value for code, and the lo value
diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h
index 25fdd7c..2510514 100644
--- a/runtime/gc/allocator/rosalloc-inl.h
+++ b/runtime/gc/allocator/rosalloc-inl.h
@@ -53,13 +53,7 @@
 }
 
 inline bool RosAlloc::Run::IsFull() {
-  const size_t num_vec = NumberOfBitmapVectors();
-  for (size_t v = 0; v < num_vec; ++v) {
-    if (~alloc_bit_map_[v] != 0) {
-      return false;
-    }
-  }
-  return true;
+  return free_list_.Size() == 0;
 }
 
 inline bool RosAlloc::CanAllocFromThreadLocalRun(Thread* self, size_t size) {
@@ -120,45 +114,14 @@
 }
 
 inline void* RosAlloc::Run::AllocSlot() {
-  const size_t idx = size_bracket_idx_;
-  while (true) {
-    if (kIsDebugBuild) {
-      // Make sure that no slots leaked, the bitmap should be full for all previous vectors.
-      for (size_t i = 0; i < first_search_vec_idx_; ++i) {
-        CHECK_EQ(~alloc_bit_map_[i], 0U);
-      }
-    }
-    uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_];
-    uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr);
-    if (LIKELY(ffz1 != 0)) {
-      const uint32_t ffz = ffz1 - 1;
-      const uint32_t slot_idx = ffz +
-          first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte;
-      const uint32_t mask = 1U << ffz;
-      DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range";
-      // Found an empty slot. Set the bit.
-      DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U);
-      *alloc_bitmap_ptr |= mask;
-      DCHECK_NE(*alloc_bitmap_ptr & mask, 0U);
-      uint8_t* slot_addr = reinterpret_cast<uint8_t*>(this) +
-          headerSizes[idx] + slot_idx * bracketSizes[idx];
-      if (kTraceRosAlloc) {
-        LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex
-                  << reinterpret_cast<intptr_t>(slot_addr)
-                  << ", bracket_size=" << std::dec << bracketSizes[idx]
-                  << ", slot_idx=" << slot_idx;
-      }
-      return slot_addr;
-    }
-    const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32;
-    if (first_search_vec_idx_ + 1 >= num_words) {
-      DCHECK(IsFull());
-      // Already at the last word, return null.
-      return nullptr;
-    }
-    // Increase the index to the next word and try again.
-    ++first_search_vec_idx_;
+  Slot* slot = free_list_.Remove();
+  if (kTraceRosAlloc && slot != nullptr) {
+    const uint8_t idx = size_bracket_idx_;
+    LOG(INFO) << "RosAlloc::Run::AllocSlot() : " << slot
+              << ", bracket_size=" << std::dec << bracketSizes[idx]
+              << ", slot_idx=" << SlotIndex(slot);
   }
+  return slot;
 }
 
 }  // namespace allocator
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 470bc1c..9c8e4df 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -35,7 +35,7 @@
 namespace gc {
 namespace allocator {
 
-static constexpr bool kUsePrefetchDuringAllocRun = true;
+static constexpr bool kUsePrefetchDuringAllocRun = false;
 static constexpr bool kPrefetchNewRunDataByZeroing = false;
 static constexpr size_t kPrefetchStride = 64;
 
@@ -43,8 +43,6 @@
 size_t RosAlloc::numOfPages[kNumOfSizeBrackets];
 size_t RosAlloc::numOfSlots[kNumOfSizeBrackets];
 size_t RosAlloc::headerSizes[kNumOfSizeBrackets];
-size_t RosAlloc::bulkFreeBitMapOffsets[kNumOfSizeBrackets];
-size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
 bool RosAlloc::initialized_ = false;
 size_t RosAlloc::dedicated_full_run_storage_[kPageSize / sizeof(size_t)] = { 0 };
 RosAlloc::Run* RosAlloc::dedicated_full_run_ =
@@ -556,9 +554,7 @@
       new_run->magic_num_ = kMagicNum;
     }
     new_run->size_bracket_idx_ = idx;
-    new_run->SetAllocBitMapBitsForInvalidSlots();
     DCHECK(!new_run->IsThreadLocal());
-    DCHECK_EQ(new_run->first_search_vec_idx_, 0U);
     DCHECK(!new_run->to_be_bulk_freed_);
     if (kUsePrefetchDuringAllocRun && idx < kNumThreadLocalSizeBrackets) {
       // Take ownership of the cache lines if we are likely to be thread local run.
@@ -576,6 +572,7 @@
         }
       }
     }
+    new_run->InitFreeList();
   }
   return new_run;
 }
@@ -695,15 +692,11 @@
       MutexLock mu(self, *size_bracket_locks_[idx]);
       bool is_all_free_after_merge;
       // This is safe to do for the dedicated_full_run_ since the bitmaps are empty.
-      if (thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&is_all_free_after_merge)) {
+      if (thread_local_run->MergeThreadLocalFreeListToFreeList(&is_all_free_after_merge)) {
         DCHECK_NE(thread_local_run, dedicated_full_run_);
         // Some slot got freed. Keep it.
         DCHECK(!thread_local_run->IsFull());
         DCHECK_EQ(is_all_free_after_merge, thread_local_run->IsAllFree());
-        if (is_all_free_after_merge) {
-          // Check that the bitmap idx is back at 0 if it's all free.
-          DCHECK_EQ(thread_local_run->first_search_vec_idx_, 0U);
-        }
       } else {
         // No slots got freed. Try to refill the thread-local run.
         DCHECK(thread_local_run->IsFull());
@@ -792,7 +785,7 @@
     DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets);
     DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
     DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
-    run->MarkThreadLocalFreeBitMap(ptr);
+    run->AddToThreadLocalFreeList(ptr);
     if (kTraceRosAlloc) {
       LOG(INFO) << "RosAlloc::FreeFromRun() : Freed a slot in a thread local run 0x" << std::hex
                 << reinterpret_cast<intptr_t>(run);
@@ -818,7 +811,7 @@
     }
     DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
     DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
-    run->ZeroHeader();
+    run->ZeroHeaderAndSlotHeaders();
     {
       MutexLock lock_mu(self, lock_);
       FreePages(self, run, true);
@@ -853,271 +846,145 @@
   return bracket_size;
 }
 
-std::string RosAlloc::Run::BitMapToStr(uint32_t* bit_map_base, size_t num_vec) {
-  std::string bit_map_str;
-  for (size_t v = 0; v < num_vec; v++) {
-    uint32_t vec = bit_map_base[v];
-    if (v != num_vec - 1) {
-      bit_map_str.append(StringPrintf("%x-", vec));
+template<bool kUseTail>
+std::string RosAlloc::Run::FreeListToStr(SlotFreeList<kUseTail>* free_list) {
+  std::string free_list_str;
+  const uint8_t idx = size_bracket_idx_;
+  const size_t bracket_size = bracketSizes[idx];
+  for (Slot* slot = free_list->Head(); slot != nullptr; slot = slot->Next()) {
+    bool is_last = slot->Next() == nullptr;
+    uintptr_t slot_offset = reinterpret_cast<uintptr_t>(slot) -
+        reinterpret_cast<uintptr_t>(FirstSlot());
+    DCHECK_EQ(slot_offset % bracket_size, 0U);
+    uintptr_t slot_idx = slot_offset / bracket_size;
+    if (!is_last) {
+      free_list_str.append(StringPrintf("%u-", static_cast<uint32_t>(slot_idx)));
     } else {
-      bit_map_str.append(StringPrintf("%x", vec));
+      free_list_str.append(StringPrintf("%u", static_cast<uint32_t>(slot_idx)));
     }
   }
-  return bit_map_str.c_str();
+  return free_list_str;
 }
 
 std::string RosAlloc::Run::Dump() {
   size_t idx = size_bracket_idx_;
-  size_t num_slots = numOfSlots[idx];
-  size_t num_vec = RoundUp(num_slots, 32) / 32;
   std::ostringstream stream;
   stream << "RosAlloc Run = " << reinterpret_cast<void*>(this)
          << "{ magic_num=" << static_cast<int>(magic_num_)
          << " size_bracket_idx=" << idx
          << " is_thread_local=" << static_cast<int>(is_thread_local_)
          << " to_be_bulk_freed=" << static_cast<int>(to_be_bulk_freed_)
-         << " first_search_vec_idx=" << first_search_vec_idx_
-         << " alloc_bit_map=" << BitMapToStr(alloc_bit_map_, num_vec)
-         << " bulk_free_bit_map=" << BitMapToStr(BulkFreeBitMap(), num_vec)
-         << " thread_local_bit_map=" << BitMapToStr(ThreadLocalFreeBitMap(), num_vec)
+         << " free_list=" << FreeListToStr(&free_list_)
+         << " bulk_free_list=" << FreeListToStr(&bulk_free_list_)
+         << " thread_local_list=" << FreeListToStr(&thread_local_free_list_)
          << " }" << std::endl;
   return stream.str();
 }
 
+inline size_t RosAlloc::Run::SlotIndex(Slot* slot) {
+  const uint8_t idx = size_bracket_idx_;
+  const size_t bracket_size = bracketSizes[idx];
+  const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
+      - reinterpret_cast<uint8_t*>(FirstSlot());
+  DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
+  size_t slot_idx = offset_from_slot_base / bracket_size;
+  DCHECK_LT(slot_idx, numOfSlots[idx]);
+  return slot_idx;
+}
+
 void RosAlloc::Run::FreeSlot(void* ptr) {
   DCHECK(!IsThreadLocal());
   const uint8_t idx = size_bracket_idx_;
   const size_t bracket_size = bracketSizes[idx];
-  const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
-      - (reinterpret_cast<uint8_t*>(this) + headerSizes[idx]);
-  DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
-  size_t slot_idx = offset_from_slot_base / bracket_size;
-  DCHECK_LT(slot_idx, numOfSlots[idx]);
-  size_t vec_idx = slot_idx / 32;
-  if (kIsDebugBuild) {
-    size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
-    DCHECK_LT(vec_idx, num_vec);
-  }
-  size_t vec_off = slot_idx % 32;
-  uint32_t* vec = &alloc_bit_map_[vec_idx];
-  first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(vec_idx));
-  const uint32_t mask = 1U << vec_off;
-  DCHECK_NE(*vec & mask, 0U);
-  *vec &= ~mask;
-  DCHECK_EQ(*vec & mask, 0U);
+  Slot* slot = ToSlot(ptr);
   // Zero out the memory.
   // TODO: Investigate alternate memset since ptr is guaranteed to be aligned to 16.
-  memset(ptr, 0, bracket_size);
+  memset(slot, 0, bracket_size);
+  free_list_.Add(slot);
   if (kTraceRosAlloc) {
-    LOG(INFO) << "RosAlloc::Run::FreeSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
-              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+    LOG(INFO) << "RosAlloc::Run::FreeSlot() : " << slot
+              << ", bracket_size=" << std::dec << bracket_size << ", slot_idx=" << SlotIndex(slot);
   }
 }
 
-size_t RosAlloc::Run::NumberOfFreeSlots() {
-  size_t num_alloc_slots = 0;
-  const size_t idx = size_bracket_idx_;
-  const size_t num_slots = numOfSlots[idx];
-  const size_t num_vec = RoundUp(num_slots, 32) / 32;
-  DCHECK_NE(num_vec, 0U);
-  for (size_t v = 0; v < num_vec - 1; v++) {
-    num_alloc_slots += POPCOUNT(alloc_bit_map_[v]);
-  }
-  // Don't count the invalid bits in the last vector.
-  uint32_t last_vec_masked = alloc_bit_map_[num_vec - 1] &
-      ~GetBitmapLastVectorMask(num_slots, num_vec);
-  num_alloc_slots += POPCOUNT(last_vec_masked);
-  size_t num_free_slots = num_slots - num_alloc_slots;
-  DCHECK_LE(num_alloc_slots, num_slots);
-  DCHECK_LE(num_free_slots, num_slots);
-  return num_free_slots;
-}
-
-inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) {
+inline bool RosAlloc::Run::MergeThreadLocalFreeListToFreeList(bool* is_all_free_after_out) {
   DCHECK(IsThreadLocal());
-  // Free slots in the alloc bit map based on the thread local free bit map.
-  const size_t idx = size_bracket_idx_;
-  const size_t num_of_slots = numOfSlots[idx];
-  const size_t num_vec = RoundUp(num_of_slots, 32) / 32;
-  bool changed = false;
-  uint32_t* vecp = &alloc_bit_map_[0];
-  uint32_t* tl_free_vecp = &ThreadLocalFreeBitMap()[0];
-  bool is_all_free_after = true;
-  for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) {
-    uint32_t tl_free_vec = *tl_free_vecp;
-    uint32_t vec_before = *vecp;
-    uint32_t vec_after;
-    if (tl_free_vec != 0) {
-      first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
-      vec_after = vec_before & ~tl_free_vec;
-      *vecp = vec_after;
-      changed = true;
-      *tl_free_vecp = 0;  // clear the thread local free bit map.
-    } else {
-      vec_after = vec_before;
-    }
-    if (vec_after != 0) {
-      if (v == num_vec - 1) {
-        // Only not all free if a bit other than the mask bits are set.
-        is_all_free_after =
-            is_all_free_after && GetBitmapLastVectorMask(num_of_slots, num_vec) == vec_after;
-      } else {
-        is_all_free_after = false;
-      }
-    }
-    DCHECK_EQ(*tl_free_vecp, static_cast<uint32_t>(0));
-  }
-  *is_all_free_after_out = is_all_free_after;
-  // Return true if there was at least a bit set in the thread-local
-  // free bit map and at least a bit in the alloc bit map changed.
-  return changed;
-}
-
-inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() {
-  DCHECK(!IsThreadLocal());
-  // Free slots in the alloc bit map based on the bulk free bit map.
-  const size_t num_vec = NumberOfBitmapVectors();
-  uint32_t* vecp = &alloc_bit_map_[0];
-  uint32_t* free_vecp = &BulkFreeBitMap()[0];
-  for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) {
-    uint32_t free_vec = *free_vecp;
-    if (free_vec != 0) {
-      first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v));
-      *vecp &= ~free_vec;
-      *free_vecp = 0;  // clear the bulk free bit map.
-    }
-    DCHECK_EQ(*free_vecp, static_cast<uint32_t>(0));
-  }
-}
-
-inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() {
-  DCHECK(IsThreadLocal());
-  // Union the thread local bit map with the bulk free bit map.
-  size_t num_vec = NumberOfBitmapVectors();
-  uint32_t* to_vecp = &ThreadLocalFreeBitMap()[0];
-  uint32_t* from_vecp = &BulkFreeBitMap()[0];
-  for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) {
-    uint32_t from_vec = *from_vecp;
-    if (from_vec != 0) {
-      *to_vecp |= from_vec;
-      *from_vecp = 0;  // clear the bulk free bit map.
-    }
-    DCHECK_EQ(*from_vecp, static_cast<uint32_t>(0));
-  }
-}
-
-inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) {
-  DCHECK(IsThreadLocal());
-  MarkFreeBitMapShared(ptr, ThreadLocalFreeBitMap(), "MarkThreadLocalFreeBitMap");
-}
-
-inline size_t RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
-  return MarkFreeBitMapShared(ptr, BulkFreeBitMap(), "MarkFreeBitMap");
-}
-
-inline size_t RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
-                                                  const char* caller_name) {
+  // Merge the thread local free list into the free list and clear the thread local free list.
   const uint8_t idx = size_bracket_idx_;
-  const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
-      - (reinterpret_cast<uint8_t*>(this) + headerSizes[idx]);
+  const size_t thread_local_free_list_size = thread_local_free_list_.Size();
+  const size_t size_before = free_list_.Size();
+  free_list_.Merge(&thread_local_free_list_);
+  const size_t size_after = free_list_.Size();
+  DCHECK_EQ(size_before < size_after, thread_local_free_list_size > 0);
+  DCHECK_LE(size_before, size_after);
+  *is_all_free_after_out = free_list_.Size() == numOfSlots[idx];
+  // Return true if at least one slot was added to the free list.
+  return size_before < size_after;
+}
+
+inline void RosAlloc::Run::MergeBulkFreeListToFreeList() {
+  DCHECK(!IsThreadLocal());
+  // Merge the bulk free list into the free list and clear the bulk free list.
+  free_list_.Merge(&bulk_free_list_);
+}
+
+inline void RosAlloc::Run::MergeBulkFreeListToThreadLocalFreeList() {
+  DCHECK(IsThreadLocal());
+  // Merge the bulk free list into the thread local free list and clear the bulk free list.
+  thread_local_free_list_.Merge(&bulk_free_list_);
+}
+
+inline void RosAlloc::Run::AddToThreadLocalFreeList(void* ptr) {
+  DCHECK(IsThreadLocal());
+  AddToFreeListShared(ptr, &thread_local_free_list_, __FUNCTION__);
+}
+
+inline size_t RosAlloc::Run::AddToBulkFreeList(void* ptr) {
+  return AddToFreeListShared(ptr, &bulk_free_list_, __FUNCTION__);
+}
+
+inline size_t RosAlloc::Run::AddToFreeListShared(void* ptr,
+                                                 SlotFreeList<true>* free_list,
+                                                 const char* caller_name) {
+  const uint8_t idx = size_bracket_idx_;
   const size_t bracket_size = bracketSizes[idx];
-  memset(ptr, 0, bracket_size);
-  DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
-  size_t slot_idx = offset_from_slot_base / bracket_size;
-  DCHECK_LT(slot_idx, numOfSlots[idx]);
-  size_t vec_idx = slot_idx / 32;
-  if (kIsDebugBuild) {
-    size_t num_vec = NumberOfBitmapVectors();
-    DCHECK_LT(vec_idx, num_vec);
-  }
-  size_t vec_off = slot_idx % 32;
-  uint32_t* vec = &free_bit_map_base[vec_idx];
-  const uint32_t mask = 1U << vec_off;
-  DCHECK_EQ(*vec & mask, 0U);
-  *vec |= mask;
-  DCHECK_NE(*vec & mask, 0U);
+  Slot* slot = ToSlot(ptr);
+  memset(slot, 0, bracket_size);
+  free_list->Add(slot);
   if (kTraceRosAlloc) {
-    LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : 0x" << std::hex
-              << reinterpret_cast<intptr_t>(ptr)
-              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+    LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : " << ptr
+              << ", bracket_size=" << std::dec << bracket_size << ", slot_idx=" << SlotIndex(slot);
   }
   return bracket_size;
 }
 
-inline uint32_t RosAlloc::Run::GetBitmapLastVectorMask(size_t num_slots, size_t num_vec) {
-  const size_t kBitsPerVec = 32;
-  DCHECK_GE(num_vec * kBitsPerVec, num_slots);
-  DCHECK_NE(num_vec, 0U);
-  size_t remain = num_vec * kBitsPerVec - num_slots;
-  DCHECK_LT(remain, kBitsPerVec);
-  return ((1U << remain) - 1) << ((kBitsPerVec - remain) & 0x1F);
-}
-
-inline bool RosAlloc::Run::IsAllFree() {
+inline void RosAlloc::Run::ZeroHeaderAndSlotHeaders() {
+  DCHECK(IsAllFree());
   const uint8_t idx = size_bracket_idx_;
-  const size_t num_slots = numOfSlots[idx];
-  const size_t num_vec = NumberOfBitmapVectors();
-  DCHECK_NE(num_vec, 0U);
-  // Check the last vector after the loop since it uses a special case for the masked bits.
-  for (size_t v = 0; v < num_vec - 1; v++) {
-    uint32_t vec = alloc_bit_map_[v];
-    if (vec != 0) {
-      return false;
-    }
+  // Zero the slot header (next pointers).
+  for (Slot* slot = free_list_.Head(); slot != nullptr; ) {
+    Slot* next_slot = slot->Next();
+    slot->Clear();
+    slot = next_slot;
   }
-  // Make sure the last word is equal to the mask, all other bits must be 0.
-  return alloc_bit_map_[num_vec - 1] == GetBitmapLastVectorMask(num_slots, num_vec);
-}
-
-inline bool RosAlloc::Run::IsBulkFreeBitmapClean() {
-  const size_t num_vec = NumberOfBitmapVectors();
-  for (size_t v = 0; v < num_vec; v++) {
-    uint32_t vec = BulkFreeBitMap()[v];
-    if (vec != 0) {
-      return false;
-    }
-  }
-  return true;
-}
-
-inline bool RosAlloc::Run::IsThreadLocalFreeBitmapClean() {
-  const size_t num_vec = NumberOfBitmapVectors();
-  for (size_t v = 0; v < num_vec; v++) {
-    uint32_t vec = ThreadLocalFreeBitMap()[v];
-    if (vec != 0) {
-      return false;
-    }
-  }
-  return true;
-}
-
-inline void RosAlloc::Run::SetAllocBitMapBitsForInvalidSlots() {
-  const size_t idx = size_bracket_idx_;
-  const size_t num_slots = numOfSlots[idx];
-  const size_t num_vec = RoundUp(num_slots, 32) / 32;
-  DCHECK_NE(num_vec, 0U);
-  // Make sure to set the bits at the end of the bitmap so that we don't allocate there since they
-  // don't represent valid slots.
-  alloc_bit_map_[num_vec - 1] |= GetBitmapLastVectorMask(num_slots, num_vec);
-}
-
-inline void RosAlloc::Run::ZeroHeader() {
-  const uint8_t idx = size_bracket_idx_;
+  // Zero the header.
   memset(this, 0, headerSizes[idx]);
+  // Check that the entire run is all zero.
+  if (kIsDebugBuild) {
+    const size_t size = numOfPages[idx] * kPageSize;
+    const uintptr_t* word_ptr = reinterpret_cast<uintptr_t*>(this);
+    for (size_t i = 0; i < size / sizeof(uintptr_t); ++i) {
+      CHECK_EQ(word_ptr[i], 0U) << "words don't match at index " << i;
+    }
+  }
 }
 
 inline void RosAlloc::Run::ZeroData() {
   const uint8_t idx = size_bracket_idx_;
-  uint8_t* slot_begin = reinterpret_cast<uint8_t*>(this) + headerSizes[idx];
+  uint8_t* slot_begin = reinterpret_cast<uint8_t*>(FirstSlot());
   memset(slot_begin, 0, numOfSlots[idx] * bracketSizes[idx]);
 }
 
-inline void RosAlloc::Run::FillAllocBitMap() {
-  size_t num_vec = NumberOfBitmapVectors();
-  memset(alloc_bit_map_, 0xFF, sizeof(uint32_t) * num_vec);
-  first_search_vec_idx_ = num_vec - 1;  // No free bits in any of the bitmap words.
-}
-
 void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
                                     void* arg) {
   size_t idx = size_bracket_idx_;
@@ -1126,26 +993,27 @@
   size_t bracket_size = IndexToBracketSize(idx);
   DCHECK_EQ(slot_base + num_slots * bracket_size,
             reinterpret_cast<uint8_t*>(this) + numOfPages[idx] * kPageSize);
-  size_t num_vec = RoundUp(num_slots, 32) / 32;
-  size_t slots = 0;
-  const uint32_t* const tl_free_vecp = IsThreadLocal() ? ThreadLocalFreeBitMap() : nullptr;
-  for (size_t v = 0; v < num_vec; v++, slots += 32) {
-    DCHECK_GE(num_slots, slots);
-    uint32_t vec = alloc_bit_map_[v];
-    if (tl_free_vecp != nullptr) {
-      // Clear out the set bits in the thread local free bitmap since these aren't actually
-      // allocated.
-      vec &= ~tl_free_vecp[v];
+  // Free slots are on the free list and the allocated/used slots are not. We walk the free list
+  // and record which slots are free in the is_free array.
+  std::unique_ptr<bool[]> is_free(new bool[num_slots]());  // zero initialized
+  for (Slot* slot = free_list_.Head(); slot != nullptr; slot = slot->Next()) {
+    size_t slot_idx = SlotIndex(slot);
+    DCHECK_LT(slot_idx, num_slots);
+    is_free[slot_idx] = true;
+  }
+  if (IsThreadLocal()) {
+    for (Slot* slot = thread_local_free_list_.Head(); slot != nullptr; slot = slot->Next()) {
+      size_t slot_idx = SlotIndex(slot);
+      DCHECK_LT(slot_idx, num_slots);
+      is_free[slot_idx] = true;
     }
-    size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
-    for (size_t i = 0; i < end; ++i) {
-      bool is_allocated = ((vec >> i) & 0x1) != 0;
-      uint8_t* slot_addr = slot_base + (slots + i) * bracket_size;
-      if (is_allocated) {
-        handler(slot_addr, slot_addr + bracket_size, bracket_size, arg);
-      } else {
-        handler(slot_addr, slot_addr + bracket_size, 0, arg);
-      }
+  }
+  for (size_t slot_idx = 0; slot_idx < num_slots; ++slot_idx) {
+    uint8_t* slot_addr = slot_base + slot_idx * bracket_size;
+    if (!is_free[slot_idx]) {
+      handler(slot_addr, slot_addr + bracket_size, bracket_size, arg);
+    } else {
+      handler(slot_addr, slot_addr + bracket_size, 0, arg);
     }
   }
 }
@@ -1236,7 +1104,7 @@
     DCHECK(run != nullptr);
     DCHECK_EQ(run->magic_num_, kMagicNum);
     // Set the bit in the bulk free bit map.
-    freed_bytes += run->MarkBulkFreeBitMap(ptr);
+    freed_bytes += run->AddToBulkFreeList(ptr);
 #ifdef __ANDROID__
     if (!run->to_be_bulk_freed_) {
       run->to_be_bulk_freed_ = true;
@@ -1262,7 +1130,7 @@
       DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets);
       DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
       DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
-      run->UnionBulkFreeBitMapToThreadLocalFreeBitMap();
+      run->MergeBulkFreeListToThreadLocalFreeList();
       if (kTraceRosAlloc) {
         LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a thread local run 0x"
                   << std::hex << reinterpret_cast<intptr_t>(run);
@@ -1272,7 +1140,7 @@
       // it's become all free.
     } else {
       bool run_was_full = run->IsFull();
-      run->MergeBulkFreeBitMapIntoAllocBitMap();
+      run->MergeBulkFreeListToFreeList();
       if (kTraceRosAlloc) {
         LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a run 0x" << std::hex
                   << reinterpret_cast<intptr_t>(run);
@@ -1316,7 +1184,7 @@
           DCHECK(non_full_runs->find(run) == non_full_runs->end());
         }
         if (!run_was_current) {
-          run->ZeroHeader();
+          run->ZeroHeaderAndSlotHeaders();
           MutexLock lock_mu(self, lock_);
           FreePages(self, run, true);
         }
@@ -1677,9 +1545,9 @@
       size_t num_free_slots = thread_local_run->NumberOfFreeSlots();
       free_bytes += num_free_slots * bracketSizes[idx];
       bool dont_care;
-      thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care);
+      thread_local_run->MergeThreadLocalFreeListToFreeList(&dont_care);
       thread_local_run->SetIsThreadLocal(false);
-      thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap();
+      thread_local_run->MergeBulkFreeListToFreeList();
       DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
       DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
       RevokeRun(self, idx, thread_local_run);
@@ -1702,7 +1570,7 @@
       }
     }
   } else if (run->IsAllFree()) {
-    run->ZeroHeader();
+    run->ZeroHeaderAndSlotHeaders();
     MutexLock mu(self, lock_);
     FreePages(self, run, true);
   } else {
@@ -1814,22 +1682,15 @@
     size_t max_num_of_slots = run_size / bracket_size;
     // Compute the actual number of slots by taking the header and
     // alignment into account.
-    size_t fixed_header_size = RoundUp(Run::fixed_header_size(), sizeof(uint32_t));
-    DCHECK_EQ(fixed_header_size, static_cast<size_t>(8));
+    size_t fixed_header_size = RoundUp(Run::fixed_header_size(), sizeof(uint64_t));
+    DCHECK_EQ(fixed_header_size, 80U);
     size_t header_size = 0;
-    size_t bulk_free_bit_map_offset = 0;
-    size_t thread_local_free_bit_map_offset = 0;
     size_t num_of_slots = 0;
     // Search for the maximum number of slots that allows enough space
-    // for the header (including the bit maps.)
+    // for the header.
     for (int s = max_num_of_slots; s >= 0; s--) {
       size_t tmp_slots_size = bracket_size * s;
-      size_t tmp_bit_map_size = RoundUp(s, sizeof(uint32_t) * kBitsPerByte) / kBitsPerByte;
-      size_t tmp_bulk_free_bit_map_size = tmp_bit_map_size;
-      size_t tmp_bulk_free_bit_map_off = fixed_header_size + tmp_bit_map_size;
-      size_t tmp_thread_local_free_bit_map_size = tmp_bit_map_size;
-      size_t tmp_thread_local_free_bit_map_off = tmp_bulk_free_bit_map_off + tmp_bulk_free_bit_map_size;
-      size_t tmp_unaligned_header_size = tmp_thread_local_free_bit_map_off + tmp_thread_local_free_bit_map_size;
+      size_t tmp_unaligned_header_size = fixed_header_size;
       // Align up the unaligned header size. bracket_size may not be a power of two.
       size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ?
           tmp_unaligned_header_size :
@@ -1841,24 +1702,19 @@
         // space for the header (including the bit maps.)
         num_of_slots = s;
         header_size = tmp_header_size;
-        bulk_free_bit_map_offset = tmp_bulk_free_bit_map_off;
-        thread_local_free_bit_map_offset = tmp_thread_local_free_bit_map_off;
         break;
       }
     }
-    DCHECK(num_of_slots > 0 && header_size > 0 && bulk_free_bit_map_offset > 0);
+    DCHECK_GT(num_of_slots, 0U);
+    DCHECK_GT(header_size, 0U);
     // Add the padding for the alignment remainder.
     header_size += run_size % bracket_size;
     DCHECK_EQ(header_size + num_of_slots * bracket_size, run_size);
     numOfSlots[i] = num_of_slots;
     headerSizes[i] = header_size;
-    bulkFreeBitMapOffsets[i] = bulk_free_bit_map_offset;
-    threadLocalFreeBitMapOffsets[i] = thread_local_free_bit_map_offset;
     if (kTraceRosAlloc) {
       LOG(INFO) << "numOfSlots[" << i << "]=" << numOfSlots[i]
-                << ", headerSizes[" << i << "]=" << headerSizes[i]
-                << ", bulkFreeBitMapOffsets[" << i << "]=" << bulkFreeBitMapOffsets[i]
-                << ", threadLocalFreeBitMapOffsets[" << i << "]=" << threadLocalFreeBitMapOffsets[i];;
+                << ", headerSizes[" << i << "]=" << headerSizes[i];
     }
   }
   // Fill the alloc bitmap so nobody can successfully allocate from it.
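
With the per-slot bitmaps gone, the slot-count search in Initialize() above no longer depends on s for the header size: the header is simply the fixed header rounded up to the bracket size. The standalone re-computation below uses illustrative numbers only (4 KiB run, 16-byte bracket; the 80 bytes matches the DCHECK on the new fixed header size) and omits the final remainder padding.

    #include <cstddef>
    #include <cstdio>

    // Same search as in RosAlloc::Initialize(): the largest slot count s such
    // that the bracket-aligned header plus s slots still fits in the run.
    size_t ComputeNumOfSlots(size_t run_size, size_t bracket_size,
                             size_t fixed_header_size) {
      // Align the header up to the bracket size (may not be a power of two).
      size_t header_size = (fixed_header_size % bracket_size == 0)
          ? fixed_header_size
          : (fixed_header_size / bracket_size + 1) * bracket_size;
      for (size_t s = run_size / bracket_size; s > 0; s--) {
        if (header_size + bracket_size * s <= run_size) {
          return s;
        }
      }
      return 0;
    }

    int main() {
      size_t slots = ComputeNumOfSlots(4096, 16, 80);
      std::printf("num_of_slots = %zu\n", slots);  // (4096 - 80) / 16 = 251
    }
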
@@ -1868,8 +1724,11 @@
   // It doesn't matter which size bracket we use since the main goal is to have the allocation
   // fail 100% of the time you attempt to allocate into the dedicated full run.
   dedicated_full_run_->size_bracket_idx_ = 0;
-  dedicated_full_run_->FillAllocBitMap();
+  DCHECK_EQ(dedicated_full_run_->FreeList()->Size(), 0U);  // It looks full.
   dedicated_full_run_->SetIsThreadLocal(true);
+
+  // The smallest bracket size must be at least as large as sizeof(Slot).
+  DCHECK_LE(sizeof(Slot), bracketSizes[0]) << "sizeof(Slot) <= the smallest bracket size";
 }
 
 void RosAlloc::BytesAllocatedCallback(void* start ATTRIBUTE_UNUSED, void* end ATTRIBUTE_UNUSED,
@@ -2025,19 +1884,12 @@
   CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << Dump();
   uint8_t* slot_base = reinterpret_cast<uint8_t*>(this) + headerSizes[idx];
   const size_t num_slots = numOfSlots[idx];
-  const size_t num_vec = RoundUp(num_slots, 32) / 32;
-  CHECK_GT(num_vec, 0U);
   size_t bracket_size = IndexToBracketSize(idx);
   CHECK_EQ(slot_base + num_slots * bracket_size,
            reinterpret_cast<uint8_t*>(this) + numOfPages[idx] * kPageSize)
       << "Mismatch in the end address of the run " << Dump();
-  // Check that the bulk free bitmap is clean. It's only used during BulkFree().
-  CHECK(IsBulkFreeBitmapClean()) << "The bulk free bit map isn't clean " << Dump();
-  uint32_t last_word_mask = GetBitmapLastVectorMask(num_slots, num_vec);
-  // Make sure all the bits at the end of the run are set so that we don't allocate there.
-  CHECK_EQ(alloc_bit_map_[num_vec - 1] & last_word_mask, last_word_mask);
-  // Ensure that the first bitmap index is valid.
-  CHECK_LT(first_search_vec_idx_, num_vec);
+  // Check that the bulk free list is empty. It's only used during BulkFree().
+  CHECK(IsBulkFreeListEmpty()) << "The bulk free list isn't empty " << Dump();
   // Check the thread local runs, the current runs, and the run sets.
   if (IsThreadLocal()) {
     // If it's a thread local run, then it must be pointed to by an owner thread.
@@ -2059,11 +1911,11 @@
     }
     CHECK(owner_found) << "A thread local run has no owner thread " << Dump();
   } else {
-    // If it's not thread local, check that the thread local free bitmap is clean.
-    CHECK(IsThreadLocalFreeBitmapClean())
-        << "A non-thread-local run's thread local free bitmap isn't clean "
+    // If it's not thread local, check that the thread local free list is empty.
+    CHECK(IsThreadLocalFreeListEmpty())
+        << "A non-thread-local run's thread local free list isn't empty "
         << Dump();
-    // Check if it's a current run for the size bucket.
+    // Check if it's a current run for the size bracket.
     bool is_current_run = false;
     for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
       MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
@@ -2101,34 +1953,39 @@
     }
   }
   // Check each slot.
-  size_t slots = 0;
   size_t memory_tool_modifier = running_on_memory_tool ?
       2 * ::art::gc::space::kDefaultMemoryToolRedZoneBytes :
       0U;
-  for (size_t v = 0; v < num_vec; v++, slots += 32) {
-    DCHECK_GE(num_slots, slots) << "Out of bounds";
-    uint32_t vec = alloc_bit_map_[v];
-    uint32_t thread_local_free_vec = ThreadLocalFreeBitMap()[v];
-    size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
-    for (size_t i = 0; i < end; ++i) {
-      bool is_allocated = ((vec >> i) & 0x1) != 0;
-      // If a thread local run, slots may be marked freed in the
-      // thread local free bitmap.
-      bool is_thread_local_freed = IsThreadLocal() && ((thread_local_free_vec >> i) & 0x1) != 0;
-      if (is_allocated && !is_thread_local_freed) {
-        uint8_t* slot_addr = slot_base + (slots + i) * bracket_size;
-        if (running_on_memory_tool) {
-          slot_addr += ::art::gc::space::kDefaultMemoryToolRedZoneBytes;
-        }
-        mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr);
-        size_t obj_size = obj->SizeOf();
-        CHECK_LE(obj_size + memory_tool_modifier, kLargeSizeThreshold)
-            << "A run slot contains a large object " << Dump();
-        CHECK_EQ(SizeToIndex(obj_size + memory_tool_modifier), idx)
-            << PrettyTypeOf(obj) << " "
-            << "obj_size=" << obj_size << "(" << obj_size + memory_tool_modifier << "), idx=" << idx
-            << " A run slot contains an object with wrong size " << Dump();
-      }
+  // TODO: reuse InspectAllSlots().
+  std::unique_ptr<bool[]> is_free(new bool[num_slots]());  // zero initialized
+  // Mark the free slots; the remaining ones are considered allocated.
+  for (Slot* slot = free_list_.Head(); slot != nullptr; slot = slot->Next()) {
+    size_t slot_idx = SlotIndex(slot);
+    DCHECK_LT(slot_idx, num_slots);
+    is_free[slot_idx] = true;
+  }
+  if (IsThreadLocal()) {
+    for (Slot* slot = thread_local_free_list_.Head(); slot != nullptr; slot = slot->Next()) {
+      size_t slot_idx = SlotIndex(slot);
+      DCHECK_LT(slot_idx, num_slots);
+      is_free[slot_idx] = true;
+    }
+  }
+  for (size_t slot_idx = 0; slot_idx < num_slots; ++slot_idx) {
+    uint8_t* slot_addr = slot_base + slot_idx * bracket_size;
+    if (running_on_memory_tool) {
+      slot_addr += ::art::gc::space::kDefaultMemoryToolRedZoneBytes;
+    }
+    if (!is_free[slot_idx]) {
+      // The slot is allocated.
+      mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr);
+      size_t obj_size = obj->SizeOf();
+      CHECK_LE(obj_size + memory_tool_modifier, kLargeSizeThreshold)
+          << "A run slot contains a large object " << Dump();
+      CHECK_EQ(SizeToIndex(obj_size + memory_tool_modifier), idx)
+          << PrettyTypeOf(obj) << " "
+          << "obj_size=" << obj_size << "(" << obj_size + memory_tool_modifier << "), idx=" << idx
+          << " A run slot contains an object with wrong size " << Dump();
     }
   }
 }
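
The rewritten Run::Verify() above replaces the bitmap walk with free-list bookkeeping: every slot reachable from free_list_ (and, for thread-local runs, thread_local_free_list_) is marked free, and whatever is left unmarked is treated as an allocated object whose size is then checked. The standalone sketch below illustrates only that bookkeeping step; Slot, the run buffer, and MarkFreeSlots are simplified stand-ins, not the ART types.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <new>
#include <vector>

// Illustrative intrusive free-list node; in the real code the node lives inside the slot itself.
struct Slot {
  Slot* next = nullptr;
};

// Walk a free list and mark the index of each slot it contains. Indices left
// false are considered allocated, mirroring the is_free[] array in Verify().
std::vector<bool> MarkFreeSlots(const Slot* head,
                                const uint8_t* slot_base,
                                size_t bracket_size,
                                size_t num_slots) {
  std::vector<bool> is_free(num_slots, false);
  for (const Slot* slot = head; slot != nullptr; slot = slot->next) {
    size_t offset =
        static_cast<size_t>(reinterpret_cast<const uint8_t*>(slot) - slot_base);
    assert(offset % bracket_size == 0);  // Must sit on a bracket boundary.
    size_t slot_idx = offset / bracket_size;
    assert(slot_idx < num_slots);
    is_free[slot_idx] = true;
  }
  return is_free;
}

int main() {
  constexpr size_t kBracketSize = 16;
  constexpr size_t kNumSlots = 4;
  alignas(Slot) uint8_t run[kBracketSize * kNumSlots] = {};
  // Pretend slots 1 and 3 are free: head -> slot 1 -> slot 3.
  Slot* slot1 = new (run + 1 * kBracketSize) Slot();
  Slot* slot3 = new (run + 3 * kBracketSize) Slot();
  slot1->next = slot3;
  std::vector<bool> is_free = MarkFreeSlots(slot1, run, kBracketSize, kNumSlots);
  assert(is_free[1] && is_free[3] && !is_free[0] && !is_free[2]);
  return 0;
}
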
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index a7f29af..87f1392 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -112,6 +112,198 @@
     DISALLOW_COPY_AND_ASSIGN(FreePageRun);
   };
 
+  // The slot header.
+  class Slot {
+   public:
+    Slot* Next() const {
+      return next_;
+    }
+    void SetNext(Slot* next) {
+      next_ = next;
+    }
+    // The slot right before this slot in terms of the address.
+    Slot* Left(size_t bracket_size) {
+      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) - bracket_size);
+    }
+    void Clear() {
+      next_ = nullptr;
+    }
+
+   private:
+    Slot* next_;  // Next slot in the list.
+  };
+
+  // We use the tail (kUseTail == true) for the bulk or thread-local free lists to avoid the need to
+  // traverse the list from the head to the tail when merging free lists.
+  // We don't use the tail (kUseTail == false) for the run's free list so that the allocation
+  // fast path does not have to manage a tail.
+  template<bool kUseTail = true>
+  class SlotFreeList {
+   public:
+    SlotFreeList() : head_(0U), tail_(0), size_(0) {}
+    Slot* Head() const {
+      return reinterpret_cast<Slot*>(head_);
+    }
+    Slot* Tail() const {
+      CHECK(kUseTail);
+      return reinterpret_cast<Slot*>(tail_);
+    }
+    size_t Size() const {
+      return size_;
+    }
+    // Removes from the head of the free list.
+    Slot* Remove() {
+      Slot* slot;
+      if (kIsDebugBuild) {
+        Verify();
+      }
+      Slot** headp = reinterpret_cast<Slot**>(&head_);
+      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
+      Slot* old_head = *headp;
+      if (old_head == nullptr) {
+        // List was empty.
+        if (kUseTail) {
+          DCHECK(*tailp == nullptr);
+        }
+        return nullptr;
+      } else {
+        // List wasn't empty.
+        if (kUseTail) {
+          DCHECK(*tailp != nullptr);
+        }
+        Slot* old_head_next = old_head->Next();
+        slot = old_head;
+        *headp = old_head_next;
+        if (kUseTail && old_head_next == nullptr) {
+          // List becomes empty.
+          *tailp = nullptr;
+        }
+      }
+      slot->Clear();
+      --size_;
+      if (kIsDebugBuild) {
+        Verify();
+      }
+      return slot;
+    }
+    void Add(Slot* slot) {
+      if (kIsDebugBuild) {
+        Verify();
+      }
+      DCHECK(slot != nullptr);
+      Slot** headp = reinterpret_cast<Slot**>(&head_);
+      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
+      Slot* old_head = *headp;
+      if (old_head == nullptr) {
+        // List was empty.
+        if (kUseTail) {
+          DCHECK(*tailp == nullptr);
+        }
+        *headp = slot;
+        if (kUseTail) {
+          *tailp = slot;
+        }
+      } else {
+        // List wasn't empty.
+        if (kUseTail) {
+          DCHECK(*tailp != nullptr);
+        }
+        *headp = slot;
+        slot->SetNext(old_head);
+      }
+      ++size_;
+      if (kIsDebugBuild) {
+        Verify();
+      }
+    }
+    // Merge the given list into this list. Empty the given list.
+    // Deliberately support only a kUseTail == true SlotFreeList parameter because 1) we don't
+    // currently have a situation where we need a kUseTail == false SlotFreeList parameter, and 2)
+    // supporting the kUseTail == false parameter would require an O(n) linked list traversal to do
+    // the merge if 'this' SlotFreeList has kUseTail == false, which we'd like to avoid.
+    void Merge(SlotFreeList<true>* list) {
+      if (kIsDebugBuild) {
+        Verify();
+        CHECK(list != nullptr);
+        list->Verify();
+      }
+      if (list->Size() == 0) {
+        return;
+      }
+      Slot** headp = reinterpret_cast<Slot**>(&head_);
+      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
+      Slot* old_head = *headp;
+      if (old_head == nullptr) {
+        // List was empty.
+        *headp = list->Head();
+        if (kUseTail) {
+          *tailp = list->Tail();
+        }
+        size_ = list->Size();
+      } else {
+        // List wasn't empty.
+        DCHECK(list->Head() != nullptr);
+        *headp = list->Head();
+        DCHECK(list->Tail() != nullptr);
+        list->Tail()->SetNext(old_head);
+        // if kUseTail, no change to tailp.
+        size_ += list->Size();
+      }
+      list->Reset();
+      if (kIsDebugBuild) {
+        Verify();
+      }
+    }
+
+    void Reset() {
+      head_ = 0;
+      if (kUseTail) {
+        tail_ = 0;
+      }
+      size_ = 0;
+    }
+
+    void Verify() {
+      Slot* head = reinterpret_cast<Slot*>(head_);
+      Slot* tail = kUseTail ? reinterpret_cast<Slot*>(tail_) : nullptr;
+      if (size_ == 0) {
+        CHECK(head == nullptr);
+        if (kUseTail) {
+          CHECK(tail == nullptr);
+        }
+      } else {
+        CHECK(head != nullptr);
+        if (kUseTail) {
+          CHECK(tail != nullptr);
+        }
+        size_t count = 0;
+        for (Slot* slot = head; slot != nullptr; slot = slot->Next()) {
+          ++count;
+          if (kUseTail && slot->Next() == nullptr) {
+            CHECK_EQ(slot, tail);
+          }
+        }
+        CHECK_EQ(size_, count);
+      }
+    }
+
+   private:
+    // A pointer (Slot*) to the head of the list. Always 8 bytes so that we will have the same
+    // layout between 32 bit and 64 bit, which is not strictly necessary, but we do so for
+    // 1) uniformity, 2) not having to change this code if we move to a non-low-4G heap in the
+    // future, and 3) the space saved by using 32 bit fields in 32 bit would be lost in the noise
+    // (it would not open up enough space to make an extra slot available).
+    uint64_t head_;
+    // A pointer (Slot*) to the tail of the list. Always 8 bytes so that we will have the same
+    // layout between 32 bit and 64 bit. The tail is stored to speed up merging of lists.
+    // Unused if kUseTail is false.
+    uint64_t tail_;
+    // The number of slots in the list. This makes it fast to check whether a run is completely
+    // free (see IsAllFree()) without traversing the whole free list.
+    uint32_t size_;
+    uint32_t padding_ ATTRIBUTE_UNUSED;
+  };
+
   // Represents a run of memory slots of the same size.
   //
   // A run's memory layout:
@@ -125,19 +317,17 @@
   // +-------------------+
   // | to_be_bulk_freed  |
   // +-------------------+
-  // | top_bitmap_idx    |
-  // +-------------------+
   // |                   |
-  // | alloc bit map     |
+  // | free list         |
   // |                   |
   // +-------------------+
   // |                   |
-  // | bulk free bit map |
+  // | bulk free list    |
   // |                   |
   // +-------------------+
   // |                   |
   // | thread-local free |
-  // | bit map           |
+  // | list              |
   // |                   |
   // +-------------------+
   // | padding due to    |
@@ -160,94 +350,100 @@
     uint8_t size_bracket_idx_;          // The index of the size bracket of this run.
     uint8_t is_thread_local_;           // True if this run is used as a thread-local run.
     uint8_t to_be_bulk_freed_;          // Used within BulkFree() to flag a run that's involved with a bulk free.
-    uint32_t first_search_vec_idx_;  // The index of the first bitmap vector which may contain an available slot.
-    uint32_t alloc_bit_map_[0];      // The bit map that allocates if each slot is in use.
+    uint32_t padding_ ATTRIBUTE_UNUSED;
+    // Use a tailless free list for free_list_ so that the alloc fast path does not manage the tail.
+    SlotFreeList<false> free_list_;
+    SlotFreeList<true> bulk_free_list_;
+    SlotFreeList<true> thread_local_free_list_;
+    // Padding due to alignment
+    // Slot 0
+    // Slot 1
+    // ...
 
-    // bulk_free_bit_map_[] : The bit map that is used for GC to
-    // temporarily mark the slots to free without using a lock. After
-    // all the slots to be freed in a run are marked, all those slots
-    // get freed in bulk with one locking per run, as opposed to one
-    // locking per slot to minimize the lock contention. This is used
-    // within BulkFree().
-
-    // thread_local_free_bit_map_[] : The bit map that is used for GC
-    // to temporarily mark the slots to free in a thread-local run
-    // without using a lock (without synchronizing the thread that
-    // owns the thread-local run.) When the thread-local run becomes
-    // full, the thread will check this bit map and update the
-    // allocation bit map of the run (that is, the slots get freed.)
-
-    // Returns the byte size of the header except for the bit maps.
+    // Returns the byte size of the header.
     static size_t fixed_header_size() {
-      Run temp;
-      size_t size = reinterpret_cast<uint8_t*>(&temp.alloc_bit_map_) - reinterpret_cast<uint8_t*>(&temp);
-      DCHECK_EQ(size, static_cast<size_t>(8));
-      return size;
+      return sizeof(Run);
     }
-    // Returns the base address of the free bit map.
-    uint32_t* BulkFreeBitMap() {
-      return reinterpret_cast<uint32_t*>(reinterpret_cast<uint8_t*>(this) + bulkFreeBitMapOffsets[size_bracket_idx_]);
+    Slot* FirstSlot() {
+      const uint8_t idx = size_bracket_idx_;
+      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]);
     }
-    // Returns the base address of the thread local free bit map.
-    uint32_t* ThreadLocalFreeBitMap() {
-      return reinterpret_cast<uint32_t*>(reinterpret_cast<uint8_t*>(this) + threadLocalFreeBitMapOffsets[size_bracket_idx_]);
+    Slot* LastSlot() {
+      const uint8_t idx = size_bracket_idx_;
+      const size_t bracket_size = bracketSizes[idx];
+      uintptr_t end = reinterpret_cast<uintptr_t>(End());
+      Slot* last_slot = reinterpret_cast<Slot*>(end - bracket_size);
+      DCHECK_LE(FirstSlot(), last_slot);
+      return last_slot;
+    }
+    SlotFreeList<false>* FreeList() {
+      return &free_list_;
+    }
+    SlotFreeList<true>* BulkFreeList() {
+      return &bulk_free_list_;
+    }
+    SlotFreeList<true>* ThreadLocalFreeList() {
+      return &thread_local_free_list_;
     }
     void* End() {
       return reinterpret_cast<uint8_t*>(this) + kPageSize * numOfPages[size_bracket_idx_];
     }
-    // Returns the number of bitmap words per run.
-    size_t NumberOfBitmapVectors() const {
-      return RoundUp(numOfSlots[size_bracket_idx_], 32) / 32;
-    }
     void SetIsThreadLocal(bool is_thread_local) {
       is_thread_local_  = is_thread_local ? 1 : 0;
     }
     bool IsThreadLocal() const {
       return is_thread_local_ != 0;
     }
-    // Frees slots in the allocation bit map with regard to the
-    // thread-local free bit map. Used when a thread-local run becomes
+    // Set up the free list for a new/empty run.
+    void InitFreeList() {
+      const uint8_t idx = size_bracket_idx_;
+      const size_t bracket_size = bracketSizes[idx];
+      Slot* first_slot = FirstSlot();
+      // Add backwards so the first slot is at the head of the list.
+      for (Slot* slot = LastSlot(); slot >= first_slot; slot = slot->Left(bracket_size)) {
+        free_list_.Add(slot);
+      }
+    }
+    // Merge the thread local free list to the free list.  Used when a thread-local run becomes
     // full.
-    bool MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out);
-    // Frees slots in the allocation bit map with regard to the bulk
-    // free bit map. Used in a bulk free.
-    void MergeBulkFreeBitMapIntoAllocBitMap();
-    // Unions the slots to be freed in the free bit map into the
-    // thread-local free bit map. In a bulk free, as a two-step
-    // process, GC will first record all the slots to free in a run in
-    // the free bit map where it can write without a lock, and later
-    // acquire a lock once per run to union the bits of the free bit
-    // map to the thread-local free bit map.
-    void UnionBulkFreeBitMapToThreadLocalFreeBitMap();
+    bool MergeThreadLocalFreeListToFreeList(bool* is_all_free_after_out);
+    // Merge the bulk free list to the free list. Used in a bulk free.
+    void MergeBulkFreeListToFreeList();
+    // Merge the bulk free list to the thread local free list. In a bulk free, as a two-step
+    // process, GC will first record all the slots to free in a run in the bulk free list where it
+    // can write without a lock, and later acquire a lock once per run to merge the bulk free list
+    // to the thread-local free list.
+    void MergeBulkFreeListToThreadLocalFreeList();
     // Allocates a slot in a run.
-    void* AllocSlot();
+    ALWAYS_INLINE void* AllocSlot();
     // Frees a slot in a run. This is used in a non-bulk free.
     void FreeSlot(void* ptr);
-    // Marks the slots to free in the bulk free bit map. Returns the bracket size.
-    size_t MarkBulkFreeBitMap(void* ptr);
-    // Marks the slots to free in the thread-local free bit map.
-    void MarkThreadLocalFreeBitMap(void* ptr);
-    // Last word mask, all of the bits in the last word which aren't valid slots are set to
-    // optimize allocation path.
-    static uint32_t GetBitmapLastVectorMask(size_t num_slots, size_t num_vec);
+    // Add the given slot to the bulk free list. Returns the bracket size.
+    size_t AddToBulkFreeList(void* ptr);
+    // Add the given slot to the thread-local free list.
+    void AddToThreadLocalFreeList(void* ptr);
     // Returns true if all the slots in the run are not in use.
-    bool IsAllFree();
+    bool IsAllFree() const {
+      return free_list_.Size() == numOfSlots[size_bracket_idx_];
+    }
     // Returns the number of free slots.
-    size_t NumberOfFreeSlots();
+    size_t NumberOfFreeSlots() {
+      return free_list_.Size();
+    }
     // Returns true if all the slots in the run are in use.
     ALWAYS_INLINE bool IsFull();
-    // Returns true if the bulk free bit map is clean.
-    bool IsBulkFreeBitmapClean();
-    // Returns true if the thread local free bit map is clean.
-    bool IsThreadLocalFreeBitmapClean();
-    // Set the alloc_bit_map_ bits for slots that are past the end of the run.
-    void SetAllocBitMapBitsForInvalidSlots();
+    // Returns true if the bulk free list is empty.
+    bool IsBulkFreeListEmpty() const {
+      return bulk_free_list_.Size() == 0;
+    }
+    // Returns true if the thread local free list is empty.
+    bool IsThreadLocalFreeListEmpty() const {
+      return thread_local_free_list_.Size() == 0;
+    }
     // Zero the run's data.
     void ZeroData();
-    // Zero the run's header.
-    void ZeroHeader();
-    // Fill the alloc bitmap with 1s.
-    void FillAllocBitMap();
+    // Zero the run's header and the slot headers.
+    void ZeroHeaderAndSlotHeaders();
     // Iterate over all the slots and apply the given function.
     void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
     // Dump the run metadata for debugging.
@@ -258,11 +454,24 @@
         REQUIRES(Locks::thread_list_lock_);
 
    private:
-    // The common part of MarkFreeBitMap() and MarkThreadLocalFreeBitMap(). Returns the bracket
+    // The common part of AddToBulkFreeList() and AddToThreadLocalFreeList(). Returns the bracket
     // size.
-    size_t MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name);
-    // Turns the bit map into a string for debugging.
-    static std::string BitMapToStr(uint32_t* bit_map_base, size_t num_vec);
+    size_t AddToFreeListShared(void* ptr, SlotFreeList<true>* free_list, const char* caller_name);
+    // Turns a FreeList into a string for debugging.
+    template<bool kUseTail>
+    std::string FreeListToStr(SlotFreeList<kUseTail>* free_list);
+    // Check that a given pointer is a valid slot address and return it as a Slot*.
+    Slot* ToSlot(void* ptr) {
+      const uint8_t idx = size_bracket_idx_;
+      const size_t bracket_size = bracketSizes[idx];
+      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
+          - reinterpret_cast<uint8_t*>(FirstSlot());
+      DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
+      size_t slot_idx = offset_from_slot_base / bracket_size;
+      DCHECK_LT(slot_idx, numOfSlots[idx]);
+      return reinterpret_cast<Slot*>(ptr);
+    }
+    size_t SlotIndex(Slot* slot);
 
     // TODO: DISALLOW_COPY_AND_ASSIGN(Run);
   };
@@ -283,10 +492,6 @@
   static size_t numOfSlots[kNumOfSizeBrackets];
   // The header sizes in bytes of the runs for each size bracket.
   static size_t headerSizes[kNumOfSizeBrackets];
-  // The byte offsets of the bulk free bit maps of the runs for each size bracket.
-  static size_t bulkFreeBitMapOffsets[kNumOfSizeBrackets];
-  // The byte offsets of the thread-local free bit maps of the runs for each size bracket.
-  static size_t threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
 
   // Initialize the run specs (the above arrays).
   static void Initialize();
@@ -493,7 +698,7 @@
   // The reader-writer lock to allow one bulk free at a time while
   // allowing multiple individual frees at the same time. Also, this
   // is used to avoid race conditions between BulkFree() and
-  // RevokeThreadLocalRuns() on the bulk free bitmaps.
+  // RevokeThreadLocalRuns() on the bulk free list.
   ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
   // The page release mode.
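
The SlotFreeList added above is an intrusive singly linked list whose nodes are the free slots themselves; the tailed variants (kUseTail == true) exist so that a whole bulk or thread-local list can be spliced into another list in O(1), while the run's free_list_ stays tailless so the allocation fast path never touches a tail. A simplified sketch of the same idea, with the fixed 64-bit field layout and the debug Verify() pass stripped out (these names are illustrative, not the ART API):

#include <cstddef>

// Simplified, illustrative version of the intrusive free list above.
struct Slot {
  Slot* next = nullptr;
};

template <bool kUseTail>
class FreeList {
 public:
  Slot* Head() const { return head_; }
  size_t Size() const { return size_; }

  // Pop from the head; this is all the tailless allocation fast path needs.
  Slot* Remove() {
    Slot* slot = head_;
    if (slot == nullptr) {
      return nullptr;
    }
    head_ = slot->next;
    if (kUseTail && head_ == nullptr) {
      tail_ = nullptr;  // List became empty.
    }
    slot->next = nullptr;
    --size_;
    return slot;
  }

  // Push onto the head; the tail only changes when the list was empty.
  void Add(Slot* slot) {
    slot->next = head_;
    if (kUseTail && head_ == nullptr) {
      tail_ = slot;
    }
    head_ = slot;
    ++size_;
  }

  // Splice a tailed list in front of this one in O(1): the incoming tail is
  // linked to our old head, so no traversal is required. Empties `other`.
  void Merge(FreeList<true>* other) {
    if (other->size_ == 0) {
      return;
    }
    other->tail_->next = head_;
    if (kUseTail && head_ == nullptr) {
      tail_ = other->tail_;
    }
    head_ = other->head_;
    size_ += other->size_;
    other->head_ = nullptr;
    other->tail_ = nullptr;
    other->size_ = 0;
  }

 private:
  template <bool> friend class FreeList;
  Slot* head_ = nullptr;
  Slot* tail_ = nullptr;  // Unused when kUseTail is false.
  size_t size_ = 0;
};

Under this sketch, the run's free_list_ corresponds to a FreeList<false> feeding the allocation path, while bulk_free_list_ and thread_local_free_list_ correspond to FreeList<true> instances that eventually get spliced back via Merge().
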
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 399591b..0a7a69f 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -457,6 +457,8 @@
     CheckEmptyMarkStack();
     // Re-enable weak ref accesses.
     ReenableWeakRefAccess(self);
+    // Free data for class loaders that we unloaded.
+    Runtime::Current()->GetClassLinker()->CleanupClassLoaders();
     // Marking is done. Disable marking.
     DisableMarking();
     CheckEmptyMarkStack();
@@ -618,7 +620,10 @@
     gc_mark_stack_->PushBack(to_ref);
   } else {
     CHECK_EQ(static_cast<uint32_t>(mark_stack_mode),
-             static_cast<uint32_t>(kMarkStackModeGcExclusive));
+             static_cast<uint32_t>(kMarkStackModeGcExclusive))
+        << "ref=" << to_ref
+        << " self->gc_marking=" << self->GetIsGcMarking()
+        << " cc->is_marking=" << is_marking_;
     CHECK(self == thread_running_gc_)
         << "Only GC-running thread should access the mark stack "
         << "in the GC exclusive mark stack mode";
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 60f833b..f561764 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -205,6 +205,7 @@
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     SweepSystemWeaks();
   }
+  Runtime::Current()->GetClassLinker()->CleanupClassLoaders();
   // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked
   // before they are properly counted.
   RevokeAllThreadLocalBuffers();
diff --git a/runtime/gc/collector/mark_sweep-inl.h b/runtime/gc/collector/mark_sweep-inl.h
index 56edcc9..e72277f 100644
--- a/runtime/gc/collector/mark_sweep-inl.h
+++ b/runtime/gc/collector/mark_sweep-inl.h
@@ -29,7 +29,8 @@
 namespace collector {
 
 template<typename MarkVisitor, typename ReferenceVisitor>
-inline void MarkSweep::ScanObjectVisit(mirror::Object* obj, const MarkVisitor& visitor,
+inline void MarkSweep::ScanObjectVisit(mirror::Object* obj,
+                                       const MarkVisitor& visitor,
                                        const ReferenceVisitor& ref_visitor) {
   DCHECK(IsMarked(obj)) << "Scanning unmarked object " << obj << "\n" << heap_->DumpSpaces();
   obj->VisitReferences(visitor, ref_visitor);
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 089f453..77a288b 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -95,10 +95,13 @@
     : GarbageCollector(heap,
                        name_prefix +
                        (is_concurrent ? "concurrent mark sweep": "mark sweep")),
-      current_space_bitmap_(nullptr), mark_bitmap_(nullptr), mark_stack_(nullptr),
+      current_space_bitmap_(nullptr),
+      mark_bitmap_(nullptr),
+      mark_stack_(nullptr),
       gc_barrier_(new Barrier(0)),
       mark_stack_lock_("mark sweep mark stack lock", kMarkSweepMarkStackLock),
-      is_concurrent_(is_concurrent), live_stack_freeze_size_(0) {
+      is_concurrent_(is_concurrent),
+      live_stack_freeze_size_(0) {
   std::string error_msg;
   MemMap* mem_map = MemMap::MapAnonymous(
       "mark sweep sweep array free buffer", nullptr,
@@ -173,7 +176,10 @@
 void MarkSweep::ProcessReferences(Thread* self) {
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   GetHeap()->GetReferenceProcessor()->ProcessReferences(
-      true, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(), this);
+      true,
+      GetTimings(),
+      GetCurrentIteration()->GetClearSoftReferences(),
+      this);
 }
 
 void MarkSweep::PausePhase() {
@@ -265,8 +271,9 @@
 void MarkSweep::UpdateAndMarkModUnion() {
   for (const auto& space : heap_->GetContinuousSpaces()) {
     if (immune_region_.ContainsSpace(space)) {
-      const char* name = space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
-          "UpdateAndMarkImageModUnionTable";
+      const char* name = space->IsZygoteSpace()
+          ? "UpdateAndMarkZygoteModUnionTable"
+          : "UpdateAndMarkImageModUnionTable";
       TimingLogger::ScopedTiming t(name, GetTimings());
       accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
       CHECK(mod_union_table != nullptr);
@@ -283,11 +290,15 @@
 
 void MarkSweep::ReclaimPhase() {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
-  Thread* self = Thread::Current();
+  Thread* const self = Thread::Current();
   // Process the references concurrently.
   ProcessReferences(self);
   SweepSystemWeaks(self);
-  Runtime::Current()->AllowNewSystemWeaks();
+  Runtime* const runtime = Runtime::Current();
+  runtime->AllowNewSystemWeaks();
+  // Clean up class loaders after system weaks are swept since that is how we know if class
+  // unloading occurred.
+  runtime->GetClassLinker()->CleanupClassLoaders();
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     GetHeap()->RecordFreeRevoke();
@@ -361,10 +372,10 @@
 
 class MarkSweepMarkObjectSlowPath {
  public:
-  explicit MarkSweepMarkObjectSlowPath(MarkSweep* mark_sweep, mirror::Object* holder = nullptr,
+  explicit MarkSweepMarkObjectSlowPath(MarkSweep* mark_sweep,
+                                       mirror::Object* holder = nullptr,
                                        MemberOffset offset = MemberOffset(0))
-      : mark_sweep_(mark_sweep), holder_(holder), offset_(offset) {
-  }
+      : mark_sweep_(mark_sweep), holder_(holder), offset_(offset) {}
 
   void operator()(const mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
     if (kProfileLargeObjects) {
@@ -441,7 +452,8 @@
   MemberOffset offset_;
 };
 
-inline void MarkSweep::MarkObjectNonNull(mirror::Object* obj, mirror::Object* holder,
+inline void MarkSweep::MarkObjectNonNull(mirror::Object* obj,
+                                         mirror::Object* holder,
                                          MemberOffset offset) {
   DCHECK(obj != nullptr);
   if (kUseBakerOrBrooksReadBarrier) {
@@ -508,7 +520,8 @@
 }
 
 // Used to mark objects when processing the mark stack. If an object is null, it is not marked.
-inline void MarkSweep::MarkObject(mirror::Object* obj, mirror::Object* holder,
+inline void MarkSweep::MarkObject(mirror::Object* obj,
+                                  mirror::Object* holder,
                                   MemberOffset offset) {
   if (obj != nullptr) {
     MarkObjectNonNull(obj, holder, offset);
@@ -530,14 +543,16 @@
   MarkSweep* const collector_;
 };
 
-void MarkSweep::VisitRoots(mirror::Object*** roots, size_t count,
+void MarkSweep::VisitRoots(mirror::Object*** roots,
+                           size_t count,
                            const RootInfo& info ATTRIBUTE_UNUSED) {
   for (size_t i = 0; i < count; ++i) {
     MarkObjectNonNull(*roots[i]);
   }
 }
 
-void MarkSweep::VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+void MarkSweep::VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
+                           size_t count,
                            const RootInfo& info ATTRIBUTE_UNUSED) {
   for (size_t i = 0; i < count; ++i) {
     MarkObjectNonNull(roots[i]->AsMirrorPtr());
@@ -596,8 +611,10 @@
   explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE
       : mark_sweep_(mark_sweep) {}
 
-  void operator()(mirror::Object* obj) const ALWAYS_INLINE
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
+  void operator()(mirror::Object* obj) const
+      ALWAYS_INLINE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
     if (kCheckLocks) {
       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
@@ -611,12 +628,11 @@
 
 class DelayReferenceReferentVisitor {
  public:
-  explicit DelayReferenceReferentVisitor(MarkSweep* collector) : collector_(collector) {
-  }
+  explicit DelayReferenceReferentVisitor(MarkSweep* collector) : collector_(collector) {}
 
   void operator()(mirror::Class* klass, mirror::Reference* ref) const
-      SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(Locks::heap_bitmap_lock_) {
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
     collector_->DelayReferenceReferent(klass, ref);
   }
 
@@ -627,7 +643,9 @@
 template <bool kUseFinger = false>
 class MarkStackTask : public Task {
  public:
-  MarkStackTask(ThreadPool* thread_pool, MarkSweep* mark_sweep, size_t mark_stack_size,
+  MarkStackTask(ThreadPool* thread_pool,
+                MarkSweep* mark_sweep,
+                size_t mark_stack_size,
                 StackReference<mirror::Object>* mark_stack)
       : mark_sweep_(mark_sweep),
         thread_pool_(thread_pool),
@@ -652,8 +670,10 @@
                                             MarkSweep* mark_sweep)
         : chunk_task_(chunk_task), mark_sweep_(mark_sweep) {}
 
-    void operator()(mirror::Object* obj, MemberOffset offset, bool /* static */) const
-        ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
+    ALWAYS_INLINE void operator()(mirror::Object* obj,
+                    MemberOffset offset,
+                    bool is_static ATTRIBUTE_UNUSED) const
+        SHARED_REQUIRES(Locks::mutator_lock_) {
       Mark(obj->GetFieldObject<mirror::Object>(offset));
     }
 
@@ -674,7 +694,7 @@
     }
 
    private:
-    void Mark(mirror::Object* ref) const ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
+    ALWAYS_INLINE void Mark(mirror::Object* ref) const SHARED_REQUIRES(Locks::mutator_lock_) {
       if (ref != nullptr && mark_sweep_->MarkObjectParallel(ref)) {
         if (kUseFinger) {
           std::atomic_thread_fence(std::memory_order_seq_cst);
@@ -693,12 +713,13 @@
 
   class ScanObjectParallelVisitor {
    public:
-    explicit ScanObjectParallelVisitor(MarkStackTask<kUseFinger>* chunk_task) ALWAYS_INLINE
+    ALWAYS_INLINE explicit ScanObjectParallelVisitor(MarkStackTask<kUseFinger>* chunk_task)
         : chunk_task_(chunk_task) {}
 
     // No thread safety analysis since multiple threads will use this visitor.
-    void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_)
-        REQUIRES(Locks::heap_bitmap_lock_) {
+    void operator()(mirror::Object* obj) const
+        REQUIRES(Locks::heap_bitmap_lock_)
+        SHARED_REQUIRES(Locks::mutator_lock_) {
       MarkSweep* const mark_sweep = chunk_task_->mark_sweep_;
       MarkObjectParallelVisitor mark_visitor(chunk_task_, mark_sweep);
       DelayReferenceReferentVisitor ref_visitor(mark_sweep);
@@ -729,7 +750,9 @@
     if (UNLIKELY(mark_stack_pos_ == kMaxSize)) {
       // Mark stack overflow, give 1/2 the stack to the thread pool as a new work task.
       mark_stack_pos_ /= 2;
-      auto* task = new MarkStackTask(thread_pool_, mark_sweep_, kMaxSize - mark_stack_pos_,
+      auto* task = new MarkStackTask(thread_pool_,
+                                     mark_sweep_,
+                                     kMaxSize - mark_stack_pos_,
                                      mark_stack_ + mark_stack_pos_);
       thread_pool_->AddTask(Thread::Current(), task);
     }
@@ -743,9 +766,9 @@
   }
 
   // Scans all of the objects
-  virtual void Run(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(Locks::heap_bitmap_lock_) {
-    UNUSED(self);
+  virtual void Run(Thread* self ATTRIBUTE_UNUSED)
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
     ScanObjectParallelVisitor visitor(this);
     // TODO: Tune this.
     static const size_t kFifoSize = 4;
@@ -778,16 +801,21 @@
 
 class CardScanTask : public MarkStackTask<false> {
  public:
-  CardScanTask(ThreadPool* thread_pool, MarkSweep* mark_sweep,
+  CardScanTask(ThreadPool* thread_pool,
+               MarkSweep* mark_sweep,
                accounting::ContinuousSpaceBitmap* bitmap,
-               uint8_t* begin, uint8_t* end, uint8_t minimum_age, size_t mark_stack_size,
-               StackReference<mirror::Object>* mark_stack_obj, bool clear_card)
+               uint8_t* begin,
+               uint8_t* end,
+               uint8_t minimum_age,
+               size_t mark_stack_size,
+               StackReference<mirror::Object>* mark_stack_obj,
+               bool clear_card)
       : MarkStackTask<false>(thread_pool, mark_sweep, mark_stack_size, mark_stack_obj),
         bitmap_(bitmap),
         begin_(begin),
         end_(end),
-        minimum_age_(minimum_age), clear_card_(clear_card) {
-  }
+        minimum_age_(minimum_age),
+        clear_card_(clear_card) {}
 
  protected:
   accounting::ContinuousSpaceBitmap* const bitmap_;
@@ -803,9 +831,9 @@
   virtual void Run(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
     ScanObjectParallelVisitor visitor(this);
     accounting::CardTable* card_table = mark_sweep_->GetHeap()->GetCardTable();
-    size_t cards_scanned = clear_card_ ?
-                           card_table->Scan<true>(bitmap_, begin_, end_, visitor, minimum_age_) :
-                           card_table->Scan<false>(bitmap_, begin_, end_, visitor, minimum_age_);
+    size_t cards_scanned = clear_card_
+        ? card_table->Scan<true>(bitmap_, begin_, end_, visitor, minimum_age_)
+        : card_table->Scan<false>(bitmap_, begin_, end_, visitor, minimum_age_);
     VLOG(heap) << "Parallel scanning cards " << reinterpret_cast<void*>(begin_) << " - "
         << reinterpret_cast<void*>(end_) << " = " << cards_scanned;
     // Finish by emptying our local mark stack.
@@ -873,9 +901,15 @@
         mark_stack_->PopBackCount(static_cast<int32_t>(mark_stack_increment));
         DCHECK_EQ(mark_stack_end, mark_stack_->End());
         // Add the new task to the thread pool.
-        auto* task = new CardScanTask(thread_pool, this, space->GetMarkBitmap(), card_begin,
-                                      card_begin + card_increment, minimum_age,
-                                      mark_stack_increment, mark_stack_end, clear_card);
+        auto* task = new CardScanTask(thread_pool,
+                                      this,
+                                      space->GetMarkBitmap(),
+                                      card_begin,
+                                      card_begin + card_increment,
+                                      minimum_age,
+                                      mark_stack_increment,
+                                      mark_stack_end,
+                                      clear_card);
         thread_pool->AddTask(self, task);
         card_begin += card_increment;
       }
@@ -911,10 +945,16 @@
         ScanObjectVisitor visitor(this);
         bool clear_card = paused && !space->IsZygoteSpace() && !space->IsImageSpace();
         if (clear_card) {
-          card_table->Scan<true>(space->GetMarkBitmap(), space->Begin(), space->End(), visitor,
+          card_table->Scan<true>(space->GetMarkBitmap(),
+                                 space->Begin(),
+                                 space->End(),
+                                 visitor,
                                  minimum_age);
         } else {
-          card_table->Scan<false>(space->GetMarkBitmap(), space->Begin(), space->End(), visitor,
+          card_table->Scan<false>(space->GetMarkBitmap(),
+                                  space->Begin(),
+                                  space->End(),
+                                  visitor,
                                   minimum_age);
         }
       }
@@ -924,11 +964,15 @@
 
 class RecursiveMarkTask : public MarkStackTask<false> {
  public:
-  RecursiveMarkTask(ThreadPool* thread_pool, MarkSweep* mark_sweep,
-                    accounting::ContinuousSpaceBitmap* bitmap, uintptr_t begin, uintptr_t end)
-      : MarkStackTask<false>(thread_pool, mark_sweep, 0, nullptr), bitmap_(bitmap), begin_(begin),
-        end_(end) {
-  }
+  RecursiveMarkTask(ThreadPool* thread_pool,
+                    MarkSweep* mark_sweep,
+                    accounting::ContinuousSpaceBitmap* bitmap,
+                    uintptr_t begin,
+                    uintptr_t end)
+      : MarkStackTask<false>(thread_pool, mark_sweep, 0, nullptr),
+        bitmap_(bitmap),
+        begin_(begin),
+        end_(end) {}
 
  protected:
   accounting::ContinuousSpaceBitmap* const bitmap_;
@@ -985,7 +1029,10 @@
             delta = RoundUp(delta, KB);
             if (delta < 16 * KB) delta = end - begin;
             begin += delta;
-            auto* task = new RecursiveMarkTask(thread_pool, this, current_space_bitmap_, start,
+            auto* task = new RecursiveMarkTask(thread_pool,
+                                               this,
+                                               current_space_bitmap_,
+                                               start,
                                                begin);
             thread_pool->AddTask(self, task);
           }
@@ -1032,7 +1079,8 @@
  public:
   explicit VerifySystemWeakVisitor(MarkSweep* mark_sweep) : mark_sweep_(mark_sweep) {}
 
-  virtual mirror::Object* IsMarked(mirror::Object* obj) OVERRIDE
+  virtual mirror::Object* IsMarked(mirror::Object* obj)
+      OVERRIDE
       SHARED_REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     mark_sweep_->VerifyIsLive(obj);
     return obj;
@@ -1073,7 +1121,8 @@
     }
   }
 
-  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
+                  size_t count,
                   const RootInfo& info ATTRIBUTE_UNUSED)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(Locks::heap_bitmap_lock_) {
@@ -1247,7 +1296,8 @@
     if (space->IsContinuousMemMapAllocSpace()) {
       space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
       TimingLogger::ScopedTiming split(
-          alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepMallocSpace", GetTimings());
+          alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepMallocSpace",
+          GetTimings());
       RecordFree(alloc_space->Sweep(swap_bitmaps));
     }
   }
@@ -1270,12 +1320,13 @@
 
 class MarkVisitor {
  public:
-  explicit MarkVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE : mark_sweep_(mark_sweep) {
-  }
+  ALWAYS_INLINE explicit MarkVisitor(MarkSweep* const mark_sweep) : mark_sweep_(mark_sweep) {}
 
-  void operator()(mirror::Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
-      ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(Locks::heap_bitmap_lock_) {
+  ALWAYS_INLINE void operator()(mirror::Object* obj,
+                                MemberOffset offset,
+                                bool is_static ATTRIBUTE_UNUSED) const
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
     if (kCheckLocks) {
       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
@@ -1284,14 +1335,16 @@
   }
 
   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
     if (!root->IsNull()) {
       VisitRoot(root);
     }
   }
 
   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
     if (kCheckLocks) {
       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 371bba5..8f7df78 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -33,9 +33,9 @@
 namespace art {
 
 namespace mirror {
-  class Class;
-  class Object;
-  class Reference;
+class Class;
+class Object;
+class Reference;
 }  // namespace mirror
 
 class Thread;
@@ -46,8 +46,8 @@
 class Heap;
 
 namespace accounting {
-  template<typename T> class AtomicStack;
-  typedef AtomicStack<mirror::Object> ObjectStack;
+template<typename T> class AtomicStack;
+typedef AtomicStack<mirror::Object> ObjectStack;
 }  // namespace accounting
 
 namespace collector {
@@ -60,12 +60,14 @@
 
   virtual void RunPhases() OVERRIDE REQUIRES(!mark_stack_lock_);
   void InitializePhase();
-  void MarkingPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
-  void PausePhase() REQUIRES(Locks::mutator_lock_, !mark_stack_lock_);
-  void ReclaimPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
+  void MarkingPhase() REQUIRES(!mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+  void PausePhase() REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
+  void ReclaimPhase() REQUIRES(!mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
   void FinishPhase();
   virtual void MarkReachableObjects()
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool IsConcurrent() const {
     return is_concurrent_;
@@ -87,20 +89,30 @@
 
   // Marks all objects in the root set at the start of a garbage collection.
   void MarkRoots(Thread* self)
-      REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   void MarkNonThreadRoots()
-      REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   void MarkConcurrentRoots(VisitRootFlags flags)
-      REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   void MarkRootsCheckpoint(Thread* self, bool revoke_ros_alloc_thread_local_buffers_at_checkpoint)
-      REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Builds a mark stack and recursively mark until it empties.
   void RecursiveMark()
-      REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Bind the live bits to the mark bits of bitmaps for spaces that are never collected, ie
   // the image. Mark that portion of the heap as immune.
@@ -108,26 +120,35 @@
 
   // Builds a mark stack with objects on dirty cards and recursively mark until it empties.
   void RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age)
-      REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Remarks the root set after completing the concurrent mark.
   void ReMarkRoots()
-      REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ProcessReferences(Thread* self)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Update and mark references from immune spaces.
   void UpdateAndMarkModUnion()
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Pre clean cards to reduce how much work is needed in the pause.
   void PreCleanCards()
-      REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Sweeps unmarked objects to complete the garbage collection. Virtual as by default it sweeps
   // all allocation spaces. Partial and sticky GCs want to just sweep a subset of the heap.
-  virtual void Sweep(bool swap_bitmaps) REQUIRES(Locks::heap_bitmap_lock_)
+  virtual void Sweep(bool swap_bitmaps)
+      REQUIRES(Locks::heap_bitmap_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Sweeps unmarked objects to complete the garbage collection.
@@ -135,20 +156,27 @@
 
   // Sweep only pointers within an array. WARNING: Trashes objects.
   void SweepArray(accounting::ObjectStack* allocation_stack_, bool swap_bitmaps)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Blackens an object.
   void ScanObject(mirror::Object* obj)
-      REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // No thread safety analysis due to lambdas.
   template<typename MarkVisitor, typename ReferenceVisitor>
-  void ScanObjectVisit(mirror::Object* obj, const MarkVisitor& visitor,
+  void ScanObjectVisit(mirror::Object* obj,
+                       const MarkVisitor& visitor,
                        const ReferenceVisitor& ref_visitor)
-    SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   void SweepSystemWeaks(Thread* self)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_);
+      REQUIRES(!Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   static mirror::Object* VerifySystemWeakIsLiveCallback(mirror::Object* obj, void* arg)
       SHARED_REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
@@ -161,22 +189,36 @@
       SHARED_REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   virtual bool IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* ref) OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info) OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
-  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+  virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
+                          size_t count,
                           const RootInfo& info) OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Marks an object.
   virtual mirror::Object* MarkObject(mirror::Object* obj) OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   void MarkObject(mirror::Object* obj, mirror::Object* holder, MemberOffset offset)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* ref) OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   Barrier& GetBarrier() {
     return *gc_barrier_;
@@ -191,13 +233,17 @@
   virtual mirror::Object* IsMarked(mirror::Object* object) OVERRIDE
       SHARED_REQUIRES(Locks::heap_bitmap_lock_);
 
-  void MarkObjectNonNull(mirror::Object* obj, mirror::Object* holder = nullptr,
+  void MarkObjectNonNull(mirror::Object* obj,
+                         mirror::Object* holder = nullptr,
                          MemberOffset offset = MemberOffset(0))
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Marks an object atomically, safe to use from multiple threads.
   void MarkObjectNonNullParallel(mirror::Object* obj)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns true if we need to add obj to a mark stack.
   bool MarkObjectParallel(mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
@@ -208,9 +254,12 @@
       NO_THREAD_SAFETY_ANALYSIS;
 
   // Expand mark stack to 2x its current size.
-  void ExpandMarkStack() REQUIRES(mark_stack_lock_)
+  void ExpandMarkStack()
+      REQUIRES(mark_stack_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void ResizeMarkStack(size_t new_size) REQUIRES(mark_stack_lock_)
+
+  void ResizeMarkStack(size_t new_size)
+      REQUIRES(mark_stack_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns how many threads we should use for the current GC phase based on if we are paused,
@@ -218,24 +267,34 @@
   size_t GetThreadCount(bool paused) const;
 
   // Push a single reference on a mark stack.
-  void PushOnMarkStack(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!mark_stack_lock_);
+  void PushOnMarkStack(mirror::Object* obj)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Blackens objects grayed during a garbage collection.
   void ScanGrayObjects(bool paused, uint8_t minimum_age)
-      REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
-  virtual void ProcessMarkStack() OVERRIDE REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_)
+  virtual void ProcessMarkStack()
+      OVERRIDE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     ProcessMarkStack(false);
   }
 
   // Recursively blackens objects on the mark stack.
   void ProcessMarkStack(bool paused)
-      REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ProcessMarkStackParallel(size_t thread_count)
-      REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Used to Get around thread safety annotations. The call is from MarkingPhase and is guarded by
   // IsExclusiveHeld.
@@ -293,23 +352,15 @@
   std::unique_ptr<MemMap> sweep_array_free_buffer_mem_map_;
 
  private:
-  friend class AddIfReachesAllocSpaceVisitor;  // Used by mod-union table.
   friend class CardScanTask;
   friend class CheckBitmapVisitor;
   friend class CheckReferenceVisitor;
   friend class CheckpointMarkThreadRoots;
-  friend class art::gc::Heap;
+  friend class Heap;
   friend class FifoMarkStackChunk;
   friend class MarkObjectVisitor;
   template<bool kUseFinger> friend class MarkStackTask;
   friend class MarkSweepMarkObjectSlowPath;
-  friend class ModUnionCheckReferences;
-  friend class ModUnionClearCardVisitor;
-  friend class ModUnionReferenceVisitor;
-  friend class ModUnionScanImageRootVisitor;
-  friend class ModUnionTableBitmap;
-  friend class ModUnionTableReferenceCache;
-  friend class ModUnionVisitor;
   friend class VerifyRootMarkedVisitor;
   friend class VerifyRootVisitor;
 
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index ed63ed0..7f57f30 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -248,6 +248,7 @@
     ReaderMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     SweepSystemWeaks();
   }
+  Runtime::Current()->GetClassLinker()->CleanupClassLoaders();
   // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked
   // before they are properly counted.
   RevokeAllThreadLocalBuffers();
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index 5be3db7..6c32658 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -25,8 +25,7 @@
 namespace collector {
 
 StickyMarkSweep::StickyMarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix)
-    : PartialMarkSweep(heap, is_concurrent,
-                       name_prefix.empty() ? "sticky " : name_prefix) {
+    : PartialMarkSweep(heap, is_concurrent, name_prefix.empty() ? "sticky " : name_prefix) {
   cumulative_timings_.SetName(GetName());
 }
 
diff --git a/runtime/gc/collector/sticky_mark_sweep.h b/runtime/gc/collector/sticky_mark_sweep.h
index e8f0672..abaf978 100644
--- a/runtime/gc/collector/sticky_mark_sweep.h
+++ b/runtime/gc/collector/sticky_mark_sweep.h
@@ -38,13 +38,15 @@
   // alloc space will be marked as immune.
   void BindBitmaps() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void MarkReachableObjects() OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(Locks::heap_bitmap_lock_);
+  void MarkReachableObjects()
+      OVERRIDE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void Sweep(bool swap_bitmaps) OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(Locks::heap_bitmap_lock_);
+  void Sweep(bool swap_bitmaps)
+      OVERRIDE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
   DISALLOW_IMPLICIT_CONSTRUCTORS(StickyMarkSweep);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index cfe7713..7d664fa 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1963,6 +1963,10 @@
   GrowForUtilization(semi_space_collector_);
   LogGC(kGcCauseHomogeneousSpaceCompact, collector);
   FinishGC(self, collector::kGcTypeFull);
+  {
+    ScopedObjectAccess soa(self);
+    soa.Vm()->UnloadNativeLibraries();
+  }
   return HomogeneousSpaceCompactResult::kSuccess;
 }
 
@@ -2104,6 +2108,10 @@
   DCHECK(collector != nullptr);
   LogGC(kGcCauseCollectorTransition, collector);
   FinishGC(self, collector::kGcTypeFull);
+  {
+    ScopedObjectAccess soa(self);
+    soa.Vm()->UnloadNativeLibraries();
+  }
   int32_t after_allocated = num_bytes_allocated_.LoadSequentiallyConsistent();
   int32_t delta_allocated = before_allocated - after_allocated;
   std::string saved_str;
@@ -2588,6 +2596,12 @@
   FinishGC(self, gc_type);
   // Inform DDMS that a GC completed.
   Dbg::GcDidFinish();
+  // Unload native libraries for class unloading. We do this after calling FinishGC to prevent
+  // deadlocks in case the JNI_OnUnload function does allocations.
+  {
+    ScopedObjectAccess soa(self);
+    soa.Vm()->UnloadNativeLibraries();
+  }
   return gc_type;
 }
 
diff --git a/runtime/globals.h b/runtime/globals.h
index d70f3ab..987a94e 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -58,12 +58,6 @@
 static constexpr bool kIsTargetBuild = false;
 #endif
 
-#if defined(ART_USE_OPTIMIZING_COMPILER)
-static constexpr bool kUseOptimizingCompiler = true;
-#else
-static constexpr bool kUseOptimizingCompiler = false;
-#endif
-
 // Garbage collector constants.
 static constexpr bool kMovingCollector = true;
 static constexpr bool kMarkCompactSupport = false && kMovingCollector;
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 3ac80c6..f783b04 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -21,6 +21,7 @@
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
+#include "stack.h"
 #include "unstarted_runtime.h"
 
 namespace art {
@@ -330,8 +331,9 @@
   }
   // Set up shadow frame with matching number of reference slots to vregs.
   ShadowFrame* last_shadow_frame = self->GetManagedStack()->GetTopShadowFrame();
-  void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
-  ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, last_shadow_frame, method, 0, memory));
+  ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
+      CREATE_SHADOW_FRAME(num_regs, last_shadow_frame, method, 0);
+  ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get();
   self->PushShadowFrame(shadow_frame);
 
   size_t cur_reg = num_regs - num_ins;
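The hunk above replaces the manual alloca() + ShadowFrame::Create() pair with CREATE_SHADOW_FRAME, which hands back a ShadowFrameAllocaUniquePtr. Those definitions live in stack.h and are not part of this diff, so the following is only a hedged sketch of the general shape such a helper can take (Frame, FrameDeleter and MAKE_FRAME are made-up names): the allocation has to stay at the use site because alloca() storage dies with the calling frame, and a unique_ptr with a destructor-only deleter then gives RAII cleanup without ever calling free().

// Illustrative sketch only; the real ShadowFrame/CREATE_SHADOW_FRAME machinery is in stack.h.
#include <alloca.h>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <new>

struct Frame {
  explicit Frame(size_t regs) : num_regs(regs) {}
  static size_t ComputeSize(size_t regs) { return sizeof(Frame) + regs * sizeof(uint32_t); }
  size_t num_regs;
};

// Deleter that runs the destructor but never frees: the bytes are stack storage.
struct FrameDeleter {
  void operator()(Frame* frame) const { frame->~Frame(); }
};
using FrameAllocaUniquePtr = std::unique_ptr<Frame, FrameDeleter>;

// Has to be a macro: alloca() memory is released when the function that calls
// alloca() returns, so the allocation cannot be hidden inside a helper function.
// (num_regs is evaluated twice; fine for a sketch.)
#define MAKE_FRAME(num_regs) \
  FrameAllocaUniquePtr(new (alloca(Frame::ComputeSize(num_regs))) Frame(num_regs))

Used as "FrameAllocaUniquePtr frame = MAKE_FRAME(num_regs);", the destructor runs when the pointer goes out of scope while the storage itself remains ordinary stack memory.
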
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 68d56f5..ad34c9a 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -21,12 +21,16 @@
 #include "debugger.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "mirror/array-inl.h"
+#include "stack.h"
 #include "unstarted_runtime.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
 namespace interpreter {
 
+// All lambda closures take up a consecutive pair of virtual registers.
+static constexpr size_t kLambdaVirtualRegisterWidth = 2;
+
 void ThrowNullPointerExceptionFromInterpreter() {
   ThrowNullPointerExceptionFromDexPC();
 }
@@ -483,13 +487,16 @@
 }
 
 // Separate declaration is required solely for the attributes.
-template<bool is_range, bool do_assignability_check> SHARED_REQUIRES(Locks::mutator_lock_)
+template <bool is_range,
+          bool do_assignability_check,
+          size_t kVarArgMax>
+    SHARED_REQUIRES(Locks::mutator_lock_)
 static inline bool DoCallCommon(ArtMethod* called_method,
                                 Thread* self,
                                 ShadowFrame& shadow_frame,
                                 JValue* result,
                                 uint16_t number_of_inputs,
-                                uint32_t arg[Instruction::kMaxVarArgRegs],
+                                uint32_t (&arg)[kVarArgMax],
                                 uint32_t vregC) ALWAYS_INLINE;
 
 SHARED_REQUIRES(Locks::mutator_lock_)
@@ -509,13 +516,15 @@
         Dbg::IsForcedInterpreterNeededForCalling(self, target);
 }
 
-template<bool is_range, bool do_assignability_check>
+template <bool is_range,
+          bool do_assignability_check,
+          size_t kVarArgMax>
 static inline bool DoCallCommon(ArtMethod* called_method,
                                 Thread* self,
                                 ShadowFrame& shadow_frame,
                                 JValue* result,
                                 uint16_t number_of_inputs,
-                                uint32_t arg[Instruction::kMaxVarArgRegs],
+                                uint32_t (&arg)[kVarArgMax],
                                 uint32_t vregC) {
   bool string_init = false;
   // Replace calls to String.<init> with equivalent StringFactory call.
@@ -560,10 +569,10 @@
     number_of_inputs--;
 
     // Rewrite the var-args, dropping the 0th argument ("this")
-    for (uint32_t i = 1; i < Instruction::kMaxVarArgRegs; ++i) {
+    for (uint32_t i = 1; i < arraysize(arg); ++i) {
       arg[i - 1] = arg[i];
     }
-    arg[Instruction::kMaxVarArgRegs - 1] = 0;
+    arg[arraysize(arg) - 1] = 0;
 
     // Rewrite the non-var-arg case
     vregC++;  // Skips the 0th vreg in the range ("this").
@@ -576,9 +585,9 @@
 
   // Allocate shadow frame on the stack.
   const char* old_cause = self->StartAssertNoThreadSuspension("DoCallCommon");
-  void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
-  ShadowFrame* new_shadow_frame(ShadowFrame::Create(num_regs, &shadow_frame, called_method, 0,
-                                                    memory));
+  ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
+      CREATE_SHADOW_FRAME(num_regs, &shadow_frame, called_method, 0);
+  ShadowFrame* new_shadow_frame = shadow_frame_unique_ptr.get();
 
   // Initialize new shadow frame by copying the registers from the callee shadow frame.
   if (do_assignability_check) {
@@ -669,7 +678,7 @@
         AssignRegister(new_shadow_frame, shadow_frame, dest_reg, src_reg);
       }
     } else {
-      DCHECK_LE(number_of_inputs, Instruction::kMaxVarArgRegs);
+      DCHECK_LE(number_of_inputs, arraysize(arg));
 
       for (; arg_index < number_of_inputs; ++arg_index) {
         AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, arg[arg_index]);
@@ -709,7 +718,11 @@
       SafeMap<uint32_t, std::set<uint32_t>> string_init_map =
           verifier::MethodVerifier::FindStringInitMap(method);
       MutexLock mu(self, *Locks::interpreter_string_init_map_lock_);
-      auto it = method_to_string_init_map.Overwrite(method_ref, string_init_map);
+      auto it = method_to_string_init_map.lower_bound(method_ref);
+      if (it == method_to_string_init_map.end() ||
+          method_to_string_init_map.key_comp()(method_ref, it->first)) {
+        it = method_to_string_init_map.PutBefore(it, method_ref, std::move(string_init_map));
+      }
       string_init_map_ptr = &it->second;
     }
     if (string_init_map_ptr->size() != 0) {
@@ -732,12 +745,13 @@
                   const Instruction* inst, uint16_t inst_data, JValue* result) {
   const uint4_t num_additional_registers = inst->VRegB_25x();
   // Argument word count.
-  const uint16_t number_of_inputs = num_additional_registers + 1;
-  // The first input register is always present and is not encoded in the count.
+  const uint16_t number_of_inputs = num_additional_registers + kLambdaVirtualRegisterWidth;
+  // The lambda closure register is always present and is not encoded in the count.
+  // Furthermore, the lambda closure register is always wide, so it counts as 2 inputs.
 
   // TODO: find a cleaner way to separate non-range and range information without duplicating
   //       code.
-  uint32_t arg[Instruction::kMaxVarArgRegs];  // only used in invoke-XXX.
+  uint32_t arg[Instruction::kMaxVarArgRegs25x];  // only used in invoke-XXX.
   uint32_t vregC = 0;   // only used in invoke-XXX-range.
   if (is_range) {
     vregC = inst->VRegC_3rc();
@@ -763,7 +777,7 @@
 
   // TODO: find a cleaner way to separate non-range and range information without duplicating
   //       code.
-  uint32_t arg[Instruction::kMaxVarArgRegs];  // only used in invoke-XXX.
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};  // only used in invoke-XXX.
   uint32_t vregC = 0;
   if (is_range) {
     vregC = inst->VRegC_3rc();
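The DoCallCommon change above swaps the decayed array parameter "uint32_t arg[Instruction::kMaxVarArgRegs]" for a reference to an array, "uint32_t (&arg)[kVarArgMax]", so the array length travels in the type and arraysize(arg) works inside the function. A minimal, self-contained illustration of the difference (ArraySize, ShiftByPointer and ShiftByReference are hypothetical names):

#include <cstddef>
#include <cstdint>

// Same idea as ART's arraysize(): the length is recovered from the array type.
template <typename T, size_t N>
constexpr size_t ArraySize(const T (&)[N]) { return N; }

// Pointer-decay version: the callee has no idea how long 'arg' really is.
inline void ShiftByPointer(uint32_t* arg, size_t len) {
  for (size_t i = 1; i < len; ++i) {
    arg[i - 1] = arg[i];
  }
  arg[len - 1] = 0;
}

// Reference-to-array version: the length is part of the parameter type, so
// ArraySize(arg) is usable inside the function, as in the rewritten DoCallCommon.
template <size_t kVarArgMax>
inline void ShiftByReference(uint32_t (&arg)[kVarArgMax]) {
  for (size_t i = 1; i < ArraySize(arg); ++i) {
    arg[i - 1] = arg[i];
  }
  arg[ArraySize(arg) - 1] = 0;
}
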
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 7398778..f57bddb 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -34,7 +34,12 @@
 #include "dex_instruction-inl.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "handle_scope-inl.h"
+#include "lambda/art_lambda_method.h"
 #include "lambda/box_table.h"
+#include "lambda/closure.h"
+#include "lambda/closure_builder-inl.h"
+#include "lambda/leaking_allocator.h"
+#include "lambda/shorty_field_type.h"
 #include "mirror/class-inl.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
@@ -133,32 +138,44 @@
   return success;
 }
 
-// Write out the 'ArtMethod*' into vreg and vreg+1
+// Write out the 'Closure*' into vreg and vreg+1, as if it was a jlong.
 static inline void WriteLambdaClosureIntoVRegs(ShadowFrame& shadow_frame,
-                                               const ArtMethod& called_method,
+                                               const lambda::Closure* lambda_closure,
                                                uint32_t vreg) {
   // Split the method into a lo and hi 32 bits so we can encode them into 2 virtual registers.
-  uint32_t called_method_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(&called_method));
-  uint32_t called_method_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(&called_method)
+  uint32_t closure_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(lambda_closure));
+  uint32_t closure_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(lambda_closure)
                                                     >> BitSizeOf<uint32_t>());
   // Use uint64_t instead of uintptr_t to allow shifting past the max on 32-bit.
   static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
 
-  DCHECK_NE(called_method_lo | called_method_hi, 0u);
+  DCHECK_NE(closure_lo | closure_hi, 0u);
 
-  shadow_frame.SetVReg(vreg, called_method_lo);
-  shadow_frame.SetVReg(vreg + 1, called_method_hi);
+  shadow_frame.SetVReg(vreg, closure_lo);
+  shadow_frame.SetVReg(vreg + 1, closure_hi);
 }
 
 // Handles create-lambda instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 // (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
 //
+// The closure must be allocated big enough to hold the data, and should not be
+// pre-initialized. It is initialized with the actual captured variables as a side-effect,
+// although this should be unimportant to the caller since this function also handles storing it to
+// the ShadowFrame.
+//
 // As a work-in-progress implementation, this shoves the ArtMethod object corresponding
 // to the target dex method index into the target register vA and vA + 1.
 template<bool do_access_check>
-static inline bool DoCreateLambda(Thread* self, ShadowFrame& shadow_frame,
-                                  const Instruction* inst) {
+static inline bool DoCreateLambda(Thread* self,
+                                  const Instruction* inst,
+                                  /*inout*/ShadowFrame& shadow_frame,
+                                  /*inout*/lambda::ClosureBuilder* closure_builder,
+                                  /*inout*/lambda::Closure* uninitialized_closure) {
+  DCHECK(closure_builder != nullptr);
+  DCHECK(uninitialized_closure != nullptr);
+  DCHECK_ALIGNED(uninitialized_closure, alignof(lambda::Closure));
+
   /*
    * create-lambda is opcode 0x21c
    * - vA is the target register where the closure will be stored into
@@ -171,16 +188,69 @@
   ArtMethod* const called_method = FindMethodFromCode<kStatic, do_access_check>(
       method_idx, &receiver, sf_method, self);
 
-  uint32_t vregA = inst->VRegA_21c();
+  uint32_t vreg_dest_closure = inst->VRegA_21c();
 
   if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) {
     CHECK(self->IsExceptionPending());
-    shadow_frame.SetVReg(vregA, 0u);
-    shadow_frame.SetVReg(vregA + 1, 0u);
+    shadow_frame.SetVReg(vreg_dest_closure, 0u);
+    shadow_frame.SetVReg(vreg_dest_closure + 1, 0u);
     return false;
   }
 
-  WriteLambdaClosureIntoVRegs(shadow_frame, *called_method, vregA);
+  lambda::ArtLambdaMethod* initialized_lambda_method;
+  // Initialize the ArtLambdaMethod with the right data.
+  {
+    lambda::ArtLambdaMethod* uninitialized_lambda_method =
+        reinterpret_cast<lambda::ArtLambdaMethod*>(
+            lambda::LeakingAllocator::AllocateMemory(self, sizeof(lambda::ArtLambdaMethod)));
+
+    std::string captured_variables_shorty = closure_builder->GetCapturedVariableShortyTypes();
+    std::string captured_variables_long_type_desc;
+
+    // Synthesize a long type descriptor from the short one.
+    for (char shorty : captured_variables_shorty) {
+      lambda::ShortyFieldType shorty_field_type(shorty);
+      if (shorty_field_type.IsObject()) {
+        // Not the true type, but good enough until we implement verifier support.
+        captured_variables_long_type_desc += "Ljava/lang/Object;";
+        UNIMPLEMENTED(FATAL) << "create-lambda with an object captured variable";
+      } else if (shorty_field_type.IsLambda()) {
+        // Not the true type, but good enough until we implement verifier support.
+        captured_variables_long_type_desc += "Ljava/lang/Runnable;";
+        UNIMPLEMENTED(FATAL) << "create-lambda with a lambda captured variable";
+      } else {
+        // The primitive types have the same length shorty or not, so this is always correct.
+        DCHECK(shorty_field_type.IsPrimitive());
+        captured_variables_long_type_desc += shorty_field_type;
+      }
+    }
+
+    // Copy strings to dynamically allocated storage. This leaks, but that's ok. Fix it later.
+    // TODO: Strings need to come from the DexFile, so they won't need their own allocations.
+    char* captured_variables_type_desc = lambda::LeakingAllocator::MakeFlexibleInstance<char>(
+        self,
+        captured_variables_long_type_desc.size() + 1);
+    strcpy(captured_variables_type_desc, captured_variables_long_type_desc.c_str());
+    char* captured_variables_shorty_copy = lambda::LeakingAllocator::MakeFlexibleInstance<char>(
+        self,
+        captured_variables_shorty.size() + 1);
+    strcpy(captured_variables_shorty_copy, captured_variables_shorty.c_str());
+
+    new (uninitialized_lambda_method) lambda::ArtLambdaMethod(called_method,
+                                                              captured_variables_type_desc,
+                                                              captured_variables_shorty_copy,
+                                                              true);  // innate lambda
+    initialized_lambda_method = uninitialized_lambda_method;
+  }
+
+  // Write all the closure captured variables and the closure header into the closure.
+  lambda::Closure* initialized_closure;
+  {
+    initialized_closure =
+        closure_builder->CreateInPlace(uninitialized_closure, initialized_lambda_method);
+  }
+
+  WriteLambdaClosureIntoVRegs(/*inout*/shadow_frame, initialized_closure, vreg_dest_closure);
   return true;
 }
 
@@ -189,13 +259,11 @@
 // Validates that the art method points to a valid lambda function, otherwise throws
 // an exception and returns null.
 // (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
-static inline ArtMethod* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_frame,
-                                                           uint32_t vreg)
+static inline lambda::Closure* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_frame,
+                                                                 uint32_t vreg)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  // TODO(iam): Introduce a closure abstraction that will contain the captured variables
-  // instead of just an ArtMethod.
-  // This is temporarily using 2 vregs because a native ArtMethod can be up to 64-bit,
-  // but once proper variable capture is implemented it will only use 1 vreg.
+  // Lambda closures take up a consecutive pair of virtual registers.
+  // On 32-bit the high bits are always 0.
   uint32_t vc_value_lo = shadow_frame.GetVReg(vreg);
   uint32_t vc_value_hi = shadow_frame.GetVReg(vreg + 1);
 
@@ -204,17 +272,285 @@
 
   // Use uint64_t instead of uintptr_t to allow left-shifting past the max on 32-bit.
   static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
-  ArtMethod* const called_method = reinterpret_cast<ArtMethod* const>(vc_value_ptr);
+  lambda::Closure* const lambda_closure = reinterpret_cast<lambda::Closure*>(vc_value_ptr);
+  DCHECK_ALIGNED(lambda_closure, alignof(lambda::Closure));
 
   // Guard against the user passing a null closure, which is odd but (sadly) semantically valid.
-  if (UNLIKELY(called_method == nullptr)) {
+  if (UNLIKELY(lambda_closure == nullptr)) {
     ThrowNullPointerExceptionFromInterpreter();
     return nullptr;
-  } else if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) {
+  } else if (UNLIKELY(!IsValidLambdaTargetOrThrow(lambda_closure->GetTargetMethod()))) {
+    // Sanity check against data corruption.
     return nullptr;
   }
 
-  return called_method;
+  return lambda_closure;
+}
+
+// Forward declaration for lock annotations. See below for documentation.
+template <bool do_access_check>
+static inline const char* GetStringDataByDexStringIndexOrThrow(ShadowFrame& shadow_frame,
+                                                               uint32_t string_idx)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+// Find the c-string data corresponding to a dex file's string index.
+// Returns null and throws a VerifyError if the string is not found.
+//
+// Note that with do_access_check=false, we never return null because the verifier
+// must guard against invalid string indices.
+// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
+template <bool do_access_check>
+static inline const char* GetStringDataByDexStringIndexOrThrow(ShadowFrame& shadow_frame,
+                                                               uint32_t string_idx) {
+  ArtMethod* method = shadow_frame.GetMethod();
+  const DexFile* dex_file = method->GetDexFile();
+
+  mirror::Class* declaring_class = method->GetDeclaringClass();
+  if (!do_access_check) {
+    // MethodVerifier refuses methods with string_idx out of bounds.
+    DCHECK_LT(string_idx, declaring_class->GetDexCache()->NumStrings());
+  } else {
+    // Access checks enabled: perform the string index bounds check ourselves.
+    if (string_idx >= dex_file->GetHeader().string_ids_size_) {
+      ThrowVerifyError(declaring_class, "String index '%" PRIu32 "' out of bounds",
+                       string_idx);
+      return nullptr;
+    }
+  }
+
+  const char* type_string = dex_file->StringDataByIdx(string_idx);
+
+  if (UNLIKELY(type_string == nullptr)) {
+    CHECK_EQ(false, do_access_check)
+        << " verifier should've caught invalid string index " << string_idx;
+    CHECK_EQ(true, do_access_check)
+        << " string idx size check should've caught invalid string index " << string_idx;
+  }
+
+  return type_string;
+}
+
+// Handles capture-variable instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
+template<bool do_access_check>
+static inline bool DoCaptureVariable(Thread* self,
+                                     const Instruction* inst,
+                                     /*inout*/ShadowFrame& shadow_frame,
+                                     /*inout*/lambda::ClosureBuilder* closure_builder) {
+  DCHECK(closure_builder != nullptr);
+  using lambda::ShortyFieldType;
+  /*
+   * capture-variable is opcode 0xf6, fmt 0x21c
+   * - vA is the source register of the variable that will be captured
+   * - vB is the string ID of the variable's type that will be captured
+   */
+  const uint32_t source_vreg = inst->VRegA_21c();
+  const uint32_t string_idx = inst->VRegB_21c();
+  // TODO: this should be a proper [type id] instead of a [string ID] pointing to a type.
+
+  const char* type_string = GetStringDataByDexStringIndexOrThrow<do_access_check>(shadow_frame,
+                                                                                  string_idx);
+  if (UNLIKELY(type_string == nullptr)) {
+    CHECK(self->IsExceptionPending());
+    return false;
+  }
+
+  char type_first_letter = type_string[0];
+  ShortyFieldType shorty_type;
+  if (do_access_check &&
+      UNLIKELY(!ShortyFieldType::MaybeCreate(type_first_letter, /*out*/&shorty_type))) {  // NOLINT: [whitespace/comma] [3]
+    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
+                     "capture-variable vB must be a valid type");
+    return false;
+  } else {
+    // Already verified that the type is valid.
+    shorty_type = ShortyFieldType(type_first_letter);
+  }
+
+  const size_t captured_variable_count = closure_builder->GetCaptureCount();
+
+  // Note: types are specified explicitly so that the closure is packed tightly.
+  switch (shorty_type) {
+    case ShortyFieldType::kBoolean: {
+      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
+      closure_builder->CaptureVariablePrimitive<bool>(primitive_narrow_value);
+      break;
+    }
+    case ShortyFieldType::kByte: {
+      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
+      closure_builder->CaptureVariablePrimitive<int8_t>(primitive_narrow_value);
+      break;
+    }
+    case ShortyFieldType::kChar: {
+      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
+      closure_builder->CaptureVariablePrimitive<uint16_t>(primitive_narrow_value);
+      break;
+    }
+    case ShortyFieldType::kShort: {
+      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
+      closure_builder->CaptureVariablePrimitive<int16_t>(primitive_narrow_value);
+      break;
+    }
+    case ShortyFieldType::kInt: {
+      uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg);
+      closure_builder->CaptureVariablePrimitive<int32_t>(primitive_narrow_value);
+      break;
+    }
+    case ShortyFieldType::kDouble: {
+      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegDouble(source_vreg));
+      break;
+    }
+    case ShortyFieldType::kFloat: {
+      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegFloat(source_vreg));
+      break;
+    }
+    case ShortyFieldType::kLambda: {
+      UNIMPLEMENTED(FATAL) << " capture-variable with type kLambda";
+      // TODO: Capturing lambdas recursively will be done at a later time.
+      UNREACHABLE();
+    }
+    case ShortyFieldType::kLong: {
+      closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegLong(source_vreg));
+      break;
+    }
+    case ShortyFieldType::kObject: {
+      closure_builder->CaptureVariableObject(shadow_frame.GetVRegReference(source_vreg));
+      UNIMPLEMENTED(FATAL) << " capture-variable with type kObject";
+      // TODO: finish implementing this. disabled for now since we can't track lambda refs for GC.
+      UNREACHABLE();
+    }
+
+    default:
+      LOG(FATAL) << "Invalid shorty type value " << shorty_type;
+      UNREACHABLE();
+  }
+
+  DCHECK_EQ(captured_variable_count + 1, closure_builder->GetCaptureCount());
+
+  return true;
+}
+
+// Handles liberate-variable instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
+template<bool do_access_check>
+static inline bool DoLiberateVariable(Thread* self,
+                                     const Instruction* inst,
+                                     size_t captured_variable_index,
+                                     /*inout*/ShadowFrame& shadow_frame) {
+  using lambda::ShortyFieldType;
+  /*
+   * liberate-variable is opcode 0xf7, fmt 0x22c
+   * - vA is the destination register
+   * - vB is the register with the lambda closure in it
+   * - vC is the string ID which needs to be a valid field type descriptor
+   */
+
+  const uint32_t dest_vreg = inst->VRegA_22c();
+  const uint32_t closure_vreg = inst->VRegB_22c();
+  const uint32_t string_idx = inst->VRegC_22c();
+  // TODO: this should be a proper [type id] instead of a [string ID] pointing to a type.
+
+
+  // Synthesize a long type descriptor from a shorty type descriptor list.
+  // TODO: Fix the dex encoding to contain the long and short type descriptors.
+  const char* type_string = GetStringDataByDexStringIndexOrThrow<do_access_check>(shadow_frame,
+                                                                                  string_idx);
+  if (UNLIKELY(do_access_check && type_string == nullptr)) {
+    CHECK(self->IsExceptionPending());
+    shadow_frame.SetVReg(dest_vreg, 0);
+    return false;
+  }
+
+  char type_first_letter = type_string[0];
+  ShortyFieldType shorty_type;
+  if (do_access_check &&
+      UNLIKELY(!ShortyFieldType::MaybeCreate(type_first_letter, /*out*/&shorty_type))) {  // NOLINT: [whitespace/comma] [3]
+    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
+                     "liberate-variable vC must be a valid type");
+    shadow_frame.SetVReg(dest_vreg, 0);
+    return false;
+  } else {
+    // Already verified that the type is valid.
+    shorty_type = ShortyFieldType(type_first_letter);
+  }
+
+  // Check for closure being null *after* the type check.
+  // This way we can access the type info in case we fail later, to know how many vregs to clear.
+  const lambda::Closure* lambda_closure =
+      ReadLambdaClosureFromVRegsOrThrow(/*inout*/shadow_frame, closure_vreg);
+
+  // Failed lambda target runtime check, an exception was raised.
+  if (UNLIKELY(lambda_closure == nullptr)) {
+    CHECK(self->IsExceptionPending());
+
+    // Clear the destination vreg(s) to be safe.
+    shadow_frame.SetVReg(dest_vreg, 0);
+    if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
+      shadow_frame.SetVReg(dest_vreg + 1, 0);
+    }
+    return false;
+  }
+
+  if (do_access_check &&
+      UNLIKELY(captured_variable_index >= lambda_closure->GetNumberOfCapturedVariables())) {
+    ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
+                     "liberate-variable captured variable index %zu out of bounds",
+                     captured_variable_index);
+    // Clear the destination vreg(s) to be safe.
+    shadow_frame.SetVReg(dest_vreg, 0);
+    if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
+      shadow_frame.SetVReg(dest_vreg + 1, 0);
+    }
+    return false;
+  }
+
+  // Verify that the runtime type of the captured-variable matches the requested dex type.
+  if (do_access_check) {
+    ShortyFieldType actual_type = lambda_closure->GetCapturedShortyType(captured_variable_index);
+    if (actual_type != shorty_type) {
+      ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(),
+                     "cannot liberate-variable of runtime type '%c' to dex type '%c'",
+                     static_cast<char>(actual_type),
+                     static_cast<char>(shorty_type));
+
+      shadow_frame.SetVReg(dest_vreg, 0);
+      if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) {
+        shadow_frame.SetVReg(dest_vreg + 1, 0);
+      }
+      return false;
+    }
+
+    if (actual_type.IsLambda() || actual_type.IsObject()) {
+      UNIMPLEMENTED(FATAL) << "liberate-variable type checks need to "
+                           << "parse full type descriptor for objects and lambdas";
+    }
+  }
+
+  // Unpack the captured variable from the closure into the correct type, then save it to the vreg.
+  if (shorty_type.IsPrimitiveNarrow()) {
+    uint32_t primitive_narrow_value =
+        lambda_closure->GetCapturedPrimitiveNarrow(captured_variable_index);
+    shadow_frame.SetVReg(dest_vreg, primitive_narrow_value);
+  } else if (shorty_type.IsPrimitiveWide()) {
+    uint64_t primitive_wide_value =
+        lambda_closure->GetCapturedPrimitiveWide(captured_variable_index);
+    shadow_frame.SetVRegLong(dest_vreg, static_cast<int64_t>(primitive_wide_value));
+  } else if (shorty_type.IsObject()) {
+    mirror::Object* unpacked_object =
+        lambda_closure->GetCapturedObject(captured_variable_index);
+    shadow_frame.SetVRegReference(dest_vreg, unpacked_object);
+
+    UNIMPLEMENTED(FATAL) << "liberate-variable cannot unpack objects yet";
+  } else if (shorty_type.IsLambda()) {
+    UNIMPLEMENTED(FATAL) << "liberate-variable cannot unpack lambdas yet";
+  } else {
+    LOG(FATAL) << "unreachable";
+    UNREACHABLE();
+  }
+
+  return true;
 }
 
 template<bool do_access_check>
@@ -229,22 +565,24 @@
    *
    * - reading var-args for 0x25 gets us vD,vE,vF,vG (but not vB)
    */
-  uint32_t vC = inst->VRegC_25x();
-  ArtMethod* const called_method = ReadLambdaClosureFromVRegsOrThrow(shadow_frame, vC);
+  uint32_t vreg_closure = inst->VRegC_25x();
+  const lambda::Closure* lambda_closure =
+      ReadLambdaClosureFromVRegsOrThrow(shadow_frame, vreg_closure);
 
   // Failed lambda target runtime check, an exception was raised.
-  if (UNLIKELY(called_method == nullptr)) {
+  if (UNLIKELY(lambda_closure == nullptr)) {
     CHECK(self->IsExceptionPending());
     result->SetJ(0);
     return false;
   }
 
+  ArtMethod* const called_method = lambda_closure->GetTargetMethod();
   // Invoke a non-range lambda
   return DoLambdaCall<false, do_access_check>(called_method, self, shadow_frame, inst, inst_data,
                                               result);
 }
 
-// Handles invoke-XXX/range instructions.
+// Handles invoke-XXX/range instructions (other than invoke-lambda[-range]).
 // Returns true on success, otherwise throws an exception and returns false.
 template<InvokeType type, bool is_range, bool do_access_check>
 static inline bool DoInvoke(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
@@ -521,17 +859,17 @@
   uint32_t vreg_target_object = inst->VRegA_22x(inst_data);
   uint32_t vreg_source_closure = inst->VRegB_22x();
 
-  ArtMethod* closure_method = ReadLambdaClosureFromVRegsOrThrow(shadow_frame,
-                                                                vreg_source_closure);
+  lambda::Closure* lambda_closure = ReadLambdaClosureFromVRegsOrThrow(shadow_frame,
+                                                                      vreg_source_closure);
 
   // Failed lambda target runtime check, an exception was raised.
-  if (UNLIKELY(closure_method == nullptr)) {
+  if (UNLIKELY(lambda_closure == nullptr)) {
     CHECK(self->IsExceptionPending());
     return false;
   }
 
   mirror::Object* closure_as_object =
-      Runtime::Current()->GetLambdaBoxTable()->BoxLambda(closure_method);
+      Runtime::Current()->GetLambdaBoxTable()->BoxLambda(lambda_closure);
 
   // Failed to box the lambda, an exception was raised.
   if (UNLIKELY(closure_as_object == nullptr)) {
@@ -564,16 +902,16 @@
     return false;
   }
 
-  ArtMethod* unboxed_closure = nullptr;
+  lambda::Closure* unboxed_closure = nullptr;
   // Raise an exception if unboxing fails.
   if (!Runtime::Current()->GetLambdaBoxTable()->UnboxLambda(boxed_closure_object,
-                                                            &unboxed_closure)) {
+                                                            /*out*/&unboxed_closure)) {
     CHECK(self->IsExceptionPending());
     return false;
   }
 
   DCHECK(unboxed_closure != nullptr);
-  WriteLambdaClosureIntoVRegs(shadow_frame, *unboxed_closure, vreg_target_closure);
+  WriteLambdaClosureIntoVRegs(/*inout*/shadow_frame, unboxed_closure, vreg_target_closure);
   return true;
 }
 
@@ -650,10 +988,13 @@
 #undef EXPLICIT_INSTANTIATION_DO_INVOKE_VIRTUAL_QUICK
 
 // Explicitly instantiate all DoCreateLambda functions.
-#define EXPLICIT_DO_CREATE_LAMBDA_DECL(_do_check)                                    \
-template SHARED_REQUIRES(Locks::mutator_lock_)                                 \
-bool DoCreateLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame,              \
-                        const Instruction* inst)
+#define EXPLICIT_DO_CREATE_LAMBDA_DECL(_do_check)                                                 \
+template SHARED_REQUIRES(Locks::mutator_lock_)                                                    \
+bool DoCreateLambda<_do_check>(Thread* self,                                                      \
+                               const Instruction* inst,                                           \
+                               /*inout*/ShadowFrame& shadow_frame,                                \
+                               /*inout*/lambda::ClosureBuilder* closure_builder,                  \
+                               /*inout*/lambda::Closure* uninitialized_closure);
 
 EXPLICIT_DO_CREATE_LAMBDA_DECL(false);  // create-lambda
 EXPLICIT_DO_CREATE_LAMBDA_DECL(true);   // create-lambda
@@ -689,7 +1030,29 @@
 EXPLICIT_DO_UNBOX_LAMBDA_DECL(true);   // unbox-lambda
 #undef EXPLICIT_DO_BOX_LAMBDA_DECL
 
+// Explicitly instantiate all DoCaptureVariable functions.
+#define EXPLICIT_DO_CAPTURE_VARIABLE_DECL(_do_check)                                    \
+template SHARED_REQUIRES(Locks::mutator_lock_)                                          \
+bool DoCaptureVariable<_do_check>(Thread* self,                                         \
+                                  const Instruction* inst,                              \
+                                  ShadowFrame& shadow_frame,                            \
+                                  lambda::ClosureBuilder* closure_builder);
 
+EXPLICIT_DO_CAPTURE_VARIABLE_DECL(false);  // capture-variable
+EXPLICIT_DO_CAPTURE_VARIABLE_DECL(true);   // capture-variable
+#undef EXPLICIT_DO_CAPTURE_VARIABLE_DECL
+
+// Explicitly instantiate all DoLiberateVariable functions.
+#define EXPLICIT_DO_LIBERATE_VARIABLE_DECL(_do_check)                                   \
+template SHARED_REQUIRES(Locks::mutator_lock_)                                          \
+bool DoLiberateVariable<_do_check>(Thread* self,                                        \
+                                   const Instruction* inst,                             \
+                                   size_t captured_variable_index,                      \
+                                   ShadowFrame& shadow_frame);                          \
+
+EXPLICIT_DO_LIBERATE_VARIABLE_DECL(false);  // liberate-variable
+EXPLICIT_DO_LIBERATE_VARIABLE_DECL(true);   // liberate-variable
+#undef EXPLICIT_DO_LIBERATE_VARIABLE_DECL
 }  // namespace interpreter
 }  // namespace art
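WriteLambdaClosureIntoVRegs() and ReadLambdaClosureFromVRegsOrThrow() above move a native Closure* in and out of a pair of 32-bit virtual registers. The arithmetic boils down to the standalone sketch below; the real functions additionally null-check the value and validate the target method, and SplitPointer/JoinPointer are illustrative names only.

#include <cstdint>

// Split a native pointer into two 32-bit halves, as stored in vreg and vreg + 1.
inline void SplitPointer(const void* ptr, uint32_t* lo, uint32_t* hi) {
  // uint64_t allows shifting past 32 bits even on 32-bit targets.
  uint64_t bits = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(ptr));
  *lo = static_cast<uint32_t>(bits);
  *hi = static_cast<uint32_t>(bits >> 32);  // Always 0 on 32-bit targets.
}

// Reassemble the pointer from the two virtual register values.
inline void* JoinPointer(uint32_t lo, uint32_t hi) {
  uint64_t bits = (static_cast<uint64_t>(hi) << 32) | lo;
  return reinterpret_cast<void*>(static_cast<uintptr_t>(bits));
}
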
 
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 72e2ba0..9677d79 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -17,9 +17,13 @@
 #if !defined(__clang__)
 // Clang 3.4 fails to build the goto interpreter implementation.
 
+
+#include "base/stl_util.h"  // MakeUnique
 #include "interpreter_common.h"
 #include "safe_math.h"
 
+#include <memory>  // std::unique_ptr
+
 namespace art {
 namespace interpreter {
 
@@ -179,6 +183,9 @@
     }
   }
 
+  std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
+  size_t lambda_captured_variable_index = 0;
+
   // Jump to first instruction.
   ADVANCE(0);
   UNREACHABLE_CODE_CHECK();
@@ -2412,7 +2419,20 @@
   HANDLE_INSTRUCTION_END();
 
   HANDLE_EXPERIMENTAL_INSTRUCTION_START(CREATE_LAMBDA) {
-    bool success = DoCreateLambda<true>(self, shadow_frame, inst);
+    if (lambda_closure_builder == nullptr) {
+      // DoCreateLambda always needs a ClosureBuilder, even if it has 0 captured variables.
+      lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
+    }
+
+    // TODO: these allocations should not leak, and the lambda method should not be local.
+    lambda::Closure* lambda_closure =
+        reinterpret_cast<lambda::Closure*>(alloca(lambda_closure_builder->GetSize()));
+    bool success = DoCreateLambda<do_access_check>(self,
+                                                   inst,
+                                                   /*inout*/shadow_frame,
+                                                   /*inout*/lambda_closure_builder.get(),
+                                                   /*inout*/lambda_closure);
+    lambda_closure_builder.reset(nullptr);  // reset state of variables captured
     POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
   }
   HANDLE_EXPERIMENTAL_INSTRUCTION_END();
@@ -2429,6 +2449,31 @@
   }
   HANDLE_EXPERIMENTAL_INSTRUCTION_END();
 
+  HANDLE_EXPERIMENTAL_INSTRUCTION_START(CAPTURE_VARIABLE) {
+    if (lambda_closure_builder == nullptr) {
+      lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
+    }
+
+    bool success = DoCaptureVariable<do_access_check>(self,
+                                                      inst,
+                                                      /*inout*/shadow_frame,
+                                                      /*inout*/lambda_closure_builder.get());
+
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
+
+  HANDLE_EXPERIMENTAL_INSTRUCTION_START(LIBERATE_VARIABLE) {
+    bool success = DoLiberateVariable<do_access_check>(self,
+                                                       inst,
+                                                       lambda_captured_variable_index,
+                                                       /*inout*/shadow_frame);
+    // Temporarily only allow sequences of 'liberate-variable, liberate-variable, ...'
+    lambda_captured_variable_index++;
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
+
   HANDLE_INSTRUCTION_START(UNUSED_3E)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
@@ -2465,14 +2510,6 @@
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_INSTRUCTION_START(UNUSED_F5)
-    UnexpectedOpcode(inst, shadow_frame);
-  HANDLE_INSTRUCTION_END();
-
-  HANDLE_INSTRUCTION_START(UNUSED_F7)
-    UnexpectedOpcode(inst, shadow_frame);
-  HANDLE_INSTRUCTION_END();
-
   HANDLE_INSTRUCTION_START(UNUSED_FA)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index b5cc11e..083dfb5 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -14,9 +14,12 @@
  * limitations under the License.
  */
 
+#include "base/stl_util.h"  // MakeUnique
 #include "interpreter_common.h"
 #include "safe_math.h"
 
+#include <memory>  // std::unique_ptr
+
 namespace art {
 namespace interpreter {
 
@@ -82,6 +85,11 @@
   const uint16_t* const insns = code_item->insns_;
   const Instruction* inst = Instruction::At(insns + dex_pc);
   uint16_t inst_data;
+
+  // TODO: collapse capture-variable+create-lambda into one opcode, then we won't need
+  // to keep this live for the scope of the entire function call.
+  std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
+  size_t lambda_captured_variable_index = 0;
   while (true) {
     dex_pc = inst->GetDexPc(insns);
     shadow_frame.SetDexPC(dex_pc);
@@ -2235,19 +2243,63 @@
         POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
         break;
       }
+      case Instruction::CAPTURE_VARIABLE: {
+        if (!IsExperimentalInstructionEnabled(inst)) {
+          UnexpectedOpcode(inst, shadow_frame);
+        }
+
+        if (lambda_closure_builder == nullptr) {
+          lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
+        }
+
+        PREAMBLE();
+        bool success = DoCaptureVariable<do_access_check>(self,
+                                                          inst,
+                                                          /*inout*/shadow_frame,
+                                                          /*inout*/lambda_closure_builder.get());
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
       case Instruction::CREATE_LAMBDA: {
         if (!IsExperimentalInstructionEnabled(inst)) {
           UnexpectedOpcode(inst, shadow_frame);
         }
 
         PREAMBLE();
-        bool success = DoCreateLambda<do_access_check>(self, shadow_frame, inst);
+
+        if (lambda_closure_builder == nullptr) {
+          // DoCreateLambda always needs a ClosureBuilder, even if it has 0 captured variables.
+          lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>();
+        }
+
+        // TODO: these allocations should not leak, and the lambda method should not be local.
+        lambda::Closure* lambda_closure =
+            reinterpret_cast<lambda::Closure*>(alloca(lambda_closure_builder->GetSize()));
+        bool success = DoCreateLambda<do_access_check>(self,
+                                                       inst,
+                                                       /*inout*/shadow_frame,
+                                                       /*inout*/lambda_closure_builder.get(),
+                                                       /*inout*/lambda_closure);
+        lambda_closure_builder.reset(nullptr);  // reset state of variables captured
         POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
         break;
       }
-      case Instruction::UNUSED_F4:
-      case Instruction::UNUSED_F5:
-      case Instruction::UNUSED_F7: {
+      case Instruction::LIBERATE_VARIABLE: {
+        if (!IsExperimentalInstructionEnabled(inst)) {
+          UnexpectedOpcode(inst, shadow_frame);
+        }
+
+        PREAMBLE();
+        bool success = DoLiberateVariable<do_access_check>(self,
+                                                           inst,
+                                                           lambda_captured_variable_index,
+                                                           /*inout*/shadow_frame);
+        // Temporarily only allow sequences of 'liberate-variable, liberate-variable, ...'
+        lambda_captured_variable_index++;
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::UNUSED_F4: {
         if (!IsExperimentalInstructionEnabled(inst)) {
           UnexpectedOpcode(inst, shadow_frame);
         }
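Both interpreter implementations follow the same shape for create-lambda: ask the ClosureBuilder how many bytes the closure needs, alloca() that much in the interpreter's own frame, and let CreateInPlace() construct the closure into the caller-supplied storage. A stripped-down sketch of that measure / allocate-at-the-call-site / construct-in-place pattern, with made-up Blob and BlobBuilder types:

#include <alloca.h>
#include <cstddef>
#include <new>

struct Blob {
  size_t length;
  // Variable-sized payload follows in the same allocation.
};

class BlobBuilder {
 public:
  void Add(size_t bytes) { payload_bytes_ += bytes; }
  // How much storage the caller must provide, header included.
  size_t GetSize() const { return sizeof(Blob) + payload_bytes_; }
  // Construct the result in memory owned by the caller (e.g. alloca'd by it).
  Blob* CreateInPlace(void* memory) const {
    Blob* blob = new (memory) Blob();
    blob->length = payload_bytes_;
    return blob;
  }

 private:
  size_t payload_bytes_ = 0;
};

// Usage mirrors the interpreter loop: the alloca must happen in the frame that
// uses the blob, because the storage disappears when that frame returns.
//   BlobBuilder builder;
//   builder.Add(8);
//   Blob* blob = builder.CreateInPlace(alloca(builder.GetSize()));
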
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 531e039..b5e28e9 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -60,7 +60,7 @@
       : path_(path),
         handle_(handle),
         needs_native_bridge_(false),
-        class_loader_(env->NewGlobalRef(class_loader)),
+        class_loader_(env->NewWeakGlobalRef(class_loader)),
         jni_on_load_lock_("JNI_OnLoad lock"),
         jni_on_load_cond_("JNI_OnLoad condition variable", jni_on_load_lock_),
         jni_on_load_thread_id_(self->GetThreadId()),
@@ -70,11 +70,11 @@
   ~SharedLibrary() {
     Thread* self = Thread::Current();
     if (self != nullptr) {
-      self->GetJniEnv()->DeleteGlobalRef(class_loader_);
+      self->GetJniEnv()->DeleteWeakGlobalRef(class_loader_);
     }
   }
 
-  jobject GetClassLoader() const {
+  jweak GetClassLoader() const {
     return class_loader_;
   }
 
@@ -131,7 +131,13 @@
     return needs_native_bridge_;
   }
 
-  void* FindSymbol(const std::string& symbol_name) {
+  void* FindSymbol(const std::string& symbol_name, const char* shorty = nullptr) {
+    return NeedsNativeBridge()
+        ? FindSymbolWithNativeBridge(symbol_name.c_str(), shorty)
+        : FindSymbolWithoutNativeBridge(symbol_name.c_str());
+  }
+
+  void* FindSymbolWithoutNativeBridge(const std::string& symbol_name) {
     CHECK(!NeedsNativeBridge());
 
     return dlsym(handle_, symbol_name.c_str());
@@ -160,9 +166,9 @@
   // True if a native bridge is required.
   bool needs_native_bridge_;
 
-  // The ClassLoader this library is associated with, a global JNI reference that is
+  // The ClassLoader this library is associated with, a weak global JNI reference that is
   // created/deleted with the scope of the library.
-  const jobject class_loader_;
+  const jweak class_loader_;
 
   // Guards remaining items.
   Mutex jni_on_load_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -184,7 +190,10 @@
     STLDeleteValues(&libraries_);
   }
 
-  void Dump(std::ostream& os) const {
+  // NO_THREAD_SAFETY_ANALYSIS since this may be called from Dumpable. Dumpable can't be annotated
+  // properly due to the template. The caller should be holding the jni_libraries_lock_.
+  void Dump(std::ostream& os) const NO_THREAD_SAFETY_ANALYSIS {
+    Locks::jni_libraries_lock_->AssertHeld(Thread::Current());
     bool first = true;
     for (const auto& library : libraries_) {
       if (!first) {
@@ -195,16 +204,17 @@
     }
   }
 
-  size_t size() const {
+  size_t size() const REQUIRES(Locks::jni_libraries_lock_) {
     return libraries_.size();
   }
 
-  SharedLibrary* Get(const std::string& path) {
+  SharedLibrary* Get(const std::string& path) REQUIRES(Locks::jni_libraries_lock_) {
     auto it = libraries_.find(path);
     return (it == libraries_.end()) ? nullptr : it->second;
   }
 
-  void Put(const std::string& path, SharedLibrary* library) {
+  void Put(const std::string& path, SharedLibrary* library)
+      REQUIRES(Locks::jni_libraries_lock_) {
     libraries_.Put(path, library);
   }
 
@@ -217,24 +227,18 @@
     const mirror::ClassLoader* declaring_class_loader = m->GetDeclaringClass()->GetClassLoader();
     ScopedObjectAccessUnchecked soa(Thread::Current());
     for (const auto& lib : libraries_) {
-      SharedLibrary* library = lib.second;
+      SharedLibrary* const library = lib.second;
       if (soa.Decode<mirror::ClassLoader*>(library->GetClassLoader()) != declaring_class_loader) {
         // We only search libraries loaded by the appropriate ClassLoader.
         continue;
       }
       // Try the short name then the long name...
-      void* fn;
-      if (library->NeedsNativeBridge()) {
-        const char* shorty = m->GetShorty();
-        fn = library->FindSymbolWithNativeBridge(jni_short_name, shorty);
-        if (fn == nullptr) {
-          fn = library->FindSymbolWithNativeBridge(jni_long_name, shorty);
-        }
-      } else {
-        fn = library->FindSymbol(jni_short_name);
-        if (fn == nullptr) {
-          fn = library->FindSymbol(jni_long_name);
-        }
+      const char* shorty = library->NeedsNativeBridge()
+          ? m->GetShorty()
+          : nullptr;
+      void* fn = library->FindSymbol(jni_short_name, shorty);
+      if (fn == nullptr) {
+        fn = library->FindSymbol(jni_long_name, shorty);
       }
       if (fn != nullptr) {
         VLOG(jni) << "[Found native code for " << PrettyMethod(m)
@@ -249,10 +253,49 @@
     return nullptr;
   }
 
- private:
-  AllocationTrackingSafeMap<std::string, SharedLibrary*, kAllocatorTagJNILibraries> libraries_;
-};
+  // Unload native libraries with cleared class loaders.
+  void UnloadNativeLibraries()
+      REQUIRES(!Locks::jni_libraries_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    typedef void (*JNI_OnUnloadFn)(JavaVM*, void*);
+    std::vector<JNI_OnUnloadFn> unload_functions;
+    {
+      MutexLock mu(soa.Self(), *Locks::jni_libraries_lock_);
+      for (auto it = libraries_.begin(); it != libraries_.end(); ) {
+        SharedLibrary* const library = it->second;
+        // If class loader is null then it was unloaded, call JNI_OnUnload.
+        const jweak class_loader = library->GetClassLoader();
+        // If class_loader is a null jobject then it is the boot class loader. We should not unload
+        // the native libraries of the boot class loader.
+        if (class_loader != nullptr &&
+            soa.Decode<mirror::ClassLoader*>(class_loader) == nullptr) {
+          void* const sym = library->FindSymbol("JNI_OnUnload", nullptr);
+          if (sym == nullptr) {
+            VLOG(jni) << "[No JNI_OnUnload found in \"" << library->GetPath() << "\"]";
+          } else {
+            VLOG(jni) << "[JNI_OnUnload found for \"" << library->GetPath() << "\"]";
+            JNI_OnUnloadFn jni_on_unload = reinterpret_cast<JNI_OnUnloadFn>(sym);
+            unload_functions.push_back(jni_on_unload);
+          }
+          delete library;
+          it = libraries_.erase(it);
+        } else {
+          ++it;
+        }
+      }
+    }
+    // Do this without holding the jni libraries lock to prevent possible deadlocks.
+    for (JNI_OnUnloadFn fn : unload_functions) {
+      VLOG(jni) << "Calling JNI_OnUnload";
+      (*fn)(soa.Vm(), nullptr);
+    }
+  }
 
+ private:
+  AllocationTrackingSafeMap<std::string, SharedLibrary*, kAllocatorTagJNILibraries> libraries_
+      GUARDED_BY(Locks::jni_libraries_lock_);
+};
 
 class JII {
  public:
@@ -641,6 +684,10 @@
   }
 }
 
+void JavaVMExt::UnloadNativeLibraries() {
+  libraries_.get()->UnloadNativeLibraries();
+}
+
 bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject class_loader,
                                   std::string* error_msg) {
   error_msg->clear();
@@ -738,10 +785,8 @@
   void* sym;
   if (needs_native_bridge) {
     library->SetNeedsNativeBridge();
-    sym = library->FindSymbolWithNativeBridge("JNI_OnLoad", nullptr);
-  } else {
-    sym = dlsym(handle, "JNI_OnLoad");
   }
+  sym = library->FindSymbol("JNI_OnLoad", nullptr);
   if (sym == nullptr) {
     VLOG(jni) << "[No JNI_OnLoad found in \"" << path << "\"]";
     was_successful = true;
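UnloadNativeLibraries() above relies on two standard JNI facilities: the defining class loader is held through a weak global reference, which decodes to null once the loader has been collected, and the library's optional JNI_OnUnload hook is invoked only after jni_libraries_lock_ has been released, since the hook may allocate or re-enter the runtime. For reference, this is roughly what such a hook looks like from the library author's side (a hypothetical libexample.so; not part of this change):

#include <jni.h>

// Optional counterpart to JNI_OnLoad; the runtime calls it once the class loader
// that loaded this library has been garbage collected and the library is unloaded.
extern "C" JNIEXPORT void JNICALL JNI_OnUnload(JavaVM* vm, void* /*reserved*/) {
  JNIEnv* env = nullptr;
  if (vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) != JNI_OK) {
    return;
  }
  // Release cached global references or other per-library state here. Doing real
  // work (even allocation) is legal, which is why the runtime calls this hook
  // without holding its library lock and only after the GC has finished.
}
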
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index b539bbd..c1fbdc0 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -88,6 +88,11 @@
   bool LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject javaLoader,
                          std::string* error_msg);
 
+  // Unload native libraries with cleared class loaders.
+  void UnloadNativeLibraries()
+      REQUIRES(!Locks::jni_libraries_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   /**
    * Returns a pointer to the code for the native method 'm', found
    * using dlsym(3) on every native library that's been loaded so far.
@@ -184,7 +189,9 @@
   // Not guarded by globals_lock since we sometimes use SynchronizedGet in Thread::DecodeJObject.
   IndirectReferenceTable globals_;
 
-  std::unique_ptr<Libraries> libraries_ GUARDED_BY(Locks::jni_libraries_lock_);
+  // No lock annotation since UnloadNativeLibraries is called on libraries_ but locks the
+  // jni_libraries_lock_ internally.
+  std::unique_ptr<Libraries> libraries_;
 
   // Used by -Xcheck:jni.
   const JNIInvokeInterface* const unchecked_functions_;
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 643bc23..e73ba82 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -67,6 +67,9 @@
   void DumpInfo(std::ostream& os);
   // Add a timing logger to cumulative_timings_.
   void AddTimingLogger(const TimingLogger& logger);
+  JitInstrumentationCache* GetInstrumentationCache() const {
+    return instrumentation_cache_.get();
+  }
 
  private:
   Jit();
diff --git a/runtime/jit/jit_code_cache_test.cc b/runtime/jit/jit_code_cache_test.cc
index a6cbb71..c76dc11 100644
--- a/runtime/jit/jit_code_cache_test.cc
+++ b/runtime/jit/jit_code_cache_test.cc
@@ -49,8 +49,11 @@
   ASSERT_TRUE(reserved_code != nullptr);
   ASSERT_TRUE(code_cache->ContainsCodePtr(reserved_code));
   ASSERT_EQ(code_cache->NumMethods(), 1u);
-  ClassLinker* const cl = Runtime::Current()->GetClassLinker();
-  ArtMethod* method = &cl->AllocArtMethodArray(soa.Self(), 1)->At(0);
+  Runtime* const runtime = Runtime::Current();
+  ClassLinker* const class_linker = runtime->GetClassLinker();
+  ArtMethod* method = &class_linker->AllocArtMethodArray(soa.Self(),
+                                                         runtime->GetLinearAlloc(),
+                                                         1)->At(0);
   ASSERT_FALSE(code_cache->ContainsMethod(method));
   method->SetEntryPointFromQuickCompiledCode(reserved_code);
   ASSERT_TRUE(code_cache->ContainsMethod(method));
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index d437dd5..e9c16c1 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -24,11 +24,21 @@
 namespace art {
 namespace jit {
 
-class JitCompileTask : public Task {
+class JitCompileTask FINAL : public Task {
  public:
-  explicit JitCompileTask(ArtMethod* method) : method_(method) {}
+  explicit JitCompileTask(ArtMethod* method) : method_(method) {
+    ScopedObjectAccess soa(Thread::Current());
+    // Add a global ref to the class to prevent class unloading until compilation is done.
+    klass_ = soa.Vm()->AddGlobalRef(soa.Self(), method_->GetDeclaringClass());
+    CHECK(klass_ != nullptr);
+  }
 
-  virtual void Run(Thread* self) OVERRIDE {
+  ~JitCompileTask() {
+    ScopedObjectAccess soa(Thread::Current());
+    soa.Vm()->DeleteGlobalRef(soa.Self(), klass_);
+  }
+
+  void Run(Thread* self) OVERRIDE {
     ScopedObjectAccess soa(self);
     VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
     if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
@@ -36,12 +46,13 @@
     }
   }
 
-  virtual void Finalize() OVERRIDE {
+  void Finalize() OVERRIDE {
     delete this;
   }
 
  private:
   ArtMethod* const method_;
+  jobject klass_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
 };
@@ -104,5 +115,9 @@
   }
 }
 
+void JitInstrumentationCache::WaitForCompilationToFinish(Thread* self) {
+  thread_pool_->Wait(self, false, false);
+}
+
 }  // namespace jit
 }  // namespace art
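JitCompileTask above pins the method's declaring class with a JNI global reference for the lifetime of the task, so the class-unloading support added elsewhere in this change cannot reclaim the class while a worker thread is still compiling one of its methods. The same idea expressed with nothing but public JNI calls (PinnedClassTask is a hypothetical name, not ART code):

#include <jni.h>

// Holds a class alive across an asynchronous operation by owning a global ref.
class PinnedClassTask {
 public:
  PinnedClassTask(JNIEnv* env, jclass klass)
      : vm_(nullptr), pinned_(env->NewGlobalRef(klass)) {
    env->GetJavaVM(&vm_);
  }

  ~PinnedClassTask() {
    // The global ref must be deleted from a thread with a valid JNIEnv.
    JNIEnv* env = nullptr;
    if (vm_ != nullptr &&
        vm_->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) == JNI_OK) {
      env->DeleteGlobalRef(pinned_);
    }
  }

 private:
  JavaVM* vm_;
  jobject pinned_;  // Keeps the class (and thus its class loader) reachable.
};
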
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 6fdef65..9eb464b 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -50,6 +50,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateThreadPool();
   void DeleteThreadPool();
+  // Wait until there are no more pending compilation tasks.
+  void WaitForCompilationToFinish(Thread* self);
 
  private:
   size_t hot_method_threshold_;
diff --git a/runtime/lambda/art_lambda_method.h b/runtime/lambda/art_lambda_method.h
index 892d8c6..ea13eb7 100644
--- a/runtime/lambda/art_lambda_method.h
+++ b/runtime/lambda/art_lambda_method.h
@@ -35,7 +35,7 @@
   // (Ownership of strings is retained by the caller and the lifetime should exceed this class).
   ArtLambdaMethod(ArtMethod* target_method,
                   const char* captured_variables_type_descriptor,
-                  const char* captured_variables_shorty_,
+                  const char* captured_variables_shorty,
                   bool innate_lambda = true);
 
   // Get the target method for this lambda that would be used by the invoke-lambda dex instruction.
diff --git a/runtime/lambda/box_table.cc b/runtime/lambda/box_table.cc
index 26575fd..8eef10b 100644
--- a/runtime/lambda/box_table.cc
+++ b/runtime/lambda/box_table.cc
@@ -18,6 +18,8 @@
 #include "base/mutex.h"
 #include "common_throws.h"
 #include "gc_root-inl.h"
+#include "lambda/closure.h"
+#include "lambda/leaking_allocator.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "thread.h"
@@ -26,11 +28,53 @@
 
 namespace art {
 namespace lambda {
+// Temporarily represent the lambda Closure as its raw bytes in an array.
+// TODO: Generate a proxy class for the closure when boxing the first time.
+using BoxedClosurePointerType = mirror::ByteArray*;
+
+static mirror::Class* GetBoxedClosureClass() SHARED_REQUIRES(Locks::mutator_lock_) {
+  return mirror::ByteArray::GetArrayClass();
+}
+
+namespace {
+  // Convenience functions for allocating/deleting box table copies of the closures.
+  struct ClosureAllocator {
+    // Deletes a Closure that was allocated through ::Allocate.
+    static void Delete(Closure* ptr) {
+      delete[] reinterpret_cast<char*>(ptr);
+    }
+
+    // Returns a well-aligned pointer to a newly allocated Closure on the 'new' heap.
+    static Closure* Allocate(size_t size) {
+      DCHECK_GE(size, sizeof(Closure));
+
+      // TODO: Maybe point to the interior of the boxed closure object after we add proxy support?
+      Closure* closure = reinterpret_cast<Closure*>(new char[size]);
+      DCHECK_ALIGNED(closure, alignof(Closure));
+      return closure;
+    }
+  };
+}  // namespace
 
 BoxTable::BoxTable()
   : allow_new_weaks_(true),
     new_weaks_condition_("lambda box table allowed weaks", *Locks::lambda_table_lock_) {}
 
+BoxTable::~BoxTable() {
+  // Free all the copies of our closures.
+  for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) {
+    std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
+
+    Closure* closure = key_value_pair.first;
+
+    // Remove from the map first, so that it doesn't try to access dangling pointer.
+    map_iterator = map_.Erase(map_iterator);
+
+    // Safe to delete, no dangling pointers.
+    ClosureAllocator::Delete(closure);
+  }
+}
+
 mirror::Object* BoxTable::BoxLambda(const ClosureType& closure) {
   Thread* self = Thread::Current();
 
@@ -58,22 +102,29 @@
 
   // Release the lambda table lock here, so that thread suspension is allowed.
 
-  // Convert the ArtMethod into a java.lang.reflect.Method which will serve
+  // Convert the Closure into a managed byte[] which will serve
   // as the temporary 'boxed' version of the lambda. This is good enough
   // to check all the basic object identities that a boxed lambda must retain.
+  // It's also good enough to contain all the captured primitive variables.
 
   // TODO: Boxing an innate lambda (i.e. made with create-lambda) should make a proxy class
   // TODO: Boxing a learned lambda (i.e. made with unbox-lambda) should return the original object
-  mirror::Method* method_as_object =
-      mirror::Method::CreateFromArtMethod(self, closure);
+  BoxedClosurePointerType closure_as_array_object =
+      mirror::ByteArray::Alloc(self, closure->GetSize());
+
   // There are no thread suspension points after this, so we don't need to put it into a handle.
 
-  if (UNLIKELY(method_as_object == nullptr)) {
+  if (UNLIKELY(closure_as_array_object == nullptr)) {
     // Most likely an OOM has occurred.
     CHECK(self->IsExceptionPending());
     return nullptr;
   }
 
+  // Write the raw closure data into the byte[].
+  closure->CopyTo(closure_as_array_object->GetRawData(sizeof(uint8_t),  // component size
+                                                      0 /*index*/),     // index
+                  closure_as_array_object->GetLength());
+
   // The method has been successfully boxed into an object, now insert it into the hash map.
   {
     MutexLock mu(self, *Locks::lambda_table_lock_);
@@ -87,38 +138,56 @@
       return value.Read();
     }
 
-    // Otherwise we should insert it into the hash map in this thread.
-    map_.Insert(std::make_pair(closure, ValueType(method_as_object)));
+    // Otherwise we need to insert it into the hash map in this thread.
+
+    // Make a copy for the box table to keep, in case the closure gets collected from the stack.
+    // TODO: GC may need to sweep for roots in the box table's copy of the closure.
+    Closure* closure_table_copy = ClosureAllocator::Allocate(closure->GetSize());
+    closure->CopyTo(closure_table_copy, closure->GetSize());
+
+    // The closure_table_copy must be deleted manually when its entry is erased from the map.
+
+    // Actually insert into the table.
+    map_.Insert({closure_table_copy, ValueType(closure_as_array_object)});
   }
 
-  return method_as_object;
+  return closure_as_array_object;
 }
 
 bool BoxTable::UnboxLambda(mirror::Object* object, ClosureType* out_closure) {
   DCHECK(object != nullptr);
   *out_closure = nullptr;
 
+  Thread* self = Thread::Current();
+
   // Note that we do not need to access lambda_table_lock_ here
   // since we don't need to look at the map.
 
   mirror::Object* boxed_closure_object = object;
 
-  // Raise ClassCastException if object is not instanceof java.lang.reflect.Method
-  if (UNLIKELY(!boxed_closure_object->InstanceOf(mirror::Method::StaticClass()))) {
-    ThrowClassCastException(mirror::Method::StaticClass(), boxed_closure_object->GetClass());
+  // Raise ClassCastException if object is not instanceof byte[]
+  if (UNLIKELY(!boxed_closure_object->InstanceOf(GetBoxedClosureClass()))) {
+    ThrowClassCastException(GetBoxedClosureClass(), boxed_closure_object->GetClass());
     return false;
   }
 
   // TODO(iam): We must check that the closure object extends/implements the type
-  // specified in [type id]. This is not currently implemented since it's always a Method.
+  // specified in [type id]. This is not currently implemented since it's always a byte[].
 
   // If we got this far, the inputs are valid.
-  // Write out the java.lang.reflect.Method's embedded ArtMethod* into the vreg target.
-  mirror::AbstractMethod* boxed_closure_as_method =
-      down_cast<mirror::AbstractMethod*>(boxed_closure_object);
+  // Turn the byte[] back into a raw Closure: allocate storage, copy the data in, and return it.
+  BoxedClosurePointerType boxed_closure_as_array =
+      down_cast<BoxedClosurePointerType>(boxed_closure_object);
 
-  ArtMethod* unboxed_closure = boxed_closure_as_method->GetArtMethod();
-  DCHECK(unboxed_closure != nullptr);
+  const int8_t* unaligned_interior_closure = boxed_closure_as_array->GetData();
+
+  // Allocate a copy that can "escape" and copy the closure data into that.
+  Closure* unboxed_closure =
+      LeakingAllocator::MakeFlexibleInstance<Closure>(self, boxed_closure_as_array->GetLength());
+  // TODO: don't just memcpy the closure, it's unsafe when we add references to the mix.
+  memcpy(unboxed_closure, unaligned_interior_closure, boxed_closure_as_array->GetLength());
+
+  DCHECK_EQ(unboxed_closure->GetSize(), static_cast<size_t>(boxed_closure_as_array->GetLength()));
 
   *out_closure = unboxed_closure;
   return true;
@@ -127,7 +196,7 @@
 BoxTable::ValueType BoxTable::FindBoxedLambda(const ClosureType& closure) const {
   auto map_iterator = map_.Find(closure);
   if (map_iterator != map_.end()) {
-    const std::pair<ClosureType, ValueType>& key_value_pair = *map_iterator;
+    const std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
     const ValueType& value = key_value_pair.second;
 
     DCHECK(!value.IsNull());  // Never store null boxes.
@@ -157,7 +226,7 @@
    */
   std::vector<ClosureType> remove_list;
   for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) {
-    std::pair<ClosureType, ValueType>& key_value_pair = *map_iterator;
+    std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
 
     const ValueType& old_value = key_value_pair.second;
 
@@ -166,10 +235,15 @@
     mirror::Object* new_value = visitor->IsMarked(old_value_raw);
 
     if (new_value == nullptr) {
-      const ClosureType& closure = key_value_pair.first;
       // The object has been swept away.
+      const ClosureType& closure = key_value_pair.first;
+
       // Delete the entry from the map.
-      map_iterator = map_.Erase(map_.Find(closure));
+      map_iterator = map_.Erase(map_iterator);
+
+      // Clean up the memory by deleting the closure.
+      ClosureAllocator::Delete(closure);
+
     } else {
       // The object has been moved.
       // Update the map.
@@ -208,16 +282,33 @@
   new_weaks_condition_.Broadcast(self);
 }
 
-bool BoxTable::EqualsFn::operator()(const ClosureType& lhs, const ClosureType& rhs) const {
+void BoxTable::EmptyFn::MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const {
+  item.first = nullptr;
+
+  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+  item.second = ValueType();  // Also clear the GC root.
+}
+
+bool BoxTable::EmptyFn::IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const {
+  return item.first == nullptr;
+}
+
+bool BoxTable::EqualsFn::operator()(const UnorderedMapKeyType& lhs,
+                                    const UnorderedMapKeyType& rhs) const {
   // Nothing needs this right now, but leave this assertion for later when
   // we need to look at the references inside of the closure.
-  if (kIsDebugBuild) {
-    Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-  }
+  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
 
-  // TODO: Need rework to use read barriers once closures have references inside of them that can
-  // move. Until then, it's safe to just compare the data inside of it directly.
-  return lhs == rhs;
+  return lhs->ReferenceEquals(rhs);
+}
+
+size_t BoxTable::HashFn::operator()(const UnorderedMapKeyType& key) const {
+  const lambda::Closure* closure = key;
+  DCHECK_ALIGNED(closure, alignof(lambda::Closure));
+
+  // Need to hold mutator_lock_ before calling into Closure::GetHashCode.
+  Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+  return closure->GetHashCode();
 }
 
 }  // namespace lambda
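
For readers following the boxing change above: BoxLambda now flattens the Closure into a managed byte[] and UnboxLambda copies the bytes back out into fresh closure storage. Below is a minimal standalone sketch of that round trip, using plain C++ containers as stand-ins for mirror::ByteArray and the ART allocators (those stand-ins are assumptions, not ART APIs):

  #include <cassert>
  #include <cstdint>
  #include <cstring>
  #include <vector>

  // Hypothetical flat, trivially copyable closure payload (stand-in for lambda::Closure).
  struct FlatClosure {
    const void* target_method;  // stand-in for the ArtMethod* target
    uint32_t captured_int;      // one captured primitive variable
  };

  // "Box": copy the raw closure bytes into a byte array, the temporary boxed form.
  std::vector<uint8_t> BoxToBytes(const FlatClosure& closure) {
    std::vector<uint8_t> boxed(sizeof(FlatClosure));
    std::memcpy(boxed.data(), &closure, sizeof(FlatClosure));
    return boxed;
  }

  // "Unbox": copy the bytes back out into a fresh closure value.
  FlatClosure UnboxFromBytes(const std::vector<uint8_t>& boxed) {
    assert(boxed.size() == sizeof(FlatClosure));
    FlatClosure closure;
    std::memcpy(&closure, boxed.data(), sizeof(FlatClosure));
    return closure;
  }

  int main() {
    FlatClosure original{nullptr, 42u};
    FlatClosure unboxed = UnboxFromBytes(BoxToBytes(original));
    assert(unboxed.captured_int == original.captured_int);
    return 0;
  }

This byte-for-byte copying is only safe while closures cannot capture references, which is what the remaining TODOs about proxy classes and GC sweeping point at.
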
diff --git a/runtime/lambda/box_table.h b/runtime/lambda/box_table.h
index 9ffda66..adb7332 100644
--- a/runtime/lambda/box_table.h
+++ b/runtime/lambda/box_table.h
@@ -34,6 +34,7 @@
 }  // namespace mirror
 
 namespace lambda {
+struct Closure;  // forward declaration
 
 /*
  * Store a table of boxed lambdas. This is required to maintain object referential equality
@@ -44,7 +45,7 @@
  */
 class BoxTable FINAL {
  public:
-  using ClosureType = art::ArtMethod*;
+  using ClosureType = art::lambda::Closure*;
 
   // Boxes a closure into an object. Returns null and throws an exception on failure.
   mirror::Object* BoxLambda(const ClosureType& closure)
@@ -72,10 +73,9 @@
       REQUIRES(!Locks::lambda_table_lock_);
 
   BoxTable();
-  ~BoxTable() = default;
+  ~BoxTable();
 
  private:
-  // FIXME: This needs to be a GcRoot.
   // Explanation:
   // - After all threads are suspended (exclusive mutator lock),
   //   the concurrent-copying GC can move objects from the "from" space to the "to" space.
@@ -97,30 +97,30 @@
   void BlockUntilWeaksAllowed()
       SHARED_REQUIRES(Locks::lambda_table_lock_);
 
+  // The key is a copy of the Closure owned by the box table; it is deleted manually when erased.
+  using UnorderedMapKeyType = ClosureType;
+
   // EmptyFn implementation for art::HashMap
   struct EmptyFn {
-    void MakeEmpty(std::pair<ClosureType, ValueType>& item) const {
-      item.first = nullptr;
-    }
-    bool IsEmpty(const std::pair<ClosureType, ValueType>& item) const {
-      return item.first == nullptr;
-    }
+    void MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const
+        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
+
+    bool IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const;
   };
 
   // HashFn implementation for art::HashMap
   struct HashFn {
-    size_t operator()(const ClosureType& key) const {
-      // TODO(iam): Rewrite hash function when ClosureType is no longer an ArtMethod*
-      return static_cast<size_t>(reinterpret_cast<uintptr_t>(key));
-    }
+    size_t operator()(const UnorderedMapKeyType& key) const
+        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
   };
 
   // EqualsFn implementation for art::HashMap
   struct EqualsFn {
-    bool operator()(const ClosureType& lhs, const ClosureType& rhs) const;
+    bool operator()(const UnorderedMapKeyType& lhs, const UnorderedMapKeyType& rhs) const
+        NO_THREAD_SAFETY_ANALYSIS;  // SHARED_REQUIRES(Locks::mutator_lock_)
   };
 
-  using UnorderedMap = art::HashMap<ClosureType,
+  using UnorderedMap = art::HashMap<UnorderedMapKeyType,
                                     ValueType,
                                     EmptyFn,
                                     HashFn,
diff --git a/runtime/lambda/closure.cc b/runtime/lambda/closure.cc
index 95a17c6..179e4ee 100644
--- a/runtime/lambda/closure.cc
+++ b/runtime/lambda/closure.cc
@@ -124,6 +124,55 @@
   memcpy(target, this, GetSize());
 }
 
+ArtMethod* Closure::GetTargetMethod() const {
+  return const_cast<ArtMethod*>(lambda_info_->GetArtMethod());
+}
+
+uint32_t Closure::GetHashCode() const {
+  // Start with a non-zero constant, a prime number.
+  uint32_t result = 17;
+
+  // Include the hash with the ArtMethod.
+  {
+    uintptr_t method = reinterpret_cast<uintptr_t>(GetTargetMethod());
+    result = 31 * result + Low32Bits(method);
+    if (sizeof(method) == sizeof(uint64_t)) {
+      result = 31 * result + High32Bits(method);
+    }
+  }
+
+  // Include a hash for each captured variable.
+  for (size_t i = 0; i < GetCapturedVariablesSize(); ++i) {
+    // TODO: not safe for GC-able values since the address can move and the hash code would change.
+    uint8_t captured_variable_raw_value;
+    CopyUnsafeAtOffset<uint8_t>(i, /*out*/&captured_variable_raw_value);  // NOLINT: [whitespace/comma] [3]
+
+    result = 31 * result + captured_variable_raw_value;
+  }
+
+  // TODO: Fix above loop to work for objects and lambdas.
+  static_assert(kClosureSupportsGarbageCollection == false,
+                "Need to update above loop to read the hash code from the "
+                "objects and lambdas recursively");
+
+  return result;
+}
+
+bool Closure::ReferenceEquals(const Closure* other) const {
+  DCHECK(other != nullptr);
+
+  // TODO: Need rework to use read barriers once closures have references inside of them that can
+  // move. Until then, it's safe to just compare the data inside of it directly.
+  static_assert(kClosureSupportsReferences == false,
+                "Unsafe to use memcmp in read barrier collector");
+
+  if (GetSize() != other->GetSize()) {
+    return false;
+  }
+
+  return memcmp(this, other, GetSize()) == 0;
+}
+
 size_t Closure::GetNumberOfCapturedVariables() const {
   // TODO: refactor into art_lambda_method.h. Parsing should only be required here as a DCHECK.
   VariableInfo variable_info =
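
Closure::GetHashCode above is the usual 17/31 accumulation, folded over the target method pointer and then over each captured byte. A standalone sketch of the same accumulation (the function and parameter names here are illustrative, not ART's):

  #include <cstddef>
  #include <cstdint>

  inline uint32_t Low32(uintptr_t value) { return static_cast<uint32_t>(value); }
  inline uint32_t High32(uintptr_t value) {
    return static_cast<uint32_t>(static_cast<uint64_t>(value) >> 32);
  }

  uint32_t HashClosureLike(uintptr_t target_method, const uint8_t* captured, size_t captured_size) {
    uint32_t result = 17;  // non-zero prime seed
    result = 31 * result + Low32(target_method);
    if (sizeof(uintptr_t) == sizeof(uint64_t)) {
      result = 31 * result + High32(target_method);
    }
    for (size_t i = 0; i < captured_size; ++i) {
      result = 31 * result + captured[i];  // one step per captured byte
    }
    return result;
  }

  int main() {
    const uint8_t captured[4] = {1, 2, 3, 4};
    return HashClosureLike(0x1234u, captured, sizeof(captured)) != 0u ? 0 : 1;
  }

Note that the matching equality check must treat a zero memcmp result as "equal", which is why ReferenceEquals compares the memcmp result against 0.
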
diff --git a/runtime/lambda/closure.h b/runtime/lambda/closure.h
index 60d117e..31ff194 100644
--- a/runtime/lambda/closure.h
+++ b/runtime/lambda/closure.h
@@ -49,6 +49,19 @@
   // The target_size must be at least as large as GetSize().
   void CopyTo(void* target, size_t target_size) const;
 
+  // Get the target method, i.e. the method that will be dispatched into with invoke-lambda.
+  ArtMethod* GetTargetMethod() const;
+
+  // Calculates the hash code. Value is recomputed each time.
+  uint32_t GetHashCode() const SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Is this the same closure as other? e.g. same target method, same variables captured.
+  //
+  // Determines whether the two Closures are interchangeable instances.
+  // Does *not* call Object#equals recursively. If two Closures compare ReferenceEquals true that
+  // means that they are interchangeable values (usually for the purpose of boxing/unboxing).
+  bool ReferenceEquals(const Closure* other) const SHARED_REQUIRES(Locks::mutator_lock_);
+
   // How many variables were captured?
   size_t GetNumberOfCapturedVariables() const;
 
diff --git a/runtime/lambda/closure_builder-inl.h b/runtime/lambda/closure_builder-inl.h
index 41a803b..3cec21f 100644
--- a/runtime/lambda/closure_builder-inl.h
+++ b/runtime/lambda/closure_builder-inl.h
@@ -35,6 +35,8 @@
 
   values_.push_back(value_storage);
   size_ += sizeof(T);
+
+  shorty_types_ += kShortyType;
 }
 
 }  // namespace lambda
diff --git a/runtime/lambda/closure_builder.cc b/runtime/lambda/closure_builder.cc
index 9c37db8..739e965 100644
--- a/runtime/lambda/closure_builder.cc
+++ b/runtime/lambda/closure_builder.cc
@@ -64,6 +64,8 @@
       UNIMPLEMENTED(FATAL) << "can't yet safely capture objects with read barrier";
     }
   }
+
+  shorty_types_ += ShortyFieldType::kObject;
 }
 
 void ClosureBuilder::CaptureVariableLambda(Closure* closure) {
@@ -78,6 +80,8 @@
 
   // A closure may be sized dynamically, so always query it for the true size.
   size_ += closure->GetSize();
+
+  shorty_types_ += ShortyFieldType::kLambda;
 }
 
 size_t ClosureBuilder::GetSize() const {
@@ -85,9 +89,15 @@
 }
 
 size_t ClosureBuilder::GetCaptureCount() const {
+  DCHECK_EQ(values_.size(), shorty_types_.size());
   return values_.size();
 }
 
+const std::string& ClosureBuilder::GetCapturedVariableShortyTypes() const {
+  DCHECK_EQ(values_.size(), shorty_types_.size());
+  return shorty_types_;
+}
+
 Closure* ClosureBuilder::CreateInPlace(void* memory, ArtLambdaMethod* target_method) const {
   DCHECK(memory != nullptr);
   DCHECK(target_method != nullptr);
@@ -138,11 +148,14 @@
                                    size_t variables_size) const {
   size_t total_size = header_size;
   const char* shorty_types = target_method->GetCapturedVariablesShortyTypeDescriptor();
+  DCHECK_STREQ(shorty_types, shorty_types_.c_str());
 
   size_t variables_offset = 0;
   size_t remaining_size = variables_size;
 
   const size_t shorty_count = target_method->GetNumberOfCapturedVariables();
+  DCHECK_EQ(shorty_count, GetCaptureCount());
+
   for (size_t i = 0; i < shorty_count; ++i) {
     ShortyFieldType shorty{shorty_types[i]};  // NOLINT [readability/braces] [4]
 
diff --git a/runtime/lambda/closure_builder.h b/runtime/lambda/closure_builder.h
index 542e12a..23eb484 100644
--- a/runtime/lambda/closure_builder.h
+++ b/runtime/lambda/closure_builder.h
@@ -40,13 +40,12 @@
 //
 // The mutator lock must be held for the duration of the lifetime of this object,
 // since it needs to temporarily store heap references into an internal list.
-class ClosureBuilder : ValueObject {
+class ClosureBuilder {
  public:
   using ShortyTypeEnum = decltype(ShortyFieldType::kByte);
 
-
   // Mark this primitive value to be captured as the specified type.
-  template <typename T, ShortyTypeEnum kShortyType>
+  template <typename T, ShortyTypeEnum kShortyType = ShortyFieldTypeSelectEnum<T>::value>
   void CaptureVariablePrimitive(T value);
 
   // Mark this object reference to be captured.
@@ -63,6 +62,9 @@
   // Returns how many variables have been captured so far.
   size_t GetCaptureCount() const;
 
+  // Get the list of captured variables' shorty field types.
+  const std::string& GetCapturedVariableShortyTypes() const;
+
   // Creates a closure in-place and writes out the data into 'memory'.
   // Memory must be at least 'GetSize' bytes large.
   // All previously marked data to be captured is now written out.
@@ -93,6 +95,7 @@
   size_t size_ = kInitialSize;
   bool is_dynamic_size_ = false;
   std::vector<ShortyFieldTypeTraits::MaxType> values_;
+  std::string shorty_types_;
 };
 
 }  // namespace lambda
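
The builder now accumulates one shorty character per captured variable, and the creation path cross-checks that string against the target ArtLambdaMethod's descriptor. A small standalone sketch of that bookkeeping, using plain chars in place of ShortyFieldType (an assumption for illustration):

  #include <cassert>
  #include <cstddef>
  #include <cstring>
  #include <string>
  #include <vector>

  class ShortyTrackingBuilder {
   public:
    void CaptureInt(int value) { values_.push_back(value); shorty_types_ += 'I'; }
    void CaptureObjectSlot() { values_.push_back(0); shorty_types_ += 'L'; }

    size_t GetCaptureCount() const {
      assert(values_.size() == shorty_types_.size());  // the two lists must stay in lock step
      return values_.size();
    }

    // Mirrors the DCHECK_STREQ against the target method's captured-variable descriptor.
    void CheckAgainstDescriptor(const char* expected_shorty) const {
      assert(std::strcmp(expected_shorty, shorty_types_.c_str()) == 0);
    }

   private:
    std::vector<int> values_;
    std::string shorty_types_;
  };

  int main() {
    ShortyTrackingBuilder builder;
    builder.CaptureInt(5);
    builder.CaptureObjectSlot();
    builder.CheckAgainstDescriptor("IL");
    return builder.GetCaptureCount() == 2 ? 0 : 1;
  }
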
diff --git a/runtime/lambda/leaking_allocator.cc b/runtime/lambda/leaking_allocator.cc
new file mode 100644
index 0000000..4910732
--- /dev/null
+++ b/runtime/lambda/leaking_allocator.cc
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lambda/leaking_allocator.h"
+#include "linear_alloc.h"
+#include "runtime.h"
+
+namespace art {
+namespace lambda {
+
+void* LeakingAllocator::AllocateMemory(Thread* self, size_t byte_size) {
+  // TODO: use GetAllocatorForClassLoader to allocate lambda ArtMethod data.
+  return Runtime::Current()->GetLinearAlloc()->Alloc(self, byte_size);
+}
+
+}  // namespace lambda
+}  // namespace art
diff --git a/runtime/lambda/leaking_allocator.h b/runtime/lambda/leaking_allocator.h
new file mode 100644
index 0000000..c3222d0
--- /dev/null
+++ b/runtime/lambda/leaking_allocator.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
+#define ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
+
+#include <utility>  // std::forward
+
+namespace art {
+class Thread;  // forward declaration
+
+namespace lambda {
+
+// Temporary class to centralize all the leaking allocations.
+// Allocations made through this class are never freed, but it is a placeholder
+// that means that the calling code needs to be rewritten to properly:
+//
+// (a) Have a lifetime scoped to some other entity.
+// (b) Not be allocated over and over again if it was already allocated once (immutable data).
+//
+// TODO: do all of the above a/b for each callsite, and delete this class.
+class LeakingAllocator {
+ public:
+  // Allocate byte_size bytes worth of memory. Never freed.
+  static void* AllocateMemory(Thread* self, size_t byte_size);
+
+  // Make a new instance of T, flexibly sized, in-place at newly allocated memory. Never freed.
+  template <typename T, typename... Args>
+  static T* MakeFlexibleInstance(Thread* self, size_t byte_size, Args&&... args) {
+    return new (AllocateMemory(self, byte_size)) T(std::forward<Args>(args)...);
+  }
+
+  // Make a new instance of T in-place at newly allocated memory. Never freed.
+  template <typename T, typename... Args>
+  static T* MakeInstance(Thread* self, Args&&... args) {
+    return new (AllocateMemory(self, sizeof(T))) T(std::forward<Args>(args)...);
+  }
+};
+
+}  // namespace lambda
+}  // namespace art
+
+#endif  // ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_
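
MakeFlexibleInstance is placement new over a caller-chosen number of bytes, which is what lets variably sized objects such as Closure (header plus trailing captured-variable storage) be constructed in one allocation. A standalone sketch of the pattern, with malloc standing in for the LinearAlloc-backed AllocateMemory (that substitution is an assumption; the real allocator intentionally never frees):

  #include <cstddef>
  #include <cstdint>
  #include <cstdlib>
  #include <cstring>
  #include <new>

  // Hypothetical flexibly sized type: real storage extends past the end of the struct,
  // mirroring how Closure keeps its captured variables after the header.
  struct FlexibleBlob {
    size_t payload_size;
    uint8_t payload[1];
  };

  // Allocate byte_size bytes and construct T at the start of them, like MakeFlexibleInstance.
  template <typename T>
  T* MakeFlexibleInstanceLike(size_t byte_size) {
    void* memory = std::malloc(byte_size);  // stand-in for LeakingAllocator::AllocateMemory
    return memory != nullptr ? new (memory) T() : nullptr;
  }

  int main() {
    const size_t extra = 16;  // room for 16 trailing payload bytes
    FlexibleBlob* blob = MakeFlexibleInstanceLike<FlexibleBlob>(sizeof(FlexibleBlob) + extra);
    if (blob == nullptr) return 1;
    blob->payload_size = extra;
    std::memset(blob->payload, 0, blob->payload_size);  // writes into the over-allocated tail
    // Intentionally never freed here, mirroring the "leaking" contract of the real class.
    return 0;
  }
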
diff --git a/runtime/mirror/class_loader.h b/runtime/mirror/class_loader.h
index f27b615..c2a65d6 100644
--- a/runtime/mirror/class_loader.h
+++ b/runtime/mirror/class_loader.h
@@ -35,18 +35,31 @@
   static constexpr uint32_t InstanceSize() {
     return sizeof(ClassLoader);
   }
+
   ClassLoader* GetParent() SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetFieldObject<ClassLoader>(OFFSET_OF_OBJECT_MEMBER(ClassLoader, parent_));
   }
+
   ClassTable* GetClassTable() SHARED_REQUIRES(Locks::mutator_lock_) {
     return reinterpret_cast<ClassTable*>(
         GetField64(OFFSET_OF_OBJECT_MEMBER(ClassLoader, class_table_)));
   }
+
   void SetClassTable(ClassTable* class_table) SHARED_REQUIRES(Locks::mutator_lock_) {
     SetField64<false>(OFFSET_OF_OBJECT_MEMBER(ClassLoader, class_table_),
                       reinterpret_cast<uint64_t>(class_table));
   }
 
+  LinearAlloc* GetAllocator() SHARED_REQUIRES(Locks::mutator_lock_) {
+    return reinterpret_cast<LinearAlloc*>(
+        GetField64(OFFSET_OF_OBJECT_MEMBER(ClassLoader, allocator_)));
+  }
+
+  void SetAllocator(LinearAlloc* allocator) SHARED_REQUIRES(Locks::mutator_lock_) {
+    SetField64<false>(OFFSET_OF_OBJECT_MEMBER(ClassLoader, allocator_),
+                      reinterpret_cast<uint64_t>(allocator));
+  }
+
  private:
   // Visit instance fields of the class loader as well as its associated classes.
   // Null class loader is handled by ClassLinker::VisitClassRoots.
@@ -61,6 +74,7 @@
   HeapReference<Object> proxyCache_;
   // Native pointer to class table, need to zero this out when image writing.
   uint32_t padding_ ATTRIBUTE_UNUSED;
+  uint64_t allocator_;
   uint64_t class_table_;
 
   friend struct art::ClassLoaderOffsets;  // for verifying offset information
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index e8633de..e215994 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -71,18 +71,14 @@
 
 int32_t Throwable::GetStackDepth() {
   Object* stack_state = GetStackState();
-  if (stack_state == nullptr) {
+  if (stack_state == nullptr || !stack_state->IsObjectArray()) {
     return -1;
   }
-  if (!stack_state->IsIntArray() && !stack_state->IsLongArray()) {
-    return -1;
-  }
-  mirror::PointerArray* method_trace = down_cast<mirror::PointerArray*>(stack_state->AsArray());
-  int32_t array_len = method_trace->GetLength();
-  // The format is [method pointers][pcs] so the depth is half the length (see method
-  // BuildInternalStackTraceVisitor::Init).
-  CHECK_EQ(array_len % 2, 0);
-  return array_len / 2;
+  mirror::ObjectArray<mirror::Object>* const trace = stack_state->AsObjectArray<mirror::Object>();
+  const int32_t array_len = trace->GetLength();
+  DCHECK_GT(array_len, 0);
+  // See method BuildInternalStackTraceVisitor::Init for the format.
+  return array_len - 1;
 }
 
 std::string Throwable::Dump() {
@@ -95,18 +91,22 @@
   result += "\n";
   Object* stack_state = GetStackState();
   // check stack state isn't missing or corrupt
-  if (stack_state != nullptr &&
-      (stack_state->IsIntArray() || stack_state->IsLongArray())) {
+  if (stack_state != nullptr && stack_state->IsObjectArray()) {
+    mirror::ObjectArray<mirror::Object>* object_array =
+        stack_state->AsObjectArray<mirror::Object>();
     // Decode the internal stack trace into the depth and method trace
-    // Format is [method pointers][pcs]
-    auto* method_trace = down_cast<mirror::PointerArray*>(stack_state->AsArray());
-    auto array_len = method_trace->GetLength();
+    // See method BuildInternalStackTraceVisitor::Init for the format.
+    DCHECK_GT(object_array->GetLength(), 0);
+    mirror::Object* methods_and_dex_pcs = object_array->Get(0);
+    DCHECK(methods_and_dex_pcs->IsIntArray() || methods_and_dex_pcs->IsLongArray());
+    mirror::PointerArray* method_trace = down_cast<mirror::PointerArray*>(methods_and_dex_pcs);
+    const int32_t array_len = method_trace->GetLength();
     CHECK_EQ(array_len % 2, 0);
     const auto depth = array_len / 2;
     if (depth == 0) {
       result += "(Throwable with empty stack trace)";
     } else {
-      auto ptr_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+      const size_t ptr_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
       for (int32_t i = 0; i < depth; ++i) {
         ArtMethod* method = method_trace->GetElementPtrSize<ArtMethod*>(i, ptr_size);
         uintptr_t dex_pc = method_trace->GetElementPtrSize<uintptr_t>(i + depth, ptr_size);
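
With the change above, the Throwable's stack state is an Object[] whose element 0 is the flat [method pointers][dex pcs] array and whose remaining elements keep declaring classes alive, so the depth is length - 1 on the outer array and half the length of element 0. A standalone sketch of indexing that inner layout (plain vectors replace the mirror types; that substitution is an assumption):

  #include <cassert>
  #include <cstdint>
  #include <vector>

  struct FakeMethod { const char* name; };

  // Element 0 of the outer trace array: [method pointers][dex pcs], each half 'depth' long.
  using MethodsAndPcs = std::vector<uintptr_t>;

  int32_t DepthOf(const MethodsAndPcs& methods_and_pcs) {
    assert(methods_and_pcs.size() % 2 == 0);
    return static_cast<int32_t>(methods_and_pcs.size() / 2);
  }

  int main() {
    FakeMethod foo{"foo"};
    FakeMethod bar{"bar"};
    const MethodsAndPcs methods_and_pcs = {
        reinterpret_cast<uintptr_t>(&foo), reinterpret_cast<uintptr_t>(&bar),  // methods
        7u, 42u};                                                              // dex pcs
    const int32_t depth = DepthOf(methods_and_pcs);
    for (int32_t i = 0; i < depth; ++i) {
      FakeMethod* method = reinterpret_cast<FakeMethod*>(methods_and_pcs[i]);
      uintptr_t dex_pc = methods_and_pcs[i + depth];
      (void)method;
      (void)dex_pc;  // a real dumper would print the method name and pc here
    }
    return depth == 2 ? 0 : 1;
  }
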
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 5c13e13..63f43cf 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -20,6 +20,7 @@
 #include "art_method-inl.h"
 #include "dex_instruction.h"
 #include "entrypoints/entrypoint_utils.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "handle_scope-inl.h"
 #include "mirror/class-inl.h"
@@ -36,8 +37,9 @@
   : self_(self), context_(self->GetLongJumpContext()), is_deoptimization_(is_deoptimization),
     method_tracing_active_(is_deoptimization ||
                            Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()),
-    handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_method_(nullptr),
-    handler_dex_pc_(0), clear_exception_(false), handler_frame_depth_(kInvalidFrameDepth) {
+    handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_quick_arg0_(0),
+    handler_method_(nullptr), handler_dex_pc_(0), clear_exception_(false),
+    handler_frame_depth_(kInvalidFrameDepth) {
 }
 
 // Finds catch handler.
@@ -260,19 +262,25 @@
 // Prepares deoptimization.
 class DeoptimizeStackVisitor FINAL : public StackVisitor {
  public:
-  DeoptimizeStackVisitor(Thread* self, Context* context, QuickExceptionHandler* exception_handler)
+  DeoptimizeStackVisitor(Thread* self,
+                         Context* context,
+                         QuickExceptionHandler* exception_handler,
+                         bool single_frame)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         exception_handler_(exception_handler),
         prev_shadow_frame_(nullptr),
-        stacked_shadow_frame_pushed_(false) {
+        stacked_shadow_frame_pushed_(false),
+        single_frame_deopt_(single_frame),
+        single_frame_done_(false) {
   }
 
   bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     exception_handler_->SetHandlerFrameDepth(GetFrameDepth());
     ArtMethod* method = GetMethod();
-    if (method == nullptr) {
-      // This is the upcall, we remember the frame and last pc so that we may long jump to them.
+    if (method == nullptr || single_frame_done_) {
+      // This is the upcall (or the next full frame in single-frame deopt), we remember the frame
+      // and last pc so that we may long jump to them.
       exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc());
       exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
       if (!stacked_shadow_frame_pushed_) {
@@ -295,7 +303,13 @@
       CHECK_EQ(GetFrameDepth(), 1U);
       return true;
     } else {
-      return HandleDeoptimization(method);
+      HandleDeoptimization(method);
+      if (single_frame_deopt_ && !IsInInlinedFrame()) {
+        // Single-frame deopt ends at the first non-inlined frame and needs to store that method.
+        exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
+        single_frame_done_ = true;
+      }
+      return true;
     }
   }
 
@@ -304,7 +318,7 @@
     return static_cast<VRegKind>(kinds.at(reg * 2));
   }
 
-  bool HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) {
+  void HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) {
     const DexFile::CodeItem* code_item = m->GetCodeItem();
     CHECK(code_item != nullptr) << "No code item for " << PrettyMethod(m);
     uint16_t num_regs = code_item->registers_size_;
@@ -448,16 +462,20 @@
       // Will be popped after the long jump after DeoptimizeStack(),
       // right before interpreter::EnterInterpreterFromDeoptimize().
       stacked_shadow_frame_pushed_ = true;
-      GetThread()->PushStackedShadowFrame(new_frame,
-                                          StackedShadowFrameType::kDeoptimizationShadowFrame);
+      GetThread()->PushStackedShadowFrame(
+          new_frame,
+          single_frame_deopt_
+              ? StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame
+              : StackedShadowFrameType::kDeoptimizationShadowFrame);
     }
     prev_shadow_frame_ = new_frame;
-    return true;
   }
 
   QuickExceptionHandler* const exception_handler_;
   ShadowFrame* prev_shadow_frame_;
   bool stacked_shadow_frame_pushed_;
+  const bool single_frame_deopt_;
+  bool single_frame_done_;
 
   DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
 };
@@ -468,13 +486,46 @@
     self_->DumpStack(LOG(INFO) << "Deoptimizing: ");
   }
 
-  DeoptimizeStackVisitor visitor(self_, context_, this);
+  DeoptimizeStackVisitor visitor(self_, context_, this, false);
   visitor.WalkStack(true);
 
   // Restore deoptimization exception
   self_->SetException(Thread::GetDeoptimizationException());
 }
 
+void QuickExceptionHandler::DeoptimizeSingleFrame() {
+  DCHECK(is_deoptimization_);
+
+  if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) {
+    LOG(INFO) << "Single-frame deopting:";
+    DumpFramesWithType(self_, true);
+  }
+
+  DeoptimizeStackVisitor visitor(self_, context_, this, true);
+  visitor.WalkStack(true);
+
+  // The PC needs to be that of the quick-to-interpreter bridge.
+  int32_t offset;
+#ifdef __LP64__
+  offset = GetThreadOffset<8>(kQuickQuickToInterpreterBridge).Int32Value();
+#else
+  offset = GetThreadOffset<4>(kQuickQuickToInterpreterBridge).Int32Value();
+#endif
+  handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>(
+      reinterpret_cast<uint8_t*>(self_) + offset);
+}
+
+void QuickExceptionHandler::DeoptimizeSingleFrameArchDependentFixup() {
+  // Architecture-dependent work. This is to get the LR right for x86 and x86-64.
+
+  if (kRuntimeISA == InstructionSet::kX86 || kRuntimeISA == InstructionSet::kX86_64) {
+    // On x86, the return address is on the stack, so just reuse it. Otherwise we would have to
+    // change how longjump works.
+    handler_quick_frame_ = reinterpret_cast<ArtMethod**>(
+        reinterpret_cast<uintptr_t>(handler_quick_frame_) - sizeof(void*));
+  }
+}
+
 // Unwinds all instrumentation stack frames prior to the catch handler or upcall.
 class InstrumentationStackVisitor : public StackVisitor {
  public:
@@ -529,15 +580,67 @@
   }
 }
 
-void QuickExceptionHandler::DoLongJump() {
+void QuickExceptionHandler::DoLongJump(bool smash_caller_saves) {
   // Place context back on thread so it will be available when we continue.
   self_->ReleaseLongJumpContext(context_);
   context_->SetSP(reinterpret_cast<uintptr_t>(handler_quick_frame_));
   CHECK_NE(handler_quick_frame_pc_, 0u);
   context_->SetPC(handler_quick_frame_pc_);
-  context_->SmashCallerSaves();
+  context_->SetArg0(handler_quick_arg0_);
+  if (smash_caller_saves) {
+    context_->SmashCallerSaves();
+  }
   context_->DoLongJump();
   UNREACHABLE();
 }
 
+// Prints out methods with their type of frame.
+class DumpFramesWithTypeStackVisitor FINAL : public StackVisitor {
+ public:
+  DumpFramesWithTypeStackVisitor(Thread* self, bool show_details = false)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(self, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        show_details_(show_details) {}
+
+  bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* method = GetMethod();
+    if (show_details_) {
+      LOG(INFO) << "|> pc   = " << std::hex << GetCurrentQuickFramePc();
+      LOG(INFO) << "|> addr = " << std::hex << reinterpret_cast<uintptr_t>(GetCurrentQuickFrame());
+      if (GetCurrentQuickFrame() != nullptr && method != nullptr) {
+        LOG(INFO) << "|> ret  = " << std::hex << GetReturnPc();
+      }
+    }
+    if (method == nullptr) {
+      // Transition frame; keep going, we want to unwind over bridges all the way.
+      if (show_details_) {
+        LOG(INFO) << "N  <transition>";
+      }
+      return true;
+    } else if (method->IsRuntimeMethod()) {
+      if (show_details_) {
+        LOG(INFO) << "R  " << PrettyMethod(method, true);
+      }
+      return true;
+    } else {
+      bool is_shadow = GetCurrentShadowFrame() != nullptr;
+      LOG(INFO) << (is_shadow ? "S" : "Q")
+                << ((!is_shadow && IsInInlinedFrame()) ? "i" : " ")
+                << " "
+                << PrettyMethod(method, true);
+      return true;  // Go on.
+    }
+  }
+
+ private:
+  bool show_details_;
+
+  DISALLOW_COPY_AND_ASSIGN(DumpFramesWithTypeStackVisitor);
+};
+
+void QuickExceptionHandler::DumpFramesWithType(Thread* self, bool details) {
+  DumpFramesWithTypeStackVisitor visitor(self, details);
+  visitor.WalkStack(true);
+}
+
 }  // namespace art
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index 2e05c7e..89d6a25 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -49,6 +49,9 @@
   // Deoptimize the stack to the upcall. For every compiled frame, we create a "copy"
   // shadow frame that will be executed with the interpreter.
   void DeoptimizeStack() SHARED_REQUIRES(Locks::mutator_lock_);
+  void DeoptimizeSingleFrame() SHARED_REQUIRES(Locks::mutator_lock_);
+  void DeoptimizeSingleFrameArchDependentFixup() SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Update the instrumentation stack by removing all methods that will be unwound
   // by the exception being thrown.
   void UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_);
@@ -58,7 +61,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Long jump either to a catch handler or to the upcall.
-  NO_RETURN void DoLongJump() SHARED_REQUIRES(Locks::mutator_lock_);
+  NO_RETURN void DoLongJump(bool smash_caller_saves = true) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void SetHandlerQuickFrame(ArtMethod** handler_quick_frame) {
     handler_quick_frame_ = handler_quick_frame;
@@ -68,6 +71,10 @@
     handler_quick_frame_pc_ = handler_quick_frame_pc;
   }
 
+  void SetHandlerQuickArg0(uintptr_t handler_quick_arg0) {
+    handler_quick_arg0_ = handler_quick_arg0;
+  }
+
   ArtMethod* GetHandlerMethod() const {
     return handler_method_;
   }
@@ -92,6 +99,11 @@
     handler_frame_depth_ = frame_depth;
   }
 
+  // Walk the stack frames of the given thread, printing out non-runtime methods with their types
+  // of frames. Helps to verify that single-frame deopt really only deopted one frame.
+  static void DumpFramesWithType(Thread* self, bool details = false)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   Thread* const self_;
   Context* const context_;
@@ -103,6 +115,8 @@
   ArtMethod** handler_quick_frame_;
   // PC to branch to for the handler.
   uintptr_t handler_quick_frame_pc_;
+  // The value for argument 0.
+  uintptr_t handler_quick_arg0_;
   // The handler method to report to the debugger.
   ArtMethod* handler_method_;
   // The handler's dex PC, zero implies an uncaught exception.
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index daae401..85ac4aa 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -62,8 +62,10 @@
     if (heap != nullptr && heap->GetReadBarrierTable()->IsSet(old_ref)) {
       ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
       // Update the field atomically. This may fail if mutator updates before us, but it's ok.
-      obj->CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrier<false, false>(
-          offset, old_ref, ref);
+      if (ref != old_ref) {
+        obj->CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrier<false, false>(
+            offset, old_ref, ref);
+      }
     }
     AssertToSpaceInvariant(obj, offset, ref);
     return ref;
@@ -90,17 +92,17 @@
     // To be implemented.
     return ref;
   } else if (with_read_barrier && kUseTableLookupReadBarrier) {
-    if (kMaybeDuringStartup && IsDuringStartup()) {
-      // During startup, the heap may not be initialized yet. Just
-      // return the given ref.
-      return ref;
-    }
-    if (Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) {
+    Thread* self = Thread::Current();
+    if (self != nullptr &&
+        self->GetIsGcMarking() &&
+        Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) {
       MirrorType* old_ref = ref;
       ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
       // Update the field atomically. This may fail if mutator updates before us, but it's ok.
-      Atomic<mirror::Object*>* atomic_root = reinterpret_cast<Atomic<mirror::Object*>*>(root);
-      atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, ref);
+      if (ref != old_ref) {
+        Atomic<mirror::Object*>* atomic_root = reinterpret_cast<Atomic<mirror::Object*>*>(root);
+        atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, ref);
+      }
     }
     AssertToSpaceInvariant(gc_root_source, ref);
     return ref;
@@ -127,19 +129,19 @@
     // To be implemented.
     return ref;
   } else if (with_read_barrier && kUseTableLookupReadBarrier) {
-    if (kMaybeDuringStartup && IsDuringStartup()) {
-      // During startup, the heap may not be initialized yet. Just
-      // return the given ref.
-      return ref;
-    }
-    if (Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) {
+    Thread* self = Thread::Current();
+    if (self != nullptr &&
+        self->GetIsGcMarking() &&
+        Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) {
       auto old_ref = mirror::CompressedReference<MirrorType>::FromMirrorPtr(ref);
       ref = reinterpret_cast<MirrorType*>(Mark(ref));
       auto new_ref = mirror::CompressedReference<MirrorType>::FromMirrorPtr(ref);
       // Update the field atomically. This may fail if mutator updates before us, but it's ok.
-      auto* atomic_root =
-          reinterpret_cast<Atomic<mirror::CompressedReference<MirrorType>>*>(root);
-      atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, new_ref);
+      if (new_ref.AsMirrorPtr() != old_ref.AsMirrorPtr()) {
+        auto* atomic_root =
+            reinterpret_cast<Atomic<mirror::CompressedReference<MirrorType>>*>(root);
+        atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, new_ref);
+      }
     }
     AssertToSpaceInvariant(gc_root_source, ref);
     return ref;
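
The guards added above skip the compare-and-swap entirely when Mark() returns the same pointer, since the CAS would only generate memory traffic in that case. A standalone sketch of the guarded root update using std::atomic, with a trivial stand-in for Mark (an assumption):

  #include <atomic>
  #include <cassert>

  struct Obj { int payload; };

  // Stand-in for the read barrier's Mark(): here it simply returns the reference unchanged.
  Obj* Mark(Obj* ref) { return ref; }

  Obj* ReadRootWithBarrier(std::atomic<Obj*>* root) {
    Obj* old_ref = root->load(std::memory_order_relaxed);
    Obj* new_ref = Mark(old_ref);
    if (new_ref != old_ref) {
      // Only pay for the atomic update when the reference actually moved. A failed CAS is
      // fine: it means a mutator already installed a newer value.
      root->compare_exchange_strong(old_ref, new_ref);
    }
    return new_ref;
  }

  int main() {
    Obj obj{1};
    std::atomic<Obj*> root(&obj);
    assert(ReadRootWithBarrier(&root) == &obj);
    return 0;
  }
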
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 6b144cf..1f447d0 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -274,9 +274,6 @@
     VLOG(jit) << "Deleting jit";
     jit_.reset(nullptr);
   }
-  linear_alloc_.reset();
-  arena_pool_.reset();
-  low_4gb_arena_pool_.reset();
 
   // Shutdown the fault manager if it was initialized.
   fault_manager.Shutdown();
@@ -290,7 +287,13 @@
   Thread::Shutdown();
   QuasiAtomic::Shutdown();
   verifier::MethodVerifier::Shutdown();
+
+  // Destroy allocators before shutting down the MemMap because they may use it.
+  linear_alloc_.reset();
+  low_4gb_arena_pool_.reset();
+  arena_pool_.reset();
   MemMap::Shutdown();
+
   // TODO: acquire a static mutex on Runtime to avoid racing.
   CHECK(instance_ == nullptr || instance_ == this);
   instance_ = nullptr;
@@ -941,13 +944,11 @@
   // can't be trimmed as easily.
   const bool use_malloc = IsAotCompiler();
   arena_pool_.reset(new ArenaPool(use_malloc, false));
-  if (IsCompiler() && Is64BitInstructionSet(kRuntimeISA)) {
+  if (IsAotCompiler() && Is64BitInstructionSet(kRuntimeISA)) {
     // 4gb, no malloc. Explanation in header.
     low_4gb_arena_pool_.reset(new ArenaPool(false, true));
-    linear_alloc_.reset(new LinearAlloc(low_4gb_arena_pool_.get()));
-  } else {
-    linear_alloc_.reset(new LinearAlloc(arena_pool_.get()));
   }
+  linear_alloc_.reset(CreateLinearAlloc());
 
   BlockSignals();
   InitPlatformSignalHandlers();
@@ -1788,4 +1789,13 @@
   return verify_ == verifier::VerifyMode::kSoftFail;
 }
 
+LinearAlloc* Runtime::CreateLinearAlloc() {
+  // For 64 bit compilers, the allocation needs to be in the low 4GB in case we are cross
+  // compiling for a 32 bit target. In that case the dex cache arrays hold 32 bit pointers,
+  // which cannot store 64 bit ArtMethod pointers.
+  return (IsAotCompiler() && Is64BitInstructionSet(kRuntimeISA))
+      ? new LinearAlloc(low_4gb_arena_pool_.get())
+      : new LinearAlloc(arena_pool_.get());
+}
+
 }  // namespace art
diff --git a/runtime/runtime.h b/runtime/runtime.h
index a35eac1..6154c34 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -570,6 +570,9 @@
   // Called from class linker.
   void SetSentinel(mirror::Object* sentinel) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Create a normal LinearAlloc, or a low 4gb version if we are the 64 bit AOT compiler.
+  LinearAlloc* CreateLinearAlloc();
+
  private:
   static void InitPlatformSignalHandlers();
 
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index 04549c7..7ac17b6 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -92,6 +92,11 @@
     DCHECK(result.second);  // Check we didn't accidentally overwrite an existing value.
     return result.first;
   }
+  iterator Put(const K& k, V&& v) {
+    std::pair<iterator, bool> result = map_.emplace(k, std::move(v));
+    DCHECK(result.second);  // Check we didn't accidentally overwrite an existing value.
+    return result.first;
+  }
 
   // Used to insert a new mapping at a known position for better performance.
   iterator PutBefore(iterator pos, const K& k, const V& v) {
@@ -100,10 +105,16 @@
     DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k));
     return map_.emplace_hint(pos, k, v);
   }
+  iterator PutBefore(iterator pos, const K& k, V&& v) {
+    // Check that we're using the correct position and the key is not in the map.
+    DCHECK(pos == map_.end() || map_.key_comp()(k, pos->first));
+    DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k));
+    return map_.emplace_hint(pos, k, std::move(v));
+  }
 
   // Used to insert a new mapping or overwrite an existing mapping. Note that if the value type
   // of this container is a pointer, any overwritten pointer will be lost and if this container
-  // was the owner, you have a leak.
+  // was the owner, you have a leak. Returns iterator pointing to the new or overwritten entry.
   iterator Overwrite(const K& k, const V& v) {
     std::pair<iterator, bool> result = map_.insert(std::make_pair(k, v));
     if (!result.second) {
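
The new Put/PutBefore overloads accept the value as an rvalue and forward it with std::move, so move-only or expensive-to-copy values can be inserted without a copy. A minimal illustration of the same emplace-and-move pattern on std::map (SafeMap itself is not reproduced here):

  #include <cassert>
  #include <map>
  #include <memory>
  #include <string>
  #include <utility>

  int main() {
    std::map<std::string, std::unique_ptr<int>> map;

    // A move-only value cannot go through the const-reference overload; emplacing with
    // std::move is what the rvalue Put overload does internally.
    std::unique_ptr<int> value(new int(5));
    auto result = map.emplace("key", std::move(value));
    assert(result.second);     // mirrors the DCHECK that no existing entry was overwritten
    assert(value == nullptr);  // the value was moved, not copied
    return *result.first->second == 5 ? 0 : 1;
  }
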
diff --git a/runtime/stack.cc b/runtime/stack.cc
index d739743..1d21a64 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -110,7 +110,7 @@
 }
 
 InlineInfo StackVisitor::GetCurrentInlineInfo() const {
-  ArtMethod* outer_method = *GetCurrentQuickFrame();
+  ArtMethod* outer_method = GetOuterMethod();
   uint32_t native_pc_offset = outer_method->NativeQuickPcOffset(cur_quick_frame_pc_);
   CodeInfo code_info = outer_method->GetOptimizedCodeInfo();
   StackMapEncoding encoding = code_info.ExtractEncoding();
@@ -194,11 +194,12 @@
 }
 
 bool StackVisitor::IsReferenceVReg(ArtMethod* m, uint16_t vreg) {
+  DCHECK_EQ(m, GetMethod());
   // Process register map (which native and runtime methods don't have)
   if (m->IsNative() || m->IsRuntimeMethod() || m->IsProxyMethod()) {
     return false;
   }
-  if (m->IsOptimized(sizeof(void*))) {
+  if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
     return true;  // TODO: Implement.
   }
   const uint8_t* native_gc_map = m->GetNativeGcMap(sizeof(void*));
@@ -251,7 +252,7 @@
     if (GetVRegFromDebuggerShadowFrame(vreg, kind, val)) {
       return true;
     }
-    if (m->IsOptimized(sizeof(void*))) {
+    if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
       return GetVRegFromOptimizedCode(m, vreg, kind, val);
     } else {
       return GetVRegFromQuickCode(m, vreg, kind, val);
@@ -288,15 +289,15 @@
 
 bool StackVisitor::GetVRegFromOptimizedCode(ArtMethod* m, uint16_t vreg, VRegKind kind,
                                             uint32_t* val) const {
+  ArtMethod* outer_method = GetOuterMethod();
+  const void* code_pointer = outer_method->GetQuickOatCodePointer(sizeof(void*));
+  DCHECK(code_pointer != nullptr);
   DCHECK_EQ(m, GetMethod());
   const DexFile::CodeItem* code_item = m->GetCodeItem();
   DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
                                                     // its instructions?
   uint16_t number_of_dex_registers = code_item->registers_size_;
   DCHECK_LT(vreg, code_item->registers_size_);
-  ArtMethod* outer_method = *GetCurrentQuickFrame();
-  const void* code_pointer = outer_method->GetQuickOatCodePointer(sizeof(void*));
-  DCHECK(code_pointer != nullptr);
   CodeInfo code_info = outer_method->GetOptimizedCodeInfo();
   StackMapEncoding encoding = code_info.ExtractEncoding();
 
@@ -405,7 +406,7 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably read registers without a context.
     DCHECK(m == GetMethod());
-    if (m->IsOptimized(sizeof(void*))) {
+    if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
       return GetVRegPairFromOptimizedCode(m, vreg, kind_lo, kind_hi, val);
     } else {
       return GetVRegPairFromQuickCode(m, vreg, kind_lo, kind_hi, val);
@@ -481,7 +482,7 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
     DCHECK(m == GetMethod());
-    if (m->IsOptimized(sizeof(void*))) {
+    if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
       return false;
     } else {
       return SetVRegFromQuickCode(m, vreg, new_value, kind);
@@ -590,7 +591,7 @@
   if (cur_quick_frame_ != nullptr) {
     DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
     DCHECK(m == GetMethod());
-    if (m->IsOptimized(sizeof(void*))) {
+    if (GetOuterMethod()->IsOptimized(sizeof(void*))) {
       return false;
     } else {
       return SetVRegPairFromQuickCode(m, vreg, new_value, kind_lo, kind_hi);
@@ -724,14 +725,14 @@
 uintptr_t StackVisitor::GetReturnPc() const {
   uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
   DCHECK(sp != nullptr);
-  uint8_t* pc_addr = sp + GetMethod()->GetReturnPcOffset().SizeValue();
+  uint8_t* pc_addr = sp + GetOuterMethod()->GetReturnPcOffset().SizeValue();
   return *reinterpret_cast<uintptr_t*>(pc_addr);
 }
 
 void StackVisitor::SetReturnPc(uintptr_t new_ret_pc) {
   uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
   CHECK(sp != nullptr);
-  uint8_t* pc_addr = sp + GetMethod()->GetReturnPcOffset().SizeValue();
+  uint8_t* pc_addr = sp + GetOuterMethod()->GetReturnPcOffset().SizeValue();
   *reinterpret_cast<uintptr_t*>(pc_addr) = new_ret_pc;
 }
 
@@ -840,23 +841,30 @@
     } else {
       CHECK(declaring_class == nullptr);
     }
-    auto* runtime = Runtime::Current();
-    auto* la = runtime->GetLinearAlloc();
-    if (!la->Contains(method)) {
-      // Check image space.
-      bool in_image = false;
-      for (auto& space : runtime->GetHeap()->GetContinuousSpaces()) {
-        if (space->IsImageSpace()) {
-          auto* image_space = space->AsImageSpace();
-          const auto& header = image_space->GetImageHeader();
-          const auto* methods = &header.GetMethodsSection();
-          if (methods->Contains(reinterpret_cast<const uint8_t*>(method) - image_space->Begin())) {
-            in_image = true;
-            break;
+    Runtime* const runtime = Runtime::Current();
+    LinearAlloc* const linear_alloc = runtime->GetLinearAlloc();
+    if (!linear_alloc->Contains(method)) {
+      // Check class linker linear allocs.
+      mirror::Class* klass = method->GetDeclaringClass();
+      LinearAlloc* const class_linear_alloc = (klass != nullptr)
+          ? ClassLinker::GetAllocatorForClassLoader(klass->GetClassLoader())
+          : linear_alloc;
+      if (!class_linear_alloc->Contains(method)) {
+        // Check image space.
+        bool in_image = false;
+        for (auto& space : runtime->GetHeap()->GetContinuousSpaces()) {
+          if (space->IsImageSpace()) {
+            auto* image_space = space->AsImageSpace();
+            const auto& header = image_space->GetImageHeader();
+            const auto* methods = &header.GetMethodsSection();
+            if (methods->Contains(reinterpret_cast<const uint8_t*>(method) - image_space->Begin())) {
+              in_image = true;
+              break;
+            }
           }
         }
+        CHECK(in_image) << PrettyMethod(method) << " not in linear alloc or image";
       }
-      CHECK(in_image) << PrettyMethod(method) << " not in linear alloc or image";
     }
     if (cur_quick_frame_ != nullptr) {
       method->AssertPcIsWithinQuickCode(cur_quick_frame_pc_);
diff --git a/runtime/stack.h b/runtime/stack.h
index b805239..31acf0e 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -62,6 +62,10 @@
 class MANAGED StackReference : public mirror::CompressedReference<MirrorType> {
 };
 
+// Forward declaration. Just calls the destructor.
+struct ShadowFrameDeleter;
+using ShadowFrameAllocaUniquePtr = std::unique_ptr<ShadowFrame, ShadowFrameDeleter>;
+
 // ShadowFrame has 2 possible layouts:
 //  - interpreter - separate VRegs and reference arrays. References are in the reference array.
 //  - JNI - just VRegs, but where every VReg holds a reference.
@@ -77,21 +81,26 @@
   static ShadowFrame* CreateDeoptimizedFrame(uint32_t num_vregs, ShadowFrame* link,
                                              ArtMethod* method, uint32_t dex_pc) {
     uint8_t* memory = new uint8_t[ComputeSize(num_vregs)];
-    return Create(num_vregs, link, method, dex_pc, memory);
+    return CreateShadowFrameImpl(num_vregs, link, method, dex_pc, memory);
   }
 
   // Delete a ShadowFrame allocated on the heap for deoptimization.
   static void DeleteDeoptimizedFrame(ShadowFrame* sf) {
+    sf->~ShadowFrame();  // Explicitly destruct.
     uint8_t* memory = reinterpret_cast<uint8_t*>(sf);
     delete[] memory;
   }
 
-  // Create ShadowFrame for interpreter using provided memory.
-  static ShadowFrame* Create(uint32_t num_vregs, ShadowFrame* link,
-                             ArtMethod* method, uint32_t dex_pc, void* memory) {
-    ShadowFrame* sf = new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true);
-    return sf;
-  }
+  // Create a shadow frame in a fresh alloca. This needs to be in the context of the caller.
+  // Inlining doesn't work; the compiler would still undo the alloca, so this needs to be a macro.
+#define CREATE_SHADOW_FRAME(num_vregs, link, method, dex_pc) ({                              \
+    size_t frame_size = ShadowFrame::ComputeSize(num_vregs);                                 \
+    void* alloca_mem = alloca(frame_size);                                                   \
+    ShadowFrameAllocaUniquePtr(                                                              \
+        ShadowFrame::CreateShadowFrameImpl((num_vregs), (link), (method), (dex_pc),          \
+                                           (alloca_mem)));                                   \
+    })
+
   ~ShadowFrame() {}
 
   // TODO(iam): Clean references array up since they're always there,
@@ -283,6 +292,15 @@
     return OFFSETOF_MEMBER(ShadowFrame, vregs_);
   }
 
+  // Create ShadowFrame for interpreter using provided memory.
+  static ShadowFrame* CreateShadowFrameImpl(uint32_t num_vregs,
+                                            ShadowFrame* link,
+                                            ArtMethod* method,
+                                            uint32_t dex_pc,
+                                            void* memory) {
+    return new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true);
+  }
+
  private:
   ShadowFrame(uint32_t num_vregs, ShadowFrame* link, ArtMethod* method,
               uint32_t dex_pc, bool has_reference_array)
@@ -326,6 +344,14 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(ShadowFrame);
 };
 
+struct ShadowFrameDeleter {
+  inline void operator()(ShadowFrame* frame) {
+    if (frame != nullptr) {
+      frame->~ShadowFrame();
+    }
+  }
+};
+
 class JavaFrameRootInfo : public RootInfo {
  public:
   JavaFrameRootInfo(uint32_t thread_id, const StackVisitor* stack_visitor, size_t vreg)
@@ -447,6 +473,10 @@
 
   ArtMethod* GetMethod() const SHARED_REQUIRES(Locks::mutator_lock_);
 
+  ArtMethod* GetOuterMethod() const {
+    return *GetCurrentQuickFrame();
+  }
+
   bool IsShadowFrame() const {
     return cur_shadow_frame_ != nullptr;
   }
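
The CREATE_SHADOW_FRAME macro above combines an alloca in the caller's frame, placement new, and a unique_ptr whose deleter only runs the destructor (the stack owns the bytes). A standalone sketch of that ownership pattern with a plain struct standing in for ShadowFrame (the macro itself is not reproduced, since the alloca has to happen directly in the owning function):

  #include <alloca.h>
  #include <cstdint>
  #include <memory>
  #include <new>

  struct Frame {
    explicit Frame(uint32_t num_vregs) : num_vregs_(num_vregs) {}
    ~Frame() {}
    uint32_t num_vregs_;
  };

  // Deleter that destructs in place but never frees: the memory belongs to the caller's stack.
  struct FrameDeleter {
    void operator()(Frame* frame) const {
      if (frame != nullptr) {
        frame->~Frame();
      }
    }
  };
  using FrameAllocaUniquePtr = std::unique_ptr<Frame, FrameDeleter>;

  int main() {
    // The alloca must be written directly in the function whose lifetime owns the frame.
    void* memory = alloca(sizeof(Frame));
    FrameAllocaUniquePtr frame(new (memory) Frame(16u));
    return frame->num_vregs_ == 16u ? 0 : 1;
  }
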
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 8bf241b..f5d20bd 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -118,11 +118,8 @@
   }
 }
 
-inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) {
-  AssertThreadSuspensionIsAllowable();
+inline void Thread::TransitionToSuspendedAndRunCheckpoints(ThreadState new_state) {
   DCHECK_NE(new_state, kRunnable);
-  DCHECK_EQ(this, Thread::Current());
-  // Change to non-runnable state, thereby appearing suspended to the system.
   DCHECK_EQ(GetState(), kRunnable);
   union StateAndFlags old_state_and_flags;
   union StateAndFlags new_state_and_flags;
@@ -145,12 +142,9 @@
       break;
     }
   }
+}
 
-  // Change to non-runnable state, thereby appearing suspended to the system.
-  // Mark the release of the share of the mutator_lock_.
-  Locks::mutator_lock_->TransitionFromRunnableToSuspended(this);
-
-  // Once suspended - check the active suspend barrier flag
+inline void Thread::PassActiveSuspendBarriers() {
   while (true) {
     uint16_t current_flags = tls32_.state_and_flags.as_struct.flags;
     if (LIKELY((current_flags & (kCheckpointRequest | kActiveSuspendBarrier)) == 0)) {
@@ -159,11 +153,22 @@
       PassActiveSuspendBarriers(this);
     } else {
       // Impossible
-      LOG(FATAL) << "Fatal, thread transited into suspended without running the checkpoint";
+      LOG(FATAL) << "Fatal, thread transitioned into suspended without running the checkpoint";
     }
   }
 }
 
+inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) {
+  AssertThreadSuspensionIsAllowable();
+  DCHECK_EQ(this, Thread::Current());
+  // Change to non-runnable state, thereby appearing suspended to the system.
+  TransitionToSuspendedAndRunCheckpoints(new_state);
+  // Mark the release of the share of the mutator_lock_.
+  Locks::mutator_lock_->TransitionFromRunnableToSuspended(this);
+  // Once suspended - check the active suspend barrier flag
+  PassActiveSuspendBarriers();
+}
+
 inline ThreadState Thread::TransitionFromSuspendedToRunnable() {
   union StateAndFlags old_state_and_flags;
   old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
@@ -191,7 +196,9 @@
       PassActiveSuspendBarriers(this);
     } else if ((old_state_and_flags.as_struct.flags & kCheckpointRequest) != 0) {
       // Impossible
-      LOG(FATAL) << "Fatal, wrong checkpoint flag";
+      LOG(FATAL) << "Transitioning to runnable with checkpoint flag,"
+                 << " flags=" << old_state_and_flags.as_struct.flags
+                 << " state=" << old_state_and_flags.as_struct.state;
     } else if ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
       // Wait while our suspend count is non-zero.
       MutexLock mu(this, *Locks::thread_suspend_count_lock_);
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 5bf895e..65f71ef 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -250,10 +250,16 @@
   tlsPtr_.stacked_shadow_frame_record = record;
 }
 
-ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type) {
+ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type, bool must_be_present) {
   StackedShadowFrameRecord* record = tlsPtr_.stacked_shadow_frame_record;
-  DCHECK(record != nullptr);
-  DCHECK_EQ(record->GetType(), type);
+  if (must_be_present) {
+    DCHECK(record != nullptr);
+    DCHECK_EQ(record->GetType(), type);
+  } else {
+    if (record == nullptr || record->GetType() != type) {
+      return nullptr;
+    }
+  }
   tlsPtr_.stacked_shadow_frame_record = record->GetLink();
   ShadowFrame* shadow_frame = record->GetShadowFrame();
   delete record;
@@ -1960,15 +1966,32 @@
         pointer_size_(Runtime::Current()->GetClassLinker()->GetImagePointerSize()) {}
 
   bool Init(int depth) SHARED_REQUIRES(Locks::mutator_lock_) ACQUIRE(Roles::uninterruptible_) {
-    // Allocate method trace with format [method pointers][pcs].
-    auto* cl = Runtime::Current()->GetClassLinker();
-    trace_ = cl->AllocPointerArray(self_, depth * 2);
-    const char* last_no_suspend_cause =
-        self_->StartAssertNoThreadSuspension("Building internal stack trace");
-    if (trace_ == nullptr) {
+    // Allocate method trace as an object array where the first element is a pointer array that
+    // contains the ArtMethod pointers and dex PCs. The rest of the elements are the declaring
+    // classes of the ArtMethod pointers.
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    StackHandleScope<1> hs(self_);
+    mirror::Class* array_class = class_linker->GetClassRoot(ClassLinker::kObjectArrayClass);
+    // The first element is the methods and dex pc array; the other elements are the declaring classes
+    // for the methods to ensure classes in the stack trace don't get unloaded.
+    Handle<mirror::ObjectArray<mirror::Object>> trace(
+        hs.NewHandle(
+            mirror::ObjectArray<mirror::Object>::Alloc(hs.Self(), array_class, depth + 1)));
+    if (trace.Get() == nullptr) {
+      // Acquire uninterruptible_ in all paths.
+      self_->StartAssertNoThreadSuspension("Building internal stack trace");
       self_->AssertPendingOOMException();
       return false;
     }
+    mirror::PointerArray* methods_and_pcs = class_linker->AllocPointerArray(self_, depth * 2);
+    const char* last_no_suspend_cause =
+        self_->StartAssertNoThreadSuspension("Building internal stack trace");
+    if (methods_and_pcs == nullptr) {
+      self_->AssertPendingOOMException();
+      return false;
+    }
+    trace->Set(0, methods_and_pcs);
+    trace_ = trace.Get();
     // If We are called from native, use non-transactional mode.
     CHECK(last_no_suspend_cause == nullptr) << last_no_suspend_cause;
     return true;
@@ -1990,16 +2013,24 @@
     if (m->IsRuntimeMethod()) {
       return true;  // Ignore runtime frames (in particular callee save).
     }
-    trace_->SetElementPtrSize<kTransactionActive>(
-        count_, m, pointer_size_);
-    trace_->SetElementPtrSize<kTransactionActive>(
-        trace_->GetLength() / 2 + count_, m->IsProxyMethod() ? DexFile::kDexNoIndex : GetDexPc(),
-            pointer_size_);
+    mirror::PointerArray* trace_methods_and_pcs = GetTraceMethodsAndPCs();
+    trace_methods_and_pcs->SetElementPtrSize<kTransactionActive>(count_, m, pointer_size_);
+    trace_methods_and_pcs->SetElementPtrSize<kTransactionActive>(
+        trace_methods_and_pcs->GetLength() / 2 + count_,
+        m->IsProxyMethod() ? DexFile::kDexNoIndex : GetDexPc(),
+        pointer_size_);
+    // Save the declaring class of the method to ensure that the declaring classes of the methods
+    // do not get unloaded while the stack trace is live.
+    trace_->Set(count_ + 1, m->GetDeclaringClass());
     ++count_;
     return true;
   }
 
-  mirror::PointerArray* GetInternalStackTrace() const {
+  mirror::PointerArray* GetTraceMethodsAndPCs() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return down_cast<mirror::PointerArray*>(trace_->Get(0));
+  }
+
+  mirror::ObjectArray<mirror::Object>* GetInternalStackTrace() const {
     return trace_;
   }
 
@@ -2009,8 +2040,11 @@
   int32_t skip_depth_;
   // Current position down stack trace.
   uint32_t count_;
-  // An array of the methods on the stack, the last entries are the dex PCs.
-  mirror::PointerArray* trace_;
+  // An object array where the first element is a pointer array that contains the ArtMethod
+  // pointers on the stack and dex PCs. The rest of the elements are the declaring
+  // classes of the ArtMethod pointers. trace_[i+1] contains the declaring class of the ArtMethod of
+  // the i'th frame.
+  mirror::ObjectArray<mirror::Object>* trace_;
   // For cross compilation.
   const size_t pointer_size_;
 
@@ -2033,11 +2067,12 @@
     return nullptr;  // Allocation failed.
   }
   build_trace_visitor.WalkStack();
-  mirror::PointerArray* trace = build_trace_visitor.GetInternalStackTrace();
+  mirror::ObjectArray<mirror::Object>* trace = build_trace_visitor.GetInternalStackTrace();
   if (kIsDebugBuild) {
-    // Second half is dex PCs.
-    for (uint32_t i = 0; i < static_cast<uint32_t>(trace->GetLength() / 2); ++i) {
-      auto* method = trace->GetElementPtrSize<ArtMethod*>(
+    mirror::PointerArray* trace_methods = build_trace_visitor.GetTraceMethodsAndPCs();
+    // Second half of trace_methods is dex PCs.
+    for (uint32_t i = 0; i < static_cast<uint32_t>(trace_methods->GetLength() / 2); ++i) {
+      auto* method = trace_methods->GetElementPtrSize<ArtMethod*>(
           i, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
       CHECK(method != nullptr);
     }
@@ -2056,12 +2091,16 @@
 }
 
 jobjectArray Thread::InternalStackTraceToStackTraceElementArray(
-    const ScopedObjectAccessAlreadyRunnable& soa, jobject internal, jobjectArray output_array,
+    const ScopedObjectAccessAlreadyRunnable& soa,
+    jobject internal,
+    jobjectArray output_array,
     int* stack_depth) {
-  // Decode the internal stack trace into the depth, method trace and PC trace
-  int32_t depth = soa.Decode<mirror::PointerArray*>(internal)->GetLength() / 2;
+  // Decode the internal stack trace into the depth, method trace and PC trace.
+  // Subtract one for the methods and PC trace.
+  int32_t depth = soa.Decode<mirror::Array*>(internal)->GetLength() - 1;
+  DCHECK_GE(depth, 0);
 
-  auto* cl = Runtime::Current()->GetClassLinker();
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
 
   jobjectArray result;
 
@@ -2075,7 +2114,7 @@
   } else {
     // Create java_trace array and place in local reference table
     mirror::ObjectArray<mirror::StackTraceElement>* java_traces =
-        cl->AllocStackTraceElementArray(soa.Self(), depth);
+        class_linker->AllocStackTraceElementArray(soa.Self(), depth);
     if (java_traces == nullptr) {
       return nullptr;
     }
@@ -2087,7 +2126,12 @@
   }
 
   for (int32_t i = 0; i < depth; ++i) {
-    auto* method_trace = soa.Decode<mirror::PointerArray*>(internal);
+    mirror::ObjectArray<mirror::Object>* decoded_traces =
+        soa.Decode<mirror::Object*>(internal)->AsObjectArray<mirror::Object>();
+    // Methods and dex PC trace is element 0.
+    DCHECK(decoded_traces->Get(0)->IsIntArray() || decoded_traces->Get(0)->IsLongArray());
+    mirror::PointerArray* const method_trace =
+        down_cast<mirror::PointerArray*>(decoded_traces->Get(0));
     // Prepare parameters for StackTraceElement(String cls, String method, String file, int line)
     ArtMethod* method = method_trace->GetElementPtrSize<ArtMethod*>(i, sizeof(void*));
     uint32_t dex_pc = method_trace->GetElementPtrSize<uint32_t>(
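Note: the stack-trace changes above in thread.cc switch the internal trace from a single pointer array to an object array whose element 0 is the methods-and-dex-PCs array and whose elements 1..depth hold each frame's declaring class, so those classes stay reachable while the trace is live. The sketch below shows only the index arithmetic, using plain containers; Method, BuildTrace, and the vectors are illustrative assumptions, not mirror:: types.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Method { void* declaring_class; };

    struct InternalTrace {
      // Stands in for trace_[0]: methods in [0, depth), dex PCs in [depth, 2 * depth).
      std::vector<std::uintptr_t> methods_and_pcs;
      // Stands in for trace_[1 .. depth]: keeps each frame's declaring class reachable.
      std::vector<void*> declaring_classes;
    };

    InternalTrace BuildTrace(const std::vector<Method*>& frames,
                             const std::vector<uint32_t>& dex_pcs) {
      const size_t depth = frames.size();
      InternalTrace trace;
      trace.methods_and_pcs.resize(2 * depth);
      trace.declaring_classes.resize(depth);
      for (size_t i = 0; i < depth; ++i) {
        trace.methods_and_pcs[i] = reinterpret_cast<std::uintptr_t>(frames[i]);  // first half: methods
        trace.methods_and_pcs[depth + i] = dex_pcs[i];                           // second half: dex PCs
        trace.declaring_classes[i] = frames[i]->declaring_class;                 // trace_[i + 1]
      }
      return trace;
    }

    int main() {
      Method m{nullptr};
      std::vector<Method*> frames{&m};
      std::vector<uint32_t> pcs{42};
      InternalTrace t = BuildTrace(frames, pcs);
      // Depth is recovered as (object array length - 1) in the new code; here it is
      // simply declaring_classes.size().
      assert(t.declaring_classes.size() == 1 && t.methods_and_pcs.size() == 2);
      return 0;
    }
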
diff --git a/runtime/thread.h b/runtime/thread.h
index 11f2e28..d262c62 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -108,7 +108,8 @@
 
 enum class StackedShadowFrameType {
   kShadowFrameUnderConstruction,
-  kDeoptimizationShadowFrame
+  kDeoptimizationShadowFrame,
+  kSingleFrameDeoptimizationShadowFrame
 };
 
 static constexpr size_t kNumRosAllocThreadLocalSizeBrackets = 34;
@@ -246,17 +247,15 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Transition from non-runnable to runnable state acquiring share on mutator_lock_.
-  ThreadState TransitionFromSuspendedToRunnable()
+  ALWAYS_INLINE ThreadState TransitionFromSuspendedToRunnable()
       REQUIRES(!Locks::thread_suspend_count_lock_)
-      SHARED_LOCK_FUNCTION(Locks::mutator_lock_)
-      ALWAYS_INLINE;
+      SHARED_LOCK_FUNCTION(Locks::mutator_lock_);
 
   // Transition from runnable into a state where mutator privileges are denied. Releases share of
   // mutator lock.
-  void TransitionFromRunnableToSuspended(ThreadState new_state)
+  ALWAYS_INLINE void TransitionFromRunnableToSuspended(ThreadState new_state)
       REQUIRES(!Locks::thread_suspend_count_lock_, !Roles::uninterruptible_)
-      UNLOCK_FUNCTION(Locks::mutator_lock_)
-      ALWAYS_INLINE;
+      UNLOCK_FUNCTION(Locks::mutator_lock_);
 
   // Once called thread suspension will cause an assertion failure.
   const char* StartAssertNoThreadSuspension(const char* cause) ACQUIRE(Roles::uninterruptible_) {
@@ -843,7 +842,7 @@
   void AssertHasDeoptimizationContext()
       SHARED_REQUIRES(Locks::mutator_lock_);
   void PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type);
-  ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type);
+  ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type, bool must_be_present = true);
 
   // For debugger, find the shadow frame that corresponds to a frame id.
   // Or return null if there is none.
@@ -1016,11 +1015,15 @@
   // Dbg::Disconnected.
   ThreadState SetStateUnsafe(ThreadState new_state) {
     ThreadState old_state = GetState();
-    tls32_.state_and_flags.as_struct.state = new_state;
-    // if transit to a suspended state, check the pass barrier request.
-    if (UNLIKELY((new_state != kRunnable) &&
-                 (tls32_.state_and_flags.as_struct.flags & kActiveSuspendBarrier))) {
-      PassActiveSuspendBarriers(this);
+    if (old_state == kRunnable && new_state != kRunnable) {
+      // Need to run pending checkpoints and suspend barriers. Run checkpoints in runnable state in
+      // case they need to use a ScopedObjectAccess. If we are holding the mutator lock and a SOA
+      // attempts to TransitionFromSuspendedToRunnable, it results in a deadlock.
+      TransitionToSuspendedAndRunCheckpoints(new_state);
+      // Since we transitioned to a suspended state, check for active suspend barrier requests.
+      PassActiveSuspendBarriers();
+    } else {
+      tls32_.state_and_flags.as_struct.state = new_state;
     }
     return old_state;
   }
@@ -1063,6 +1066,12 @@
   void SetUpAlternateSignalStack();
   void TearDownAlternateSignalStack();
 
+  ALWAYS_INLINE void TransitionToSuspendedAndRunCheckpoints(ThreadState new_state)
+      REQUIRES(!Locks::thread_suspend_count_lock_, !Roles::uninterruptible_);
+
+  ALWAYS_INLINE void PassActiveSuspendBarriers()
+      REQUIRES(!Locks::thread_suspend_count_lock_, !Roles::uninterruptible_);
+
   // 32 bits of atomically changed state and flags. Keeping as 32 bits allows and atomic CAS to
   // change from being Suspended to Runnable without a suspend request occurring.
   union PACKED(4) StateAndFlags {
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index d8f80fa..0527d3a 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -16,7 +16,9 @@
 
 #include "thread_pool.h"
 
+#include "base/bit_utils.h"
 #include "base/casts.h"
+#include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/time_utils.h"
 #include "runtime.h"
@@ -30,10 +32,15 @@
                                    size_t stack_size)
     : thread_pool_(thread_pool),
       name_(name) {
+  // Add an inaccessible page to catch stack overflow.
+  stack_size += kPageSize;
   std::string error_msg;
   stack_.reset(MemMap::MapAnonymous(name.c_str(), nullptr, stack_size, PROT_READ | PROT_WRITE,
                                     false, false, &error_msg));
   CHECK(stack_.get() != nullptr) << error_msg;
+  CHECK_ALIGNED(stack_->Begin(), kPageSize);
+  int mprotect_result = mprotect(stack_->Begin(), kPageSize, PROT_NONE);
+  CHECK_EQ(mprotect_result, 0) << "Failed to mprotect() bottom page of thread pool worker stack.";
   const char* reason = "new thread pool worker thread";
   pthread_attr_t attr;
   CHECK_PTHREAD_CALL(pthread_attr_init, (&attr), reason);
@@ -92,7 +99,8 @@
   while (GetThreadCount() < num_threads) {
     const std::string worker_name = StringPrintf("%s worker thread %zu", name_.c_str(),
                                                  GetThreadCount());
-    threads_.push_back(new ThreadPoolWorker(this, worker_name, ThreadPoolWorker::kDefaultStackSize));
+    threads_.push_back(
+        new ThreadPoolWorker(this, worker_name, ThreadPoolWorker::kDefaultStackSize));
   }
   // Wait for all of the threads to attach.
   creation_barier_.Wait(self);
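Note: the thread_pool.cc change above grows each worker stack by one page and mprotect()s the lowest page to PROT_NONE, so running off the end of the stack faults immediately instead of silently corrupting neighbouring memory. A hedged POSIX sketch of the same idea follows, using raw mmap/mprotect instead of ART's MemMap; WorkerStack and the sizes are illustrative.

    #include <sys/mman.h>
    #include <unistd.h>
    #include <cstddef>

    struct WorkerStack {
      void* base;
      size_t size;
    };

    WorkerStack AllocateStackWithGuardPage(size_t requested_size) {
      const size_t page_size = static_cast<size_t>(sysconf(_SC_PAGESIZE));
      // Round the request up to whole pages and add one extra page for the guard.
      const size_t usable = (requested_size + page_size - 1) & ~(page_size - 1);
      const size_t total = usable + page_size;
      void* base = mmap(nullptr, total, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (base == MAP_FAILED) {
        return WorkerStack{nullptr, 0};
      }
      // Stacks grow downward, so make the lowest page inaccessible: any access
      // into it raises SIGSEGV instead of scribbling over other allocations.
      if (mprotect(base, page_size, PROT_NONE) != 0) {
        munmap(base, total);
        return WorkerStack{nullptr, 0};
      }
      return WorkerStack{base, total};
    }

    int main() {
      WorkerStack stack = AllocateStackWithGuardPage(64 * 1024);
      if (stack.base != nullptr) {
        munmap(stack.base, stack.size);
      }
      return 0;
    }
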
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 3d4f04c..eed3e22 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -548,7 +548,8 @@
   MethodVerifier verifier(self, m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(),
                           m->GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(),
                           true, true, false, true);
-  return verifier.FindStringInitMap();
+  // Avoid copying: The map is moved out of the verifier before the verifier is destroyed.
+  return std::move(verifier.FindStringInitMap());
 }
 
 SafeMap<uint32_t, std::set<uint32_t>>& MethodVerifier::FindStringInitMap() {
@@ -1007,6 +1008,9 @@
     case Instruction::kVerifyRegCWide:
       result = result && CheckWideRegisterIndex(inst->VRegC());
       break;
+    case Instruction::kVerifyRegCString:
+      result = result && CheckStringIndex(inst->VRegC());
+      break;
   }
   switch (inst->GetVerifyExtraFlags()) {
     case Instruction::kVerifyArrayData:
@@ -1299,17 +1303,17 @@
     return false;
   }
 
+  bool is_packed_switch = (*insns & 0xff) == Instruction::PACKED_SWITCH;
+
   uint32_t switch_count = switch_insns[1];
-  int32_t keys_offset, targets_offset;
+  int32_t targets_offset;
   uint16_t expected_signature;
-  if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
+  if (is_packed_switch) {
     /* 0=sig, 1=count, 2/3=firstKey */
     targets_offset = 4;
-    keys_offset = -1;
     expected_signature = Instruction::kPackedSwitchSignature;
   } else {
     /* 0=sig, 1=count, 2..count*2 = keys */
-    keys_offset = 2;
     targets_offset = 2 + 2 * switch_count;
     expected_signature = Instruction::kSparseSwitchSignature;
   }
@@ -1328,19 +1332,33 @@
                                       << ", count " << insn_count;
     return false;
   }
-  /* for a sparse switch, verify the keys are in ascending order */
-  if (keys_offset > 0 && switch_count > 1) {
-    int32_t last_key = switch_insns[keys_offset] | (switch_insns[keys_offset + 1] << 16);
-    for (uint32_t targ = 1; targ < switch_count; targ++) {
-      int32_t key =
-          static_cast<int32_t>(switch_insns[keys_offset + targ * 2]) |
-          static_cast<int32_t>(switch_insns[keys_offset + targ * 2 + 1] << 16);
-      if (key <= last_key) {
-        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid packed switch: last key=" << last_key
-                                          << ", this=" << key;
+
+  constexpr int32_t keys_offset = 2;
+  if (switch_count > 1) {
+    if (is_packed_switch) {
+      /* for a packed switch, verify that keys do not overflow int32 */
+      int32_t first_key = switch_insns[keys_offset] | (switch_insns[keys_offset + 1] << 16);
+      int32_t max_first_key =
+          std::numeric_limits<int32_t>::max() - (static_cast<int32_t>(switch_count) - 1);
+      if (first_key > max_first_key) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid packed switch: first_key=" << first_key
+                                          << ", switch_count=" << switch_count;
         return false;
       }
-      last_key = key;
+    } else {
+      /* for a sparse switch, verify the keys are in ascending order */
+      int32_t last_key = switch_insns[keys_offset] | (switch_insns[keys_offset + 1] << 16);
+      for (uint32_t targ = 1; targ < switch_count; targ++) {
+        int32_t key =
+            static_cast<int32_t>(switch_insns[keys_offset + targ * 2]) |
+            static_cast<int32_t>(switch_insns[keys_offset + targ * 2 + 1] << 16);
+        if (key <= last_key) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid sparse switch: last key=" << last_key
+                                            << ", this=" << key;
+          return false;
+        }
+        last_key = key;
+      }
     }
   }
   /* verify each switch target */
@@ -3148,6 +3166,13 @@
       Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement invoke-lambda verification
       break;
     }
+    case Instruction::CAPTURE_VARIABLE: {
+      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
+      // If the code would've normally hard-failed, then the interpreter will throw the
+      // appropriate verification errors at runtime.
+      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement capture-variable verification
+      break;
+    }
     case Instruction::CREATE_LAMBDA: {
       // Don't bother verifying, instead the interpreter will take the slow path with access checks.
       // If the code would've normally hard-failed, then the interpreter will throw the
@@ -3155,10 +3180,15 @@
       Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement create-lambda verification
       break;
     }
+    case Instruction::LIBERATE_VARIABLE: {
+      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
+      // If the code would've normally hard-failed, then the interpreter will throw the
+      // appropriate verification errors at runtime.
+      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement liberate-variable verification
+      break;
+    }
 
-    case Instruction::UNUSED_F4:
-    case Instruction::UNUSED_F5:
-    case Instruction::UNUSED_F7: {
+    case Instruction::UNUSED_F4: {
       DCHECK(false);  // TODO(iam): Implement opcodes for lambdas
       // Conservatively fail verification on release builds.
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_);
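Note: the verifier change above adds an overflow check for packed switches: with switch_count entries starting at first_key, the last key is first_key + switch_count - 1, so first_key must not exceed INT32_MAX - (switch_count - 1). A small self-contained sketch of that predicate follows; the helper name is assumed for illustration.

    #include <cstdint>
    #include <iostream>
    #include <limits>

    // True if all keys of a packed switch fit in int32: compare first_key against a
    // precomputed maximum instead of computing the (possibly overflowing) last key.
    bool PackedSwitchKeysFit(int32_t first_key, uint32_t switch_count) {
      if (switch_count <= 1) {
        return true;  // Zero or one entry can never overflow.
      }
      const int32_t max_first_key =
          std::numeric_limits<int32_t>::max() - (static_cast<int32_t>(switch_count) - 1);
      return first_key <= max_first_key;
    }

    int main() {
      std::cout << PackedSwitchKeysFit(0, 10) << "\n";                                        // 1: fine
      std::cout << PackedSwitchKeysFit(std::numeric_limits<int32_t>::max() - 3, 10) << "\n";  // 0: overflow
      return 0;
    }
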
diff --git a/test/004-JniTest/expected.txt b/test/004-JniTest/expected.txt
index 49d9cc0..86ab37e 100644
--- a/test/004-JniTest/expected.txt
+++ b/test/004-JniTest/expected.txt
@@ -1,3 +1,4 @@
+JNI_OnLoad called
 Super.<init>
 Super.<init>
 Subclass.<init>
diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc
index db0dd32..be7888b 100644
--- a/test/004-JniTest/jni_test.cc
+++ b/test/004-JniTest/jni_test.cc
@@ -15,8 +15,9 @@
  */
 
 #include <assert.h>
-#include <stdio.h>
+#include <iostream>
 #include <pthread.h>
+#include <stdio.h>
 #include <vector>
 
 #include "jni.h"
@@ -27,13 +28,21 @@
 
 static JavaVM* jvm = nullptr;
 
-extern "C" JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void *) {
+extern "C" JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void*) {
   assert(vm != nullptr);
   assert(jvm == nullptr);
   jvm = vm;
+  std::cout << "JNI_OnLoad called" << std::endl;
   return JNI_VERSION_1_6;
 }
 
+extern "C" JNIEXPORT void JNI_OnUnload(JavaVM*, void*) {
+  // Use std::cout because LOG(INFO) adds extra output such as the pid.
+  std::cout << "JNI_OnUnload called" << std::endl;
+  // Clear jvm for assert in test 004-JniTest.
+  jvm = nullptr;
+}
+
 static void* AttachHelper(void* arg) {
   assert(jvm != nullptr);
 
diff --git a/test/004-ReferenceMap/expected.txt b/test/004-ReferenceMap/expected.txt
index e69de29..6a5618e 100644
--- a/test/004-ReferenceMap/expected.txt
+++ b/test/004-ReferenceMap/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/004-ReferenceMap/stack_walk_refmap_jni.cc b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
index 55a77ac..285df18 100644
--- a/test/004-ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
@@ -49,7 +49,9 @@
       CHECK_REGS_CONTAIN_REFS(0x06U, true, 8, 1);  // v8: this, v1: x
       CHECK_REGS_CONTAIN_REFS(0x08U, true, 8, 3, 1);  // v8: this, v3: y, v1: x
       CHECK_REGS_CONTAIN_REFS(0x0cU, true, 8, 3, 1);  // v8: this, v3: y, v1: x
-      CHECK_REGS_CONTAIN_REFS(0x0eU, true, 8, 3, 1);  // v8: this, v3: y, v1: x
+      if (!m->IsOptimized(sizeof(void*))) {
+        CHECK_REGS_CONTAIN_REFS(0x0eU, true, 8, 3, 1);  // v8: this, v3: y, v1: x
+      }
       CHECK_REGS_CONTAIN_REFS(0x10U, true, 8, 3, 1);  // v8: this, v3: y, v1: x
       // v2 is added because of the instruction at DexPC 0024. Object merges with 0 is Object. See:
       //   0024: move-object v3, v2
@@ -63,12 +65,18 @@
       // Note that v0: ex can be eliminated because it's a dead merge of two different exceptions.
       CHECK_REGS_CONTAIN_REFS(0x18U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
       CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 5, 2, 1);  // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
-      CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1);  // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
-      // v5 is removed from the root set because there is a "merge" operation.
-      // See 0015: if-nez v2, 001f.
-      CHECK_REGS_CONTAIN_REFS(0x1fU, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
+      if (!m->IsOptimized(sizeof(void*))) {
+        // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex)
+        CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1);
+        // v5 is removed from the root set because there is a "merge" operation.
+        // See 0015: if-nez v2, 001f.
+        CHECK_REGS_CONTAIN_REFS(0x1fU, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
+      }
       CHECK_REGS_CONTAIN_REFS(0x21U, true, 8, 2, 1);  // v8: this, v2: y, v1: x (dead v0: ex)
-      CHECK_REGS_CONTAIN_REFS(0x27U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
+
+      if (!m->IsOptimized(sizeof(void*))) {
+        CHECK_REGS_CONTAIN_REFS(0x27U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
+      }
       CHECK_REGS_CONTAIN_REFS(0x29U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
       CHECK_REGS_CONTAIN_REFS(0x2cU, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
       // Note that it is OK for a compiler to not have a dex map at these two dex PCs because
diff --git a/test/004-SignalTest/expected.txt b/test/004-SignalTest/expected.txt
index fd5ec00..b3a0e1c 100644
--- a/test/004-SignalTest/expected.txt
+++ b/test/004-SignalTest/expected.txt
@@ -1,3 +1,4 @@
+JNI_OnLoad called
 init signal test
 Caught NullPointerException
 Caught StackOverflowError
diff --git a/test/004-StackWalk/expected.txt b/test/004-StackWalk/expected.txt
index bde0024..5af68cd 100644
--- a/test/004-StackWalk/expected.txt
+++ b/test/004-StackWalk/expected.txt
@@ -1,3 +1,4 @@
+JNI_OnLoad called
 1st call
 172001234567891011121314151617181920652310201919
 2nd call
diff --git a/test/004-UnsafeTest/expected.txt b/test/004-UnsafeTest/expected.txt
index e69de29..6a5618e 100644
--- a/test/004-UnsafeTest/expected.txt
+++ b/test/004-UnsafeTest/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/024-illegal-access/expected.txt b/test/024-illegal-access/expected.txt
index 5f951f4..0ae4a77 100644
--- a/test/024-illegal-access/expected.txt
+++ b/test/024-illegal-access/expected.txt
@@ -1,2 +1,5 @@
 Got expected failure 1
 Got expected failure 2
+Got expected failure 3
+Got expected failure 4
+Got expected failure 5
diff --git a/test/024-illegal-access/src/Main.java b/test/024-illegal-access/src/Main.java
index bde73e9..84c7114 100644
--- a/test/024-illegal-access/src/Main.java
+++ b/test/024-illegal-access/src/Main.java
@@ -17,7 +17,7 @@
 public class Main {
     static public void main(String[] args) {
         try {
-            PublicAccess.main();
+            PublicAccess.accessStaticField();
             System.err.println("ERROR: call 1 not expected to succeed");
         } catch (VerifyError ve) {
             // dalvik
@@ -28,14 +28,41 @@
         }
 
         try {
-            CheckInstanceof.main(new Object());
+            PublicAccess.accessStaticMethod();
             System.err.println("ERROR: call 2 not expected to succeed");
-        } catch (VerifyError ve) {
-            // dalvik
-            System.out.println("Got expected failure 2");
         } catch (IllegalAccessError iae) {
             // reference
             System.out.println("Got expected failure 2");
         }
+
+        try {
+            PublicAccess.accessInstanceField();
+            System.err.println("ERROR: call 3 not expected to succeed");
+        } catch (VerifyError ve) {
+            // dalvik
+            System.out.println("Got expected failure 3");
+        } catch (IllegalAccessError iae) {
+            // reference
+            System.out.println("Got expected failure 3");
+        }
+
+        try {
+            PublicAccess.accessInstanceMethod();
+            System.err.println("ERROR: call 4 not expected to succeed");
+        } catch (IllegalAccessError iae) {
+            // reference
+            System.out.println("Got expected failure 4");
+        }
+
+        try {
+            CheckInstanceof.main(new Object());
+            System.err.println("ERROR: call 5 not expected to succeed");
+        } catch (VerifyError ve) {
+            // dalvik
+            System.out.println("Got expected failure 5");
+        } catch (IllegalAccessError iae) {
+            // reference
+            System.out.println("Got expected failure 5");
+        }
     }
 }
diff --git a/test/024-illegal-access/src/PublicAccess.java b/test/024-illegal-access/src/PublicAccess.java
index 4e72cd4..e3fef85 100644
--- a/test/024-illegal-access/src/PublicAccess.java
+++ b/test/024-illegal-access/src/PublicAccess.java
@@ -18,8 +18,20 @@
  * Some stuff for access checks.
  */
 public class PublicAccess {
-    public static void main() {
-        String shouldFail = SemiPrivate.mPrivvy;
+    public static void accessStaticField() {
+        String shouldFail = SemiPrivate.mStaticPrivvy;
+        System.out.println("Got " + shouldFail);
+    }
+    public static void accessStaticMethod() {
+        String shouldFail = SemiPrivate.privvyStaticMethod();
+        System.out.println("Got " + shouldFail);
+    }
+    public static void accessInstanceField() {
+        String shouldFail = new SemiPrivate().mInstancePrivvy;
+        System.out.println("Got " + shouldFail);
+    }
+    public static void accessInstanceMethod() {
+        String shouldFail = new SemiPrivate().privvyInstanceMethod();
         System.out.println("Got " + shouldFail);
     }
 }
diff --git a/test/024-illegal-access/src/SemiPrivate.java b/test/024-illegal-access/src/SemiPrivate.java
index 06b16c4..62e0d05 100644
--- a/test/024-illegal-access/src/SemiPrivate.java
+++ b/test/024-illegal-access/src/SemiPrivate.java
@@ -18,5 +18,15 @@
  * Version with package scope access.
  */
 public class SemiPrivate {
-    /* not private */ static String mPrivvy = "stuff";
+    /* not private */ static String mStaticPrivvy = "stuff";
+
+    /* not private */ static String privvyStaticMethod() {
+      return "stuff";
+    }
+
+    /* not private */ String mInstancePrivvy = "stuff";
+
+    /* not private */ String privvyInstanceMethod() {
+      return "stuff";
+    }
 }
diff --git a/test/024-illegal-access/src2/SemiPrivate.java b/test/024-illegal-access/src2/SemiPrivate.java
index 064265a..4f36a07 100644
--- a/test/024-illegal-access/src2/SemiPrivate.java
+++ b/test/024-illegal-access/src2/SemiPrivate.java
@@ -18,5 +18,15 @@
  * Version with private access.
  */
 public class SemiPrivate {
-    private static String mPrivvy = "stuff";
+    private static String mStaticPrivvy = "stuff";
+
+    private static String privvyStaticMethod() {
+      return "stuff";
+    }
+
+    private String mInstancePrivvy = "stuff";
+
+    private String privvyInstanceMethod() {
+      return "stuff";
+    }
 }
diff --git a/test/044-proxy/expected.txt b/test/044-proxy/expected.txt
index f86948a..052c8fa 100644
--- a/test/044-proxy/expected.txt
+++ b/test/044-proxy/expected.txt
@@ -93,4 +93,5 @@
 Got expected exception
 Proxy narrowed invocation return type passed
 5.8
+JNI_OnLoad called
 callback
diff --git a/test/051-thread/expected.txt b/test/051-thread/expected.txt
index 54e34af..c6cd4f8 100644
--- a/test/051-thread/expected.txt
+++ b/test/051-thread/expected.txt
@@ -1,3 +1,4 @@
+JNI_OnLoad called
 thread test starting
 testThreadCapacity thread count: 512
 testThreadDaemons starting thread 'TestDaemonThread'
diff --git a/test/088-monitor-verification/expected.txt b/test/088-monitor-verification/expected.txt
index 13b8c73..2cb8f2c 100644
--- a/test/088-monitor-verification/expected.txt
+++ b/test/088-monitor-verification/expected.txt
@@ -1,12 +1,7 @@
+JNI_OnLoad called
 recursiveSync ok
 nestedMayThrow ok
 constantLock ok
 notNested ok
 twoPath ok
 triplet ok
-OK
-TooDeep
-NotStructuredOverUnlock
-NotStructuredUnderUnlock
-UnbalancedJoin
-UnbalancedStraight
diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java
index 53b72e9..fc5755b 100644
--- a/test/088-monitor-verification/src/Main.java
+++ b/test/088-monitor-verification/src/Main.java
@@ -220,6 +220,11 @@
 
     // Smali testing code.
     private static void runSmaliTests() {
+        if (!hasOatFile() || runtimeIsSoftFail() || isCallerInterpreted()) {
+            // Skip the test; this seems to be a non-compiled-code test configuration.
+            return;
+        }
+
         runTest("OK", new Object[] { new Object(), new Object() }, null);
         runTest("TooDeep", new Object[] { new Object() }, null);
         runTest("NotStructuredOverUnlock", new Object[] { new Object() },
@@ -231,7 +236,6 @@
     }
 
     private static void runTest(String className, Object[] parameters, Class<?> excType) {
-        System.out.println(className);
         try {
             Class<?> c = Class.forName(className);
 
@@ -275,4 +279,7 @@
     // Helpers for the smali code.
     public static native void assertCallerIsInterpreted();
     public static native void assertCallerIsManaged();
+    public static native boolean hasOatFile();
+    public static native boolean runtimeIsSoftFail();
+    public static native boolean isCallerInterpreted();
 }
diff --git a/test/115-native-bridge/expected.txt b/test/115-native-bridge/expected.txt
index 372ecd0..b003307 100644
--- a/test/115-native-bridge/expected.txt
+++ b/test/115-native-bridge/expected.txt
@@ -17,6 +17,7 @@
     name:testSignal, signature:()I, shorty:I.
     name:testZeroLengthByteBuffers, signature:()V, shorty:V.
 trampoline_JNI_OnLoad called!
+JNI_OnLoad called
 Getting trampoline for Java_Main_testFindClassOnAttachedNativeThread with shorty V.
 trampoline_Java_Main_testFindClassOnAttachedNativeThread called!
 Getting trampoline for Java_Main_testFindFieldOnAttachedNativeThreadNative with shorty V.
diff --git a/test/116-nodex2oat/expected.txt b/test/116-nodex2oat/expected.txt
index 05b1c2f..157dfc4 100644
--- a/test/116-nodex2oat/expected.txt
+++ b/test/116-nodex2oat/expected.txt
@@ -1,6 +1,9 @@
 Run -Xnodex2oat
+JNI_OnLoad called
 Has oat is false, is dex2oat enabled is false.
 Run -Xdex2oat
+JNI_OnLoad called
 Has oat is true, is dex2oat enabled is true.
 Run default
+JNI_OnLoad called
 Has oat is true, is dex2oat enabled is true.
diff --git a/test/116-nodex2oat/nodex2oat.cc b/test/116-nodex2oat/nodex2oat.cc
deleted file mode 100644
index 131af31..0000000
--- a/test/116-nodex2oat/nodex2oat.cc
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "class_linker.h"
-#include "dex_file-inl.h"
-#include "mirror/class-inl.h"
-#include "scoped_thread_state_change.h"
-#include "thread.h"
-
-namespace art {
-
-class NoDex2OatTest {
- public:
-  static bool hasOat(jclass cls) {
-    ScopedObjectAccess soa(Thread::Current());
-    mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
-    const DexFile& dex_file = klass->GetDexFile();
-    const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile();
-    return oat_dex_file != nullptr;
-  }
-};
-
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasOat(JNIEnv*, jclass cls) {
-  return NoDex2OatTest::hasOat(cls);
-}
-
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isDex2OatEnabled(JNIEnv*, jclass) {
-  return Runtime::Current()->IsDex2OatEnabled();
-}
-
-}  // namespace art
diff --git a/test/116-nodex2oat/src/Main.java b/test/116-nodex2oat/src/Main.java
index 086ffb9..229735f 100644
--- a/test/116-nodex2oat/src/Main.java
+++ b/test/116-nodex2oat/src/Main.java
@@ -18,16 +18,16 @@
   public static void main(String[] args) {
     System.loadLibrary(args[0]);
     System.out.println(
-        "Has oat is " + hasOat() + ", is dex2oat enabled is " + isDex2OatEnabled() + ".");
+        "Has oat is " + hasOatFile() + ", is dex2oat enabled is " + isDex2OatEnabled() + ".");
 
-    if (hasOat() && !isDex2OatEnabled()) {
+    if (hasOatFile() && !isDex2OatEnabled()) {
       throw new Error("Application with dex2oat disabled runs with an oat file");
-    } else if (!hasOat() && isDex2OatEnabled()) {
+    } else if (!hasOatFile() && isDex2OatEnabled()) {
       throw new Error("Application with dex2oat enabled runs without an oat file");
     }
   }
 
-  private native static boolean hasOat();
+  private native static boolean hasOatFile();
 
   private native static boolean isDex2OatEnabled();
 }
diff --git a/test/117-nopatchoat/expected.txt b/test/117-nopatchoat/expected.txt
index 5cc02d1..0cd4715 100644
--- a/test/117-nopatchoat/expected.txt
+++ b/test/117-nopatchoat/expected.txt
@@ -1,9 +1,12 @@
 Run without dex2oat/patchoat
+JNI_OnLoad called
 dex2oat & patchoat are disabled, has oat is true, has executable oat is expected.
 This is a function call
 Run with dexoat/patchoat
+JNI_OnLoad called
 dex2oat & patchoat are enabled, has oat is true, has executable oat is expected.
 This is a function call
 Run default
+JNI_OnLoad called
 dex2oat & patchoat are enabled, has oat is true, has executable oat is expected.
 This is a function call
diff --git a/test/117-nopatchoat/nopatchoat.cc b/test/117-nopatchoat/nopatchoat.cc
index 7eac412..3e533ad 100644
--- a/test/117-nopatchoat/nopatchoat.cc
+++ b/test/117-nopatchoat/nopatchoat.cc
@@ -16,7 +16,10 @@
 
 #include "class_linker.h"
 #include "dex_file-inl.h"
+#include "gc/heap.h"
+#include "gc/space/image_space.h"
 #include "mirror/class-inl.h"
+#include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
 
@@ -31,6 +34,11 @@
     return dex_file.GetOatDexFile();
   }
 
+  static bool isRelocationDeltaZero() {
+    gc::space::ImageSpace* space = Runtime::Current()->GetHeap()->GetImageSpace();
+    return space != nullptr && space->GetImageHeader().GetPatchDelta() == 0;
+  }
+
   static bool hasExecutableOat(jclass cls) {
     const OatFile::OatDexFile* oat_dex_file = getOatDexFile(cls);
 
@@ -49,6 +57,10 @@
   }
 };
 
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isRelocationDeltaZero(JNIEnv*, jclass) {
+  return NoPatchoatTest::isRelocationDeltaZero();
+}
+
 extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasExecutableOat(JNIEnv*, jclass cls) {
   return NoPatchoatTest::hasExecutableOat(cls);
 }
diff --git a/test/117-nopatchoat/run b/test/117-nopatchoat/run
index c749c74..c634900 100755
--- a/test/117-nopatchoat/run
+++ b/test/117-nopatchoat/run
@@ -36,8 +36,6 @@
 
 # Make sure we can run without relocation
 echo "Run without dex2oat/patchoat"
-# /bin/false is actually not even there for either, so the exec will fail.
-# Unfortunately there is no equivalent to /bin/false in android.
 ${RUN} ${flags} --runtime-option -Xnodex2oat
 
 # Make sure we can run with the oat file.
diff --git a/test/117-nopatchoat/src/Main.java b/test/117-nopatchoat/src/Main.java
index 223e120..425cf48 100644
--- a/test/117-nopatchoat/src/Main.java
+++ b/test/117-nopatchoat/src/Main.java
@@ -18,16 +18,20 @@
   public static void main(String[] args) {
     System.loadLibrary(args[0]);
 
+    // With a relocationDelta of 0, the runtime has no way to determine if the oat file in
+    // ANDROID_DATA has been relocated, since a non-relocated oat file always has a 0 delta.
+    // Hitting this condition should be rare, and ideally we would prevent it from happening, but
+    // there is no way to do so without major changes to the run-test framework.
     boolean executable_correct = (isPic() ?
-                                  hasExecutableOat() == true :
-                                  hasExecutableOat() == isDex2OatEnabled());
+        hasExecutableOat() == true :
+        hasExecutableOat() == (isDex2OatEnabled() || isRelocationDeltaZero()));
 
     System.out.println(
         "dex2oat & patchoat are " + ((isDex2OatEnabled()) ? "enabled" : "disabled") +
-        ", has oat is " + hasOat() + ", has executable oat is " + (
+        ", has oat is " + hasOatFile() + ", has executable oat is " + (
         executable_correct ? "expected" : "not expected") + ".");
 
-    if (!hasOat() && isDex2OatEnabled()) {
+    if (!hasOatFile() && isDex2OatEnabled()) {
       throw new Error("Application with dex2oat enabled runs without an oat file");
     }
 
@@ -47,7 +51,9 @@
 
   private native static boolean isPic();
 
-  private native static boolean hasOat();
+  private native static boolean hasOatFile();
 
   private native static boolean hasExecutableOat();
+
+  private native static boolean isRelocationDeltaZero();
 }
diff --git a/test/118-noimage-dex2oat/expected.txt b/test/118-noimage-dex2oat/expected.txt
index 0103e89..166481e 100644
--- a/test/118-noimage-dex2oat/expected.txt
+++ b/test/118-noimage-dex2oat/expected.txt
@@ -1,11 +1,14 @@
 Run -Xnoimage-dex2oat
+JNI_OnLoad called
 Has image is false, is image dex2oat enabled is false, is BOOTCLASSPATH on disk is false.
 testB18485243 PASS
 Run -Xnoimage-dex2oat -Xno-dex-file-fallback
 Failed to initialize runtime (check log for details)
 Run -Ximage-dex2oat
+JNI_OnLoad called
 Has image is true, is image dex2oat enabled is true, is BOOTCLASSPATH on disk is true.
 testB18485243 PASS
 Run default
+JNI_OnLoad called
 Has image is true, is image dex2oat enabled is true, is BOOTCLASSPATH on disk is true.
 testB18485243 PASS
diff --git a/test/118-noimage-dex2oat/noimage-dex2oat.cc b/test/118-noimage-dex2oat/noimage-dex2oat.cc
deleted file mode 100644
index aacf00f..0000000
--- a/test/118-noimage-dex2oat/noimage-dex2oat.cc
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "class_linker.h"
-#include "dex_file-inl.h"
-#include "mirror/class-inl.h"
-#include "scoped_thread_state_change.h"
-#include "thread.h"
-
-namespace art {
-
-class NoDex2OatTest {
- public:
-  static bool hasOat(jclass cls) {
-    ScopedObjectAccess soa(Thread::Current());
-    mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
-    const DexFile& dex_file = klass->GetDexFile();
-    const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile();
-    return oat_dex_file != nullptr;
-  }
-};
-
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasImage(JNIEnv*, jclass) {
-  return Runtime::Current()->GetHeap()->HasImageSpace();
-}
-
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isImageDex2OatEnabled(JNIEnv*, jclass) {
-  return Runtime::Current()->IsImageDex2OatEnabled();
-}
-
-}  // namespace art
diff --git a/test/119-noimage-patchoat/expected.txt b/test/119-noimage-patchoat/expected.txt
index ed13662..9b9db58 100644
--- a/test/119-noimage-patchoat/expected.txt
+++ b/test/119-noimage-patchoat/expected.txt
@@ -1,8 +1,11 @@
 Run -Xnoimage-dex2oat -Xpatchoat:/system/bin/false
+JNI_OnLoad called
 Has image is false, is image dex2oat enabled is false.
 Run -Xnoimage-dex2oat -Xpatchoat:/system/bin/false -Xno-dex-file-fallback
 Failed to initialize runtime (check log for details)
 Run -Ximage-dex2oat
+JNI_OnLoad called
 Has image is true, is image dex2oat enabled is true.
 Run default
+JNI_OnLoad called
 Has image is true, is image dex2oat enabled is true.
diff --git a/test/137-cfi/expected.txt b/test/137-cfi/expected.txt
index e69de29..6a5618e 100644
--- a/test/137-cfi/expected.txt
+++ b/test/137-cfi/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/139-register-natives/expected.txt b/test/139-register-natives/expected.txt
index e69de29..6a5618e 100644
--- a/test/139-register-natives/expected.txt
+++ b/test/139-register-natives/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/141-class-unload/expected.txt b/test/141-class-unload/expected.txt
new file mode 100644
index 0000000..53d7abe
--- /dev/null
+++ b/test/141-class-unload/expected.txt
@@ -0,0 +1,23 @@
+1
+2
+JNI_OnLoad called
+JNI_OnUnload called
+1
+2
+JNI_OnLoad called
+JNI_OnUnload called
+null
+null
+JNI_OnLoad called
+JNI_OnUnload called
+null
+loader null false
+loader null false
+JNI_OnLoad called
+JNI_OnUnload called
+null
+1
+2
+JNI_OnLoad called
+class null false test
+JNI_OnUnload called
diff --git a/test/141-class-unload/info.txt b/test/141-class-unload/info.txt
new file mode 100644
index 0000000..d8dd381
--- /dev/null
+++ b/test/141-class-unload/info.txt
@@ -0,0 +1 @@
+Test that classes get freed after they are no longer reachable.
diff --git a/test/141-class-unload/jni_unload.cc b/test/141-class-unload/jni_unload.cc
new file mode 100644
index 0000000..d913efe
--- /dev/null
+++ b/test/141-class-unload/jni_unload.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni.h"
+
+#include <iostream>
+
+#include "jit/jit.h"
+#include "jit/jit_instrumentation.h"
+#include "runtime.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace {
+
+extern "C" JNIEXPORT void JNICALL Java_IntHolder_waitForCompilation(JNIEnv*, jclass) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    jit->GetInstrumentationCache()->WaitForCompilationToFinish(Thread::Current());
+  }
+}
+
+}  // namespace
+}  // namespace art
diff --git a/test/141-class-unload/src-ex/IntHolder.java b/test/141-class-unload/src-ex/IntHolder.java
new file mode 100644
index 0000000..feff0d2
--- /dev/null
+++ b/test/141-class-unload/src-ex/IntHolder.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Simple class that holds a static int for testing that class unloading works
+// and re-runs the class initializer.
+public class IntHolder {
+    private static int value = 1;
+
+    public static void setValue(int newValue) {
+        value = newValue;
+    }
+
+    public static int getValue() {
+        return value;
+    }
+
+    public static void runGC() {
+        Runtime.getRuntime().gc();
+    }
+
+    public static void loadLibrary(String name) {
+        System.loadLibrary(name);
+    }
+
+    public static native void waitForCompilation();
+
+    public static Throwable generateStackTrace() {
+        return new Exception("test");
+    }
+}
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
new file mode 100644
index 0000000..3cc43ac
--- /dev/null
+++ b/test/141-class-unload/src/Main.java
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.ref.WeakReference;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+
+public class Main {
+    static final String DEX_FILE = System.getenv("DEX_LOCATION") + "/141-class-unload-ex.jar";
+    static String nativeLibraryName;
+
+    public static void main(String[] args) throws Exception {
+        nativeLibraryName = args[0];
+        Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+        if (pathClassLoader == null) {
+            throw new AssertionError("Couldn't find path class loader class");
+        }
+        Constructor constructor =
+            pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
+        try {
+            testUnloadClass(constructor);
+            testUnloadLoader(constructor);
+            // Test that we don't unload if we have a Method keeping the class live.
+            testNoUnloadInvoke(constructor);
+            // Test that we don't unload if we have an instance.
+            testNoUnloadInstance(constructor);
+            // Test JNI_OnLoad and JNI_OnUnload.
+            testLoadAndUnloadLibrary(constructor);
+            // Test that stack traces keep the classes live.
+            testStackTrace(constructor);
+            // Stress test to make sure we don't leak memory.
+            stressTest(constructor);
+        } catch (Exception e) {
+            System.out.println(e);
+        }
+    }
+
+    private static void stressTest(Constructor constructor) throws Exception {
+        for (int i = 0; i <= 100; ++i) {
+            setUpUnloadLoader(constructor, false);
+            if (i % 10 == 0) {
+                Runtime.getRuntime().gc();
+            }
+        }
+    }
+
+    private static void testUnloadClass(Constructor constructor) throws Exception {
+        WeakReference<Class> klass = setUpUnloadClass(constructor);
+        // No strong references to the class loader; it should get unloaded.
+        Runtime.getRuntime().gc();
+        WeakReference<Class> klass2 = setUpUnloadClass(constructor);
+        Runtime.getRuntime().gc();
+        // If the weak reference is cleared, then it was unloaded.
+        System.out.println(klass.get());
+        System.out.println(klass2.get());
+    }
+
+    private static void testUnloadLoader(Constructor constructor)
+        throws Exception {
+        WeakReference<ClassLoader> loader = setUpUnloadLoader(constructor, true);
+        // No strong references to the class loader; it should get unloaded.
+        Runtime.getRuntime().gc();
+        // If the weak reference is cleared, then it was unloaded.
+        System.out.println(loader.get());
+    }
+
+    private static void testStackTrace(Constructor constructor) throws Exception {
+        WeakReference<Class> klass = setUpUnloadClass(constructor);
+        Method stackTraceMethod = klass.get().getDeclaredMethod("generateStackTrace");
+        Throwable throwable = (Throwable) stackTraceMethod.invoke(klass.get());
+        stackTraceMethod = null;
+        Runtime.getRuntime().gc();
+        boolean isNull = klass.get() == null;
+        System.out.println("class null " + isNull + " " + throwable.getMessage());
+    }
+
+    private static void testLoadAndUnloadLibrary(Constructor constructor) throws Exception {
+        WeakReference<ClassLoader> loader = setUpLoadLibrary(constructor);
+        // No strong references to the class loader; it should get unloaded.
+        Runtime.getRuntime().gc();
+        // If the weak reference is cleared, then it was unloaded.
+        System.out.println(loader.get());
+    }
+
+    private static void testNoUnloadInvoke(Constructor constructor) throws Exception {
+        WeakReference<ClassLoader> loader =
+            new WeakReference((ClassLoader) constructor.newInstance(
+                DEX_FILE, ClassLoader.getSystemClassLoader()));
+        WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder"));
+        intHolder.get().getDeclaredMethod("runGC").invoke(intHolder.get());
+        boolean isNull = loader.get() == null;
+        System.out.println("loader null " + isNull);
+    }
+
+    private static void testNoUnloadInstance(Constructor constructor) throws Exception {
+        WeakReference<ClassLoader> loader =
+            new WeakReference((ClassLoader) constructor.newInstance(
+                DEX_FILE, ClassLoader.getSystemClassLoader()));
+        WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder"));
+        Object o = intHolder.get().newInstance();
+        Runtime.getRuntime().gc();
+        boolean isNull = loader.get() == null;
+        System.out.println("loader null " + isNull);
+    }
+
+    private static WeakReference<Class> setUpUnloadClass(Constructor constructor) throws Exception {
+        ClassLoader loader = (ClassLoader) constructor.newInstance(
+            DEX_FILE, ClassLoader.getSystemClassLoader());
+        Class intHolder = loader.loadClass("IntHolder");
+        Method getValue = intHolder.getDeclaredMethod("getValue");
+        Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE);
+        // Make sure we don't accidentally preserve the value in the int holder; the class
+        // initializer should be re-run.
+        System.out.println((int) getValue.invoke(intHolder));
+        setValue.invoke(intHolder, 2);
+        System.out.println((int) getValue.invoke(intHolder));
+        waitForCompilation(intHolder);
+        return new WeakReference(intHolder);
+    }
+
+    private static WeakReference<ClassLoader> setUpUnloadLoader(Constructor constructor,
+                                                                boolean waitForCompilation)
+        throws Exception {
+        ClassLoader loader = (ClassLoader) constructor.newInstance(
+            DEX_FILE, ClassLoader.getSystemClassLoader());
+        Class intHolder = loader.loadClass("IntHolder");
+        Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE);
+        setValue.invoke(intHolder, 2);
+        if (waitForCompilation) {
+            waitForCompilation(intHolder);
+        }
+        return new WeakReference(loader);
+    }
+
+    private static void waitForCompilation(Class intHolder) throws Exception {
+        // Load the native library so that we can call waitForCompilation.
+        Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class);
+        loadLibrary.invoke(intHolder, nativeLibraryName);
+        // Wait for JIT compilation to finish since the async threads may prevent unloading.
+        Method waitForCompilation = intHolder.getDeclaredMethod("waitForCompilation");
+        waitForCompilation.invoke(intHolder);
+    }
+
+    private static WeakReference<ClassLoader> setUpLoadLibrary(Constructor constructor)
+        throws Exception {
+        ClassLoader loader = (ClassLoader) constructor.newInstance(
+            DEX_FILE, ClassLoader.getSystemClassLoader());
+        Class intHolder = loader.loadClass("IntHolder");
+        Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class);
+        loadLibrary.invoke(intHolder, nativeLibraryName);
+        return new WeakReference(loader);
+    }
+}
diff --git a/test/142-classloader2/expected.txt b/test/142-classloader2/expected.txt
new file mode 100644
index 0000000..86f5e22
--- /dev/null
+++ b/test/142-classloader2/expected.txt
@@ -0,0 +1 @@
+Everything OK.
diff --git a/test/142-classloader2/info.txt b/test/142-classloader2/info.txt
new file mode 100644
index 0000000..eb821a8
--- /dev/null
+++ b/test/142-classloader2/info.txt
@@ -0,0 +1 @@
+Check sub-classing of PathClassLoader.
diff --git a/test/142-classloader2/smali/MyPathClassLoader.smali b/test/142-classloader2/smali/MyPathClassLoader.smali
new file mode 100644
index 0000000..553abd4
--- /dev/null
+++ b/test/142-classloader2/smali/MyPathClassLoader.smali
@@ -0,0 +1,13 @@
+# Simple subclass of PathClassLoader with only a forwarding constructor.
+# We need to use smali right now to subclass a libcore class, see b/24304298.
+
+.class public LMyPathClassLoader;
+
+.super Ldalvik/system/PathClassLoader;
+
+# Simple forwarding constructor.
+.method public constructor <init>(Ljava/lang/String;Ljava/lang/ClassLoader;)V
+    .registers 3
+    invoke-direct {p0, p1, p2}, Ldalvik/system/PathClassLoader;-><init>(Ljava/lang/String;Ljava/lang/ClassLoader;)V
+    return-void
+.end method
diff --git a/test/142-classloader2/src-ex/A.java b/test/142-classloader2/src-ex/A.java
new file mode 100644
index 0000000..d5fa1f9d
--- /dev/null
+++ b/test/142-classloader2/src-ex/A.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *  Identical class to the main src, except with a different value, so we can distinguish them.
+ */
+public class A {
+    public static String value = "Ex-A";
+}
diff --git a/test/142-classloader2/src/A.java b/test/142-classloader2/src/A.java
new file mode 100644
index 0000000..532df51
--- /dev/null
+++ b/test/142-classloader2/src/A.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *  Main class, with a simple value.
+ */
+public class A {
+    public static String value = "Src-A";
+}
diff --git a/test/142-classloader2/src/Main.java b/test/142-classloader2/src/Main.java
new file mode 100644
index 0000000..86c61eb
--- /dev/null
+++ b/test/142-classloader2/src/Main.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+
+/**
+ * PathClassLoader test.
+ */
+public class Main {
+
+    private static ClassLoader createClassLoader(String dexPath, ClassLoader parent) {
+        try {
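+            // MyPathClassLoader is written in smali (see the smali/ directory), so it is not
+            // visible to javac; look it up reflectively at run time.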
+            Class<?> myClassLoaderClass = Class.forName("MyPathClassLoader");
+            Constructor constructor = myClassLoaderClass.getConstructor(String.class,
+                                                                        ClassLoader.class);
+            return (ClassLoader)constructor.newInstance(dexPath, parent);
+        } catch (Exception e) {
+            // Oops, not available?!
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Main entry point.
+     */
+    public static void main(String[] args) throws Exception {
+        // Derive the path of the secondary dex file from our own class path. We'll use that
+        // file as the source of the new class loader.
+        String cp = System.getProperty("java.class.path");
+        if (cp.split(System.getProperty("path.separator")).length != 1) {
+            throw new IllegalStateException("Didn't find exactly one classpath element in " + cp);
+        }
+        if (!cp.endsWith("classloader2.jar")) {
+            throw new IllegalStateException("Don't understand classpath " + cp);
+        }
+        cp = cp.replace("classloader2.jar", "classloader2-ex.jar");
+
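+        // Use the boot class loader (the system class loader's parent) as parent, so that
+        // delegation cannot find the main A and the new loader must define its own A from
+        // the -ex jar.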
+        ClassLoader myClassLoader = createClassLoader(
+                cp, ClassLoader.getSystemClassLoader().getParent());
+
+        // Now load our test class.
+        Class<?> srcClass = A.class;
+        Class<?> exClass = myClassLoader.loadClass("A");
+
+        // First check: classes should be different.
+        if (srcClass == exClass) {
+            throw new IllegalStateException("Loaded class instances are the same");
+        }
+
+        // Secondary checks: get the static field values and make sure they aren't the same.
+        String srcValue = (String)srcClass.getDeclaredField("value").get(null);
+        if (!"Src-A".equals(srcValue)) {
+            throw new IllegalStateException("Expected Src-A, found " + srcValue);
+        }
+        String exValue = (String)exClass.getDeclaredField("value").get(null);
+        if (!"Ex-A".equals(exValue)) {
+            throw new IllegalStateException("Expected Ex-A, found " + exValue);
+        }
+
+        System.out.println("Everything OK.");
+    }
+}
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index a746664..f06c250 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -249,6 +249,25 @@
     array[Integer.MAX_VALUE - 998] = 1;
   }
 
+  /// CHECK-START: void Main.constantIndexing6(int[]) BCE (before)
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+  /// CHECK: BoundsCheck
+  /// CHECK: ArraySet
+
+  /// CHECK-START: void Main.constantIndexing6(int[]) BCE (after)
+  /// CHECK: Deoptimize
+
+  static void constantIndexing6(int[] array) {
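+    // BCE merges the two bounds checks below into a single Deoptimize guard
+    // (see the CHECK lines above).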
+    array[3] = 1;
+    array[4] = 1;
+  }
+
+  // A helper into which the actual throwing function should be inlined.
+  static void constantIndexingForward6(int[] array) {
+    constantIndexing6(array);
+  }
+
   /// CHECK-START: void Main.loopPattern1(int[]) BCE (before)
   /// CHECK: BoundsCheck
   /// CHECK: ArraySet
@@ -602,7 +621,12 @@
       // This will cause AIOOBE.
       constantIndexing2(new int[3]);
     } catch (ArrayIndexOutOfBoundsException e) {
-      return 99;
+      try {
+        // This will cause AIOOBE.
+        constantIndexingForward6(new int[3]);
+      } catch (ArrayIndexOutOfBoundsException e2) {
+        return 99;
+      }
     }
     return 0;
   }
diff --git a/test/454-get-vreg/expected.txt b/test/454-get-vreg/expected.txt
index e69de29..6a5618e 100644
--- a/test/454-get-vreg/expected.txt
+++ b/test/454-get-vreg/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/455-set-vreg/expected.txt b/test/455-set-vreg/expected.txt
index e69de29..6a5618e 100644
--- a/test/455-set-vreg/expected.txt
+++ b/test/455-set-vreg/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/457-regs/expected.txt b/test/457-regs/expected.txt
index e69de29..6a5618e 100644
--- a/test/457-regs/expected.txt
+++ b/test/457-regs/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index a14200e..c32d34a 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -84,6 +84,172 @@
     return arg & -1;
   }
 
+  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (before)
+  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
+  /// CHECK-DAG:     <<Const15:i\d+>>  IntConstant 15
+  /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<Arg>>,<<Const28>>]
+  /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const15>>]
+  /// CHECK-DAG:                       Return [<<And>>]
+
+  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
+  /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<Arg>>,<<Const28>>]
+  /// CHECK-DAG:                       Return [<<UShr>>]
+
+  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after)
+  /// CHECK-NOT:                       And
+
+  public static int UShr28And15(int arg) {
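+    // The unsigned shift by 28 leaves only 4 significant bits, so the '& 15' is a no-op
+    // and the And is removed.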
+    return (arg >>> 28) & 15;
+  }
+
+  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (before)
+  /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
+  /// CHECK-DAG:     <<Const15:j\d+>>  LongConstant 15
+  /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const60>>]
+  /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const15>>]
+  /// CHECK-DAG:                       Return [<<And>>]
+
+  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
+  /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const60>>]
+  /// CHECK-DAG:                       Return [<<UShr>>]
+
+  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after)
+  /// CHECK-NOT:                       And
+
+  public static long UShr60And15(long arg) {
+    return (arg >>> 60) & 15;
+  }
+
+  /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (before)
+  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
+  /// CHECK-DAG:     <<Const7:i\d+>>   IntConstant 7
+  /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<Arg>>,<<Const28>>]
+  /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const7>>]
+  /// CHECK-DAG:                       Return [<<And>>]
+
+  /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
+  /// CHECK-DAG:     <<Const7:i\d+>>   IntConstant 7
+  /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<Arg>>,<<Const28>>]
+  /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const7>>]
+  /// CHECK-DAG:                       Return [<<And>>]
+
+  public static int UShr28And7(int arg) {
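+    // The shift result still has 4 significant bits, so the '& 7' mask is not redundant
+    // and the And must stay.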
+    return (arg >>> 28) & 7;
+  }
+
+  /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (before)
+  /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
+  /// CHECK-DAG:     <<Const7:j\d+>>   LongConstant 7
+  /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const60>>]
+  /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const7>>]
+  /// CHECK-DAG:                       Return [<<And>>]
+
+  /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
+  /// CHECK-DAG:     <<Const7:j\d+>>   LongConstant 7
+  /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const60>>]
+  /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const7>>]
+  /// CHECK-DAG:                       Return [<<And>>]
+
+  public static long UShr60And7(long arg) {
+    return (arg >>> 60) & 7;
+  }
+
+  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (before)
+  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
+  /// CHECK-DAG:     <<Const255:i\d+>> IntConstant 255
+  /// CHECK-DAG:     <<Shr:i\d+>>      Shr [<<Arg>>,<<Const24>>]
+  /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const255>>]
+  /// CHECK-DAG:                       Return [<<And>>]
+
+  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
+  /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<Arg>>,<<Const24>>]
+  /// CHECK-DAG:                       Return [<<UShr>>]
+
+  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after)
+  /// CHECK-NOT:                       Shr
+  /// CHECK-NOT:                       And
+
+  public static int Shr24And255(int arg) {
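+    // The '& 255' keeps exactly the 8 bits left by the shift and discards the sign
+    // extension, so Shr followed by And becomes a single UShr.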
+    return (arg >> 24) & 255;
+  }
+
+  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (before)
+  /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
+  /// CHECK-DAG:     <<Const255:j\d+>> LongConstant 255
+  /// CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Arg>>,<<Const56>>]
+  /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const255>>]
+  /// CHECK-DAG:                       Return [<<And>>]
+
+  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
+  /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const56>>]
+  /// CHECK-DAG:                       Return [<<UShr>>]
+
+  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after)
+  /// CHECK-NOT:                       Shr
+  /// CHECK-NOT:                       And
+
+  public static long Shr56And255(long arg) {
+    return (arg >> 56) & 255;
+  }
+
+  /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (before)
+  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
+  /// CHECK-DAG:     <<Const127:i\d+>> IntConstant 127
+  /// CHECK-DAG:     <<Shr:i\d+>>      Shr [<<Arg>>,<<Const24>>]
+  /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const127>>]
+  /// CHECK-DAG:                       Return [<<And>>]
+
+  /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
+  /// CHECK-DAG:     <<Const127:i\d+>> IntConstant 127
+  /// CHECK-DAG:     <<Shr:i\d+>>      Shr [<<Arg>>,<<Const24>>]
+  /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const127>>]
+  /// CHECK-DAG:                       Return [<<And>>]
+
+  public static int Shr24And127(int arg) {
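+    // The mask (127) does not cover all 8 bits left by the shift, so no rewrite applies
+    // and both Shr and And remain.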
+    return (arg >> 24) & 127;
+  }
+
+  /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (before)
+  /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
+  /// CHECK-DAG:     <<Const127:j\d+>> LongConstant 127
+  /// CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Arg>>,<<Const56>>]
+  /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const127>>]
+  /// CHECK-DAG:                       Return [<<And>>]
+
+  /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (after)
+  /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
+  /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
+  /// CHECK-DAG:     <<Const127:j\d+>> LongConstant 127
+  /// CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Arg>>,<<Const56>>]
+  /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const127>>]
+  /// CHECK-DAG:                       Return [<<And>>]
+
+  public static long Shr56And127(long arg) {
+    return (arg >> 56) & 127;
+  }
+
   /// CHECK-START: long Main.Div1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
@@ -1109,5 +1275,13 @@
     assertFloatEquals(DivMP25(100.0f), -400.0f);
     assertDoubleEquals(DivMP25(150.0), -600.0);
     assertLongEquals(Shl1(100), 200);
+    assertIntEquals(UShr28And15(0xc1234567), 0xc);
+    assertLongEquals(UShr60And15(0xc123456787654321L), 0xcL);
+    assertIntEquals(UShr28And7(0xc1234567), 0x4);
+    assertLongEquals(UShr60And7(0xc123456787654321L), 0x4L);
+    assertIntEquals(Shr24And255(0xc1234567), 0xc1);
+    assertLongEquals(Shr56And255(0xc123456787654321L), 0xc1L);
+    assertIntEquals(Shr24And127(0xc1234567), 0x41);
+    assertLongEquals(Shr56And127(0xc123456787654321L), 0x41L);
   }
 }
diff --git a/test/461-get-reference-vreg/expected.txt b/test/461-get-reference-vreg/expected.txt
index e69de29..6a5618e 100644
--- a/test/461-get-reference-vreg/expected.txt
+++ b/test/461-get-reference-vreg/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/466-get-live-vreg/expected.txt b/test/466-get-live-vreg/expected.txt
index e69de29..6a5618e 100644
--- a/test/466-get-live-vreg/expected.txt
+++ b/test/466-get-live-vreg/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/466-get-live-vreg/get_live_vreg_jni.cc b/test/466-get-live-vreg/get_live_vreg_jni.cc
index e3e0091..7e9a583 100644
--- a/test/466-get-live-vreg/get_live_vreg_jni.cc
+++ b/test/466-get-live-vreg/get_live_vreg_jni.cc
@@ -42,7 +42,9 @@
     } else if (m_name.compare("testIntervalHole") == 0) {
       found_method_ = true;
       uint32_t value = 0;
-      if (GetCurrentQuickFrame() != nullptr && m->IsOptimized(sizeof(void*))) {
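+      // A debuggable runtime keeps dex registers live in optimized code, so the dead-value
+      // case below only applies to non-debuggable optimized frames.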
+      if (GetCurrentQuickFrame() != nullptr &&
+          m->IsOptimized(sizeof(void*)) &&
+          !Runtime::Current()->IsDebuggable()) {
         CHECK_EQ(GetVReg(m, 0, kIntVReg, &value), false);
       } else {
         CHECK(GetVReg(m, 0, kIntVReg, &value));
diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java
index 2cfb04d..6b4da9d 100644
--- a/test/482-checker-loop-back-edge-use/src/Main.java
+++ b/test/482-checker-loop-back-edge-use/src/Main.java
@@ -18,16 +18,27 @@
 public class Main {
 
   /// CHECK-START: void Main.loop1(boolean) liveness (after)
-  /// CHECK:         ParameterValue  liveness:2  ranges:{[2,22)} uses:[17,22]
-  /// CHECK:         Goto            liveness:20
+  /// CHECK:         <<Arg:z\d+>>  ParameterValue  liveness:<<ArgLiv:\d+>>  ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>]
+  /// CHECK:                       If [<<Arg>>]    liveness:<<IfLiv:\d+>>
+  /// CHECK:                       Goto            liveness:<<GotoLiv:\d+>>
+  /// CHECK:                       Exit
+  /// CHECK-EVAL:    <<IfLiv>> + 1 == <<ArgUse>>
+  /// CHECK-EVAL:    <<GotoLiv>> + 2 == <<ArgLoopUse>>
+
   public static void loop1(boolean incoming) {
     while (incoming) {}
   }
 
   /// CHECK-START: void Main.loop2(boolean) liveness (after)
-  /// CHECK:         ParameterValue  liveness:4  ranges:{[4,44)} uses:[35,40,44]
-  /// CHECK:         Goto            liveness:38
-  /// CHECK:         Goto            liveness:42
+  /// CHECK:         <<Arg:z\d+>>  ParameterValue  liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse2:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse1:\d+>>,<<ArgLoopUse2>>]
+  /// CHECK:                       If [<<Arg>>]    liveness:<<IfLiv:\d+>>
+  /// CHECK:                       Goto            liveness:<<GotoLiv1:\d+>>
+  /// CHECK:                       Goto            liveness:<<GotoLiv2:\d+>>
+  /// CHECK-EVAL:    <<IfLiv>> + 1 == <<ArgUse>>
+  /// CHECK-EVAL:    <<GotoLiv1>> < <<GotoLiv2>>
+  /// CHECK-EVAL:    <<GotoLiv1>> + 2 == <<ArgLoopUse1>>
+  /// CHECK-EVAL:    <<GotoLiv2>> + 2 == <<ArgLoopUse2>>
+
   public static void loop2(boolean incoming) {
     while (true) {
       System.out.println("foo");
@@ -36,11 +47,14 @@
   }
 
   /// CHECK-START: void Main.loop3(boolean) liveness (after)
-  /// CHECK:         ParameterValue  liveness:4  ranges:{[4,60)} uses:[56,60]
-  /// CHECK:         Goto            liveness:58
+  /// CHECK:         <<Arg:z\d+>>  ParameterValue  liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>]
+  /// CHECK:                       Goto            liveness:<<GotoLiv1:\d+>>
+  /// CHECK:                       InvokeVirtual   [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>>
+  /// CHECK:                       Goto            liveness:<<GotoLiv2:\d+>>
+  /// CHECK-EVAL:    <<InvokeLiv>> == <<ArgUse>>
+  /// CHECK-EVAL:    <<GotoLiv1>> < <<GotoLiv2>>
+  /// CHECK-EVAL:    <<GotoLiv2>> + 2 == <<ArgLoopUse>>
 
-  // CHECK-START: void Main.loop3(boolean) liveness (after)
-  // CHECK-NOT:     Goto liveness:50
   public static void loop3(boolean incoming) {
     // 'incoming' only needs a use at the outer loop's back edge.
     while (System.currentTimeMillis() != 42) {
@@ -49,11 +63,11 @@
     }
   }
 
-  // CHECK-START: void Main.loop4(boolean) liveness (after)
-  // CHECK:         ParameterValue  liveness:4  ranges:{[4,22)} uses:[22]
+  /// CHECK-START: void Main.loop4(boolean) liveness (after)
+  /// CHECK:         <<Arg:z\d+>> ParameterValue  liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgUse:\d+>>)} uses:[<<ArgUse>>]
+  /// CHECK:                      InvokeVirtual   [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>>
+  /// CHECK-EVAL:    <<InvokeLiv>> == <<ArgUse>>
 
-  // CHECK-START: void Main.loop4(boolean) liveness (after)
-  // CHECK-NOT:     Goto            liveness:18
   public static void loop4(boolean incoming) {
     // 'incoming' has no loop use, so should not have back edge uses.
     System.out.println(incoming);
@@ -63,59 +77,98 @@
   }
 
   /// CHECK-START: void Main.loop5(boolean) liveness (after)
-  /// CHECK:         ParameterValue  liveness:4  ranges:{[4,54)} uses:[37,46,50,54]
-  /// CHECK:         Goto            liveness:48
-  /// CHECK:         Goto            liveness:52
+  /// CHECK:         <<Arg:z\d+>>  ParameterValue  liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse2:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse1:\d+>>,<<ArgLoopUse2>>]
+  /// CHECK:                       InvokeVirtual   [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>>
+  /// CHECK:                       Goto            liveness:<<GotoLiv1:\d+>>
+  /// CHECK:                       Goto            liveness:<<GotoLiv2:\d+>>
+  /// CHECK:                       Exit
+  /// CHECK-EVAL:    <<InvokeLiv>> == <<ArgUse>>
+  /// CHECK-EVAL:    <<GotoLiv1>> < <<GotoLiv2>>
+  /// CHECK-EVAL:    <<GotoLiv1>> + 2 == <<ArgLoopUse1>>
+  /// CHECK-EVAL:    <<GotoLiv2>> + 2 == <<ArgLoopUse2>>
+
   public static void loop5(boolean incoming) {
     // 'incoming' must have a use at both back edges.
-    while (Runtime.getRuntime() != null) {
-      while (incoming) {
+    for (long i = System.nanoTime(); i < 42; ++i) {
+      for (long j = System.currentTimeMillis(); j != 42; ++j) {
         System.out.println(incoming);
       }
     }
   }
 
   /// CHECK-START: void Main.loop6(boolean) liveness (after)
-  /// CHECK:         ParameterValue  liveness:4  ranges:{[4,50)} uses:[26,50]
-  /// CHECK:         Goto            liveness:48
+  /// CHECK:         <<Arg:z\d+>>  ParameterValue  liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>]
+  /// CHECK:                       InvokeVirtual   [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>>
+  /// CHECK:                       Add
+  /// CHECK:                       Goto            liveness:<<GotoLiv1:\d+>>
+  /// CHECK:                       Add
+  /// CHECK:                       Goto            liveness:<<GotoLiv2:\d+>>
+  /// CHECK:                       Exit
+  /// CHECK-EVAL:    <<InvokeLiv>> == <<ArgUse>>
+  /// CHECK-EVAL:    <<GotoLiv1>> < <<GotoLiv2>>
+  /// CHECK-EVAL:    <<GotoLiv2>> + 2 == <<ArgLoopUse>>
 
-  /// CHECK-START: void Main.loop6(boolean) liveness (after)
-  /// CHECK-NOT:     Goto            liveness:24
   public static void loop6(boolean incoming) {
     // 'incoming' must have a use only at the first loop's back edge.
-    while (true) {
+    for (long i = System.nanoTime(); i < 42; ++i) {
       System.out.println(incoming);
-      while (Runtime.getRuntime() != null) {}
+      for (long j = System.currentTimeMillis(); j != 42; ++j) {}
     }
   }
 
   /// CHECK-START: void Main.loop7(boolean) liveness (after)
-  /// CHECK:         ParameterValue  liveness:4  ranges:{[4,54)} uses:[36,45,50,54]
-  /// CHECK:         Goto            liveness:48
-  /// CHECK:         Goto            liveness:52
+  /// CHECK:         <<Arg:z\d+>>  ParameterValue  liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse2:\d+>>)} uses:[<<ArgUse1:\d+>>,<<ArgUse2:\d+>>,<<ArgLoopUse1:\d+>>,<<ArgLoopUse2>>]
+  /// CHECK:                       InvokeVirtual   [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>>
+  /// CHECK:                       If              [<<Arg>>] liveness:<<IfLiv:\d+>>
+  /// CHECK:                       Goto            liveness:<<GotoLiv1:\d+>>
+  /// CHECK:                       Goto            liveness:<<GotoLiv2:\d+>>
+  /// CHECK:                       Exit
+  /// CHECK-EVAL:    <<InvokeLiv>> == <<ArgUse1>>
+  /// CHECK-EVAL:    <<IfLiv>> + 1 == <<ArgUse2>>
+  /// CHECK-EVAL:    <<GotoLiv1>> < <<GotoLiv2>>
+  /// CHECK-EVAL:    <<GotoLiv1>> + 2 == <<ArgLoopUse1>>
+  /// CHECK-EVAL:    <<GotoLiv2>> + 2 == <<ArgLoopUse2>>
+
   public static void loop7(boolean incoming) {
     // 'incoming' must have a use at both back edges.
     while (Runtime.getRuntime() != null) {
       System.out.println(incoming);
       while (incoming) {}
+      System.nanoTime();  // beat back edge splitting
     }
   }
 
   /// CHECK-START: void Main.loop8() liveness (after)
-  /// CHECK:         StaticFieldGet  liveness:14 ranges:{[14,48)} uses:[39,44,48]
-  /// CHECK:         Goto            liveness:42
-  /// CHECK:         Goto            liveness:46
+  /// CHECK:         <<Arg:z\d+>>  StaticFieldGet  liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse2:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse1:\d+>>,<<ArgLoopUse2>>]
+  /// CHECK:                       If [<<Arg>>]    liveness:<<IfLiv:\d+>>
+  /// CHECK:                       Goto            liveness:<<GotoLiv1:\d+>>
+  /// CHECK:                       Goto            liveness:<<GotoLiv2:\d+>>
+  /// CHECK:                       Exit
+  /// CHECK-EVAL:    <<IfLiv>> + 1 == <<ArgUse>>
+  /// CHECK-EVAL:    <<GotoLiv1>> < <<GotoLiv2>>
+  /// CHECK-EVAL:    <<GotoLiv1>> + 2 == <<ArgLoopUse1>>
+  /// CHECK-EVAL:    <<GotoLiv2>> + 2 == <<ArgLoopUse2>>
+
   public static void loop8() {
     // 'incoming' must have a use at both back edges.
     boolean incoming = field;
     while (Runtime.getRuntime() != null) {
+      System.nanoTime();  // beat pre-header creation
       while (incoming) {}
+      System.nanoTime();  // beat back edge splitting
     }
   }
 
   /// CHECK-START: void Main.loop9() liveness (after)
-  /// CHECK:         StaticFieldGet  liveness:26 ranges:{[26,40)} uses:[35,40]
-  /// CHECK:         Goto            liveness:42
+  /// CHECK:         <<Arg:z\d+>>  StaticFieldGet  liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>]
+  /// CHECK:                       If [<<Arg>>]    liveness:<<IfLiv:\d+>>
+  /// CHECK:                       Goto            liveness:<<GotoLiv1:\d+>>
+  /// CHECK:                       Goto            liveness:<<GotoLiv2:\d+>>
+  /// CHECK:                       Exit
+  /// CHECK-EVAL:    <<IfLiv>> + 1 == <<ArgUse>>
+  /// CHECK-EVAL:    <<GotoLiv1>> < <<GotoLiv2>>
+  /// CHECK-EVAL:    <<GotoLiv1>> + 2 == <<ArgLoopUse>>
+
   public static void loop9() {
     while (Runtime.getRuntime() != null) {
       // 'incoming' must only have a use in the inner loop.
diff --git a/test/485-checker-dce-switch/expected.txt b/test/485-checker-dce-switch/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/485-checker-dce-switch/expected.txt
diff --git a/test/485-checker-dce-switch/info.txt b/test/485-checker-dce-switch/info.txt
new file mode 100644
index 0000000..6653526
--- /dev/null
+++ b/test/485-checker-dce-switch/info.txt
@@ -0,0 +1 @@
+Tests that DCE can remove a packed switch.
diff --git a/test/485-checker-dce-switch/src/Main.java b/test/485-checker-dce-switch/src/Main.java
new file mode 100644
index 0000000..019d876
--- /dev/null
+++ b/test/485-checker-dce-switch/src/Main.java
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static int $inline$method() {
+    return 5;
+  }
+
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (before)
+  /// CHECK-DAG:                      PackedSwitch
+
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after)
+  /// CHECK-DAG:    <<Const100:i\d+>> IntConstant 100
+  /// CHECK-DAG:                      Return [<<Const100>>]
+
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after)
+  /// CHECK-NOT:                      PackedSwitch
+
+  public static int wholeSwitchDead(int j) {
+    int i = $inline$method();
+    int l = 100;
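+    // After inlining, i is the constant 5, so the condition below is false and the entire
+    // switch is dead.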
+    if (i > 100) {
+      switch(j) {
+        case 1:
+          i++;
+          break;
+        case 2:
+          i = 99;
+          break;
+        case 3:
+          i = 100;
+          break;
+        case 4:
+          i = -100;
+          break;
+        case 5:
+          i = 7;
+          break;
+        case 6:
+          i = -9;
+          break;
+      }
+      l += i;
+    }
+
+    return l;
+  }
+
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (before)
+  /// CHECK-DAG:                      PackedSwitch
+
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after)
+  /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
+  /// CHECK-DAG:                      Return [<<Const7>>]
+
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after)
+  /// CHECK-NOT:                      PackedSwitch
+
+  public static int constantSwitch_InRange() {
+    int i = $inline$method();
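+    // i folds to the constant 5, so only the 'case 5' branch (i = 7) survives and the
+    // switch itself is removed.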
+    switch(i) {
+      case 1:
+        i++;
+        break;
+      case 2:
+        i = 99;
+        break;
+      case 3:
+        i = 100;
+        break;
+      case 4:
+        i = -100;
+        break;
+      case 5:
+        i = 7;
+        break;
+      case 6:
+        i = -9;
+        break;
+    }
+
+    return i;
+  }
+
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (before)
+  /// CHECK-DAG:                      PackedSwitch
+
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after)
+  /// CHECK-DAG:     <<Const15:i\d+>> IntConstant 15
+  /// CHECK-DAG:                      Return [<<Const15>>]
+
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after)
+  /// CHECK-NOT:                      PackedSwitch
+
+  public static int constantSwitch_AboveRange() {
+    int i = $inline$method() + 10;
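+    // i folds to 15, above the packed-switch range, so only the fall-through path remains.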
+    switch(i) {
+      case 1:
+        i++;
+        break;
+      case 2:
+        i = 99;
+        break;
+      case 3:
+        i = 100;
+        break;
+      case 4:
+        i = -100;
+        break;
+      case 5:
+        i = 7;
+        break;
+      case 6:
+        i = -9;
+        break;
+    }
+
+    return i;
+  }
+
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (before)
+  /// CHECK-DAG:                      PackedSwitch
+
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after)
+  /// CHECK-DAG:     <<ConstM5:i\d+>> IntConstant -5
+  /// CHECK-DAG:                      Return [<<ConstM5>>]
+
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after)
+  /// CHECK-NOT:                      PackedSwitch
+
+  public static int constantSwitch_BelowRange() {
+    int i = $inline$method() - 10;
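+    // i folds to -5, below the packed-switch range, so only the fall-through path remains.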
+    switch(i) {
+      case 1:
+        i++;
+        break;
+      case 2:
+        i = 99;
+        break;
+      case 3:
+        i = 100;
+        break;
+      case 4:
+        i = -100;
+        break;
+      case 5:
+        i = 7;
+        break;
+      case 6:
+        i = -9;
+        break;
+    }
+
+    return i;
+  }
+
+  public static void main(String[] args) throws Exception {
+    int ret_val = wholeSwitchDead(10);
+    if (ret_val != 100) {
+      throw new Error("Incorrect return value from wholeSwitchDead:" + ret_val);
+    }
+
+    ret_val = constantSwitch_InRange();
+    if (ret_val != 7) {
+      throw new Error("Incorrect return value from constantSwitch_InRange:" + ret_val);
+    }
+
+    ret_val = constantSwitch_AboveRange();
+    if (ret_val != 15) {
+      throw new Error("Incorrect return value from constantSwitch_AboveRange:" + ret_val);
+    }
+
+    ret_val = constantSwitch_BelowRange();
+    if (ret_val != -5) {
+      throw new Error("Incorrect return value from constantSwitch_BelowRange:" + ret_val);
+    }
+  }
+}
diff --git a/test/497-inlining-and-class-loader/expected.txt b/test/497-inlining-and-class-loader/expected.txt
index f5b9fe0..905dbfd 100644
--- a/test/497-inlining-and-class-loader/expected.txt
+++ b/test/497-inlining-and-class-loader/expected.txt
@@ -1,3 +1,4 @@
+JNI_OnLoad called
 java.lang.Exception
 	at Main.$noinline$bar(Main.java:124)
 	at Level2.$inline$bar(Level1.java:25)
diff --git a/test/510-checker-try-catch/smali/Builder.smali b/test/510-checker-try-catch/smali/Builder.smali
index 2274ba4..1fde5ed 100644
--- a/test/510-checker-try-catch/smali/Builder.smali
+++ b/test/510-checker-try-catch/smali/Builder.smali
@@ -59,7 +59,7 @@
 ## CHECK:  StoreLocal       [v0,<<Minus2>>]
 
 ## CHECK:  name             "<<BCatch3>>"
-## CHECK:  predecessors     "<<BEnterTry1>>" "<<BExitTry1>>" "<<BEnterTry2>>" "<<BExitTry2>>"
+## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
 ## CHECK:  StoreLocal       [v0,<<Minus3>>]
@@ -70,18 +70,18 @@
 ## CHECK:  xhandlers        "<<BCatch1>>" "<<BCatch3>>"
 ## CHECK:  TryBoundary      kind:entry
 
-## CHECK:  name             "<<BExitTry1>>"
-## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BAdd>>"
-## CHECK:  xhandlers        "<<BCatch1>>" "<<BCatch3>>"
-## CHECK:  TryBoundary      kind:exit
-
 ## CHECK:  name             "<<BEnterTry2>>"
 ## CHECK:  predecessors     "<<BAdd>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch2>>" "<<BCatch3>>"
 ## CHECK:  TryBoundary      kind:entry
 
+## CHECK:  name             "<<BExitTry1>>"
+## CHECK:  predecessors     "<<BTry1>>"
+## CHECK:  successors       "<<BAdd>>"
+## CHECK:  xhandlers        "<<BCatch1>>" "<<BCatch3>>"
+## CHECK:  TryBoundary      kind:exit
+
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
@@ -121,8 +121,7 @@
     goto :return
 .end method
 
-# Test that multiple try-entry blocks are generated if there are multiple entry
-# points into the try block.
+# Tests the try-entry blocks generated when there are multiple entry points into the try block.
 
 ## CHECK-START: int Builder.testMultipleEntries(int, int, int, int) builder (after)
 
@@ -142,20 +141,20 @@
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>"
-## CHECK:  successors       "<<BTry2:B\d+>>"
+## CHECK:  successors       "<<BExitTry1:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BTry2>>"
-## CHECK:  predecessors     "<<BEnterTry2>>" "<<BTry1>>"
-## CHECK:  successors       "<<BExitTry:B\d+>>"
+## CHECK:  name             "<<BTry2:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry2>>"
+## CHECK:  successors       "<<BExitTry2:B\d+>>"
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
-## CHECK:  predecessors     "<<BExitTry>>" "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BExitTry2>>" "<<BCatch:B\d+>>"
 ## CHECK:  Return
 
 ## CHECK:  name             "<<BCatch>>"
-## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry>>"
+## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
 ## CHECK:  StoreLocal       [v0,<<Minus1>>]
@@ -167,12 +166,18 @@
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
-## CHECK:  predecessors     "<<BIf>>"
+## CHECK:  predecessors     "<<BIf>>" "<<BExitTry1>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
-## CHECK:  name             "<<BExitTry>>"
+## CHECK:  name             "<<BExitTry1>>"
+## CHECK:  predecessors     "<<BTry1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  xhandlers        "<<BCatch>>"
+## CHECK:  TryBoundary      kind:exit
+
+## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
@@ -314,18 +319,18 @@
 ## CHECK:  xhandlers        "<<BCatch1>>"
 ## CHECK:  TryBoundary      kind:entry
 
-## CHECK:  name             "<<BExit1>>"
-## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BEnter2>>"
-## CHECK:  xhandlers        "<<BCatch1>>"
-## CHECK:  TryBoundary      kind:exit
-
 ## CHECK:  name             "<<BEnter2>>"
 ## CHECK:  predecessors     "<<BExit1>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:entry
 
+## CHECK:  name             "<<BExit1>>"
+## CHECK:  predecessors     "<<BTry1>>"
+## CHECK:  successors       "<<BEnter2>>"
+## CHECK:  xhandlers        "<<BCatch1>>"
+## CHECK:  TryBoundary      kind:exit
+
 ## CHECK:  name             "<<BExit2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
@@ -402,18 +407,18 @@
 ## CHECK:  xhandlers        "<<BCatch1>>"
 ## CHECK:  TryBoundary      kind:entry
 
-## CHECK:  name             "<<BExit1>>"
-## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BReturn>>"
-## CHECK:  xhandlers        "<<BCatch1>>"
-## CHECK:  TryBoundary      kind:exit
-
 ## CHECK:  name             "<<BEnter2>>"
 ## CHECK:  predecessors     "<<BGoto>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:entry
 
+## CHECK:  name             "<<BExit1>>"
+## CHECK:  predecessors     "<<BTry1>>"
+## CHECK:  successors       "<<BReturn>>"
+## CHECK:  xhandlers        "<<BCatch1>>"
+## CHECK:  TryBoundary      kind:exit
+
 ## CHECK:  name             "<<BExit2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
 ## CHECK:  successors       "<<BEnter1>>"
@@ -483,7 +488,7 @@
 ## CHECK:  StoreLocal       [v0,<<Minus1>>]
 
 ## CHECK:  name             "<<BCatchAll>>"
-## CHECK:  predecessors     "<<BEnter1>>" "<<BExit1>>" "<<BEnter2>>" "<<BExit2>>" "<<BEnter3>>" "<<BExit3>>"
+## CHECK:  predecessors     "<<BEnter1>>" "<<BEnter2>>" "<<BEnter3>>" "<<BExit1>>" "<<BExit2>>" "<<BExit3>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
 ## CHECK:  StoreLocal       [v0,<<Minus2>>]
@@ -494,30 +499,30 @@
 ## CHECK:  xhandlers        "<<BCatchAll>>"
 ## CHECK:  TryBoundary      kind:entry
 
-## CHECK:  name             "<<BExit1>>"
-## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BEnter2>>"
-## CHECK:  xhandlers        "<<BCatchAll>>"
-## CHECK:  TryBoundary      kind:exit
-
 ## CHECK:  name             "<<BEnter2>>"
 ## CHECK:  predecessors     "<<BExit1>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatchArith>>" "<<BCatchAll>>"
 ## CHECK:  TryBoundary      kind:entry
 
-## CHECK:  name             "<<BExit2>>"
-## CHECK:  predecessors     "<<BTry2>>"
-## CHECK:  successors       "<<BEnter3>>"
-## CHECK:  xhandlers        "<<BCatchArith>>" "<<BCatchAll>>"
-## CHECK:  TryBoundary      kind:exit
-
 ## CHECK:  name             "<<BEnter3>>"
 ## CHECK:  predecessors     "<<BExit2>>"
 ## CHECK:  successors       "<<BTry3>>"
 ## CHECK:  xhandlers        "<<BCatchAll>>"
 ## CHECK:  TryBoundary      kind:entry
 
+## CHECK:  name             "<<BExit1>>"
+## CHECK:  predecessors     "<<BTry1>>"
+## CHECK:  successors       "<<BEnter2>>"
+## CHECK:  xhandlers        "<<BCatchAll>>"
+## CHECK:  TryBoundary      kind:exit
+
+## CHECK:  name             "<<BExit2>>"
+## CHECK:  predecessors     "<<BTry2>>"
+## CHECK:  successors       "<<BEnter3>>"
+## CHECK:  xhandlers        "<<BCatchArith>>" "<<BCatchAll>>"
+## CHECK:  TryBoundary      kind:exit
+
 ## CHECK:  name             "<<BExit3>>"
 ## CHECK:  predecessors     "<<BTry3>>"
 ## CHECK:  successors       "<<BReturn>>"
@@ -577,7 +582,7 @@
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BCatch>>"
-## CHECK:  predecessors     "<<BEnterTry1>>" "<<BExitTry1>>" "<<BEnterTry2>>" "<<BExitTry2>>"
+## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
 ## CHECK:  StoreLocal       [v0,<<Minus1>>]
@@ -588,18 +593,18 @@
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
-## CHECK:  name             "<<BExitTry1>>"
-## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BOutside>>"
-## CHECK:  xhandlers        "<<BCatch>>"
-## CHECK:  TryBoundary      kind:exit
-
 ## CHECK:  name             "<<BEnterTry2>>"
 ## CHECK:  predecessors     "<<BOutside>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
+## CHECK:  name             "<<BExitTry1>>"
+## CHECK:  predecessors     "<<BTry1>>"
+## CHECK:  successors       "<<BOutside>>"
+## CHECK:  xhandlers        "<<BCatch>>"
+## CHECK:  TryBoundary      kind:exit
+
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
@@ -647,21 +652,21 @@
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>"
-## CHECK:  successors       "<<BTry2:B\d+>>"
+## CHECK:  successors       "<<BExitTry1:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BTry2>>"
-## CHECK:  predecessors     "<<BEnterTry2>>" "<<BTry1>>"
-## CHECK:  successors       "<<BExitTry:B\d+>>"
+## CHECK:  name             "<<BTry2:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry2>>"
+## CHECK:  successors       "<<BExitTry2:B\d+>>"
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BOutside>>"
-## CHECK:  predecessors     "<<BPSwitch1>>" "<<BExitTry>>"
+## CHECK:  predecessors     "<<BPSwitch1>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BCatchReturn:B\d+>>"
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BCatchReturn>>"
-## CHECK:  predecessors     "<<BOutside>>" "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry>>"
+## CHECK:  predecessors     "<<BOutside>>" "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  flags            "catch_block"
 ## CHECK:  Return
 
@@ -677,7 +682,13 @@
 ## CHECK:  xhandlers        "<<BCatchReturn>>"
 ## CHECK:  TryBoundary      kind:entry
 
-## CHECK:  name             "<<BExitTry>>"
+## CHECK:  name             "<<BExitTry1>>"
+## CHECK:  predecessors     "<<BTry1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  TryBoundary      kind:exit
+
+## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
 ## CHECK:  successors       "<<BOutside>>"
 ## CHECK:  xhandlers        "<<BCatchReturn>>"
@@ -741,7 +752,7 @@
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BCatchReturn>>"
-## CHECK:  predecessors     "<<BOutside>>" "<<BEnterTry1>>" "<<BExitTry1>>" "<<BEnterTry2>>" "<<BExitTry2>>"
+## CHECK:  predecessors     "<<BOutside>>" "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  flags            "catch_block"
 ## CHECK:  Return
 
@@ -751,18 +762,18 @@
 ## CHECK:  xhandlers        "<<BCatchReturn>>"
 ## CHECK:  TryBoundary      kind:entry
 
-## CHECK:  name             "<<BExitTry1>>"
-## CHECK:  predecessors     "<<BPSwitch0>>"
-## CHECK:  successors       "<<BPSwitch1>>"
-## CHECK:  xhandlers        "<<BCatchReturn>>"
-## CHECK:  TryBoundary      kind:exit
-
 ## CHECK:  name             "<<BEnterTry2>>"
 ## CHECK:  predecessors     "<<BPSwitch1>>"
 ## CHECK:  successors       "<<BTry1>>"
 ## CHECK:  xhandlers        "<<BCatchReturn>>"
 ## CHECK:  TryBoundary      kind:entry
 
+## CHECK:  name             "<<BExitTry1>>"
+## CHECK:  predecessors     "<<BPSwitch0>>"
+## CHECK:  successors       "<<BPSwitch1>>"
+## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  TryBoundary      kind:exit
+
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
 ## CHECK:  successors       "<<BOutside>>"
@@ -907,7 +918,7 @@
 ## CHECK:  Div
 
 ## CHECK:  name             "<<BCatch:B\d+>>"
-## CHECK:  predecessors     "<<BExitTry1>>" "<<BEnterTry1>>" "<<BExitTry1>>" "<<BEnterTry2:B\d+>>" "<<BExitTry2:B\d+>>"
+## CHECK:  predecessors     "<<BExitTry1>>" "<<BEnterTry1>>" "<<BEnterTry2:B\d+>>" "<<BExitTry1>>" "<<BExitTry2:B\d+>>"
 ## CHECK:  successors       "<<BEnterTry2>>"
 ## CHECK:  flags            "catch_block"
 
@@ -928,18 +939,18 @@
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
-## CHECK:  name             "<<BExitTry1>>"
-## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BCatch>>"
-## CHECK:  xhandlers        "<<BCatch>>"
-## CHECK:  TryBoundary      kind:exit
-
 ## CHECK:  name             "<<BEnterTry2>>"
 ## CHECK:  predecessors     "<<BCatch>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
+## CHECK:  name             "<<BExitTry1>>"
+## CHECK:  predecessors     "<<BTry1>>"
+## CHECK:  successors       "<<BCatch>>"
+## CHECK:  xhandlers        "<<BCatch>>"
+## CHECK:  TryBoundary      kind:exit
+
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
@@ -1001,18 +1012,18 @@
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:entry
 
-## CHECK:  name             "<<BExitTry1>>"
-## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BCatch2>>"
-## CHECK:  xhandlers        "<<BCatch2>>"
-## CHECK:  TryBoundary      kind:exit
-
 ## CHECK:  name             "<<BEnterTry2>>"
 ## CHECK:  predecessors     "<<BCatch2>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch1>>"
 ## CHECK:  TryBoundary      kind:entry
 
+## CHECK:  name             "<<BExitTry1>>"
+## CHECK:  predecessors     "<<BTry1>>"
+## CHECK:  successors       "<<BCatch2>>"
+## CHECK:  xhandlers        "<<BCatch2>>"
+## CHECK:  TryBoundary      kind:exit
+
 ## CHECK:  name             "<<BExitTry2>>"
 ## CHECK:  predecessors     "<<BTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
@@ -1037,6 +1048,52 @@
     return p0
 .end method
 
+# Test graph with try/catch inside a loop.
+
+## CHECK-START: int Builder.testTryInLoop(int, int) builder (after)
+
+## CHECK:  name             "B0"
+## CHECK:  successors       "<<BEnterTry:B\d+>>"
+
+## CHECK:  name             "<<BTry:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry>>"
+## CHECK:  successors       "<<BExitTry:B\d+>>"
+## CHECK:  Div
+
+## CHECK:  name             "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry>>" "<<BExitTry>>"
+## CHECK:  successors       "<<BEnterTry>>"
+## CHECK:  flags            "catch_block"
+
+## CHECK:  name             "<<BExit:B\d+>>"
+## CHECK-NOT: predecessors  "{{B\d+}}"
+## CHECK:  end_block
+
+## CHECK:  name             "<<BEnterTry>>"
+## CHECK:  predecessors     "B0"
+## CHECK:  successors       "<<BTry>>"
+## CHECK:  xhandlers        "<<BCatch>>"
+## CHECK:  TryBoundary      kind:entry
+
+## CHECK:  name             "<<BExitTry>>"
+## CHECK:  predecessors     "<<BTry>>"
+## CHECK:  successors       "<<BEnterTry>>"
+## CHECK:  xhandlers        "<<BCatch>>"
+## CHECK:  TryBoundary      kind:exit
+
+.method public static testTryInLoop(II)I
+    .registers 3
+
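+    # Neither the try body nor the catch handler ever falls through: both branch back to
+    # :try_start, so the exit block has no predecessors.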
+    :try_start
+    div-int/2addr p0, p1
+    goto :try_start
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :catch_all
+    goto :try_start
+.end method
+
 # Test that a MOVE_RESULT instruction is placed into the same block as the
 # INVOKE it follows, even if there is a try boundary between them.
 
diff --git a/test/526-checker-caller-callee-regs/src/Main.java b/test/526-checker-caller-callee-regs/src/Main.java
index a1f3301..f402c2c 100644
--- a/test/526-checker-caller-callee-regs/src/Main.java
+++ b/test/526-checker-caller-callee-regs/src/Main.java
@@ -36,6 +36,8 @@
   // ------------------------------|------------------------|-----------------
   // ARM64 callee-saved registers  | [x20-x29]              | x2[0-9]
   // ARM callee-saved registers    | [r5-r8,r10,r11]        | r([5-8]|10|11)
+  // X86 callee-saved registers    | [ebp,esi,edi]          | e(bp|si|di)
+  // X86_64 callee-saved registers | [rbx,rbp,r12-15]       | r(bx|bp|1[2-5])
 
   /**
    * Check that a value live across a function call is allocated in a callee
@@ -58,7 +60,21 @@
   /// CHECK:                        Sub [<<t1>>,<<t2>>]
   /// CHECK:                        Return
 
-  // TODO: Add tests for other architectures.
+  /// CHECK-START-X86: int Main.$opt$LiveInCall(int) register (after)
+  /// CHECK-DAG:   <<Arg:i\d+>>     ParameterValue
+  /// CHECK-DAG:   <<Const1:i\d+>>  IntConstant 1
+  /// CHECK:       <<t1:i\d+>>      Add [<<Arg>>,<<Const1>>] {{.*->e(bp|si|di)}}
+  /// CHECK:       <<t2:i\d+>>      InvokeStaticOrDirect
+  /// CHECK:                        Sub [<<t1>>,<<t2>>]
+  /// CHECK:                        Return
+
+  /// CHECK-START-X86_64: int Main.$opt$LiveInCall(int) register (after)
+  /// CHECK-DAG:   <<Arg:i\d+>>     ParameterValue
+  /// CHECK-DAG:   <<Const1:i\d+>>  IntConstant 1
+  /// CHECK:       <<t1:i\d+>>      Add [<<Arg>>,<<Const1>>] {{.*->r(bx|bp|1[2-5])}}
+  /// CHECK:       <<t2:i\d+>>      InvokeStaticOrDirect
+  /// CHECK:                        Sub [<<t1>>,<<t2>>]
+  /// CHECK:                        Return
 
   public static int $opt$LiveInCall(int arg) {
     int t1 = arg + 1;
diff --git a/test/529-checker-unresolved/src/Main.java b/test/529-checker-unresolved/src/Main.java
index 6f04797..adb5ada 100644
--- a/test/529-checker-unresolved/src/Main.java
+++ b/test/529-checker-unresolved/src/Main.java
@@ -44,6 +44,76 @@
     super.superMethod();
   }
 
+  /// CHECK-START: void Main.callUnresolvedStaticFieldAccess() register (before)
+  /// CHECK:        UnresolvedStaticFieldSet field_type:PrimByte
+  /// CHECK:        UnresolvedStaticFieldSet field_type:PrimChar
+  /// CHECK:        UnresolvedStaticFieldSet field_type:PrimInt
+  /// CHECK:        UnresolvedStaticFieldSet field_type:PrimLong
+  /// CHECK:        UnresolvedStaticFieldSet field_type:PrimFloat
+  /// CHECK:        UnresolvedStaticFieldSet field_type:PrimDouble
+  /// CHECK:        UnresolvedStaticFieldSet field_type:PrimNot
+
+  /// CHECK:        UnresolvedStaticFieldGet field_type:PrimByte
+  /// CHECK:        UnresolvedStaticFieldGet field_type:PrimChar
+  /// CHECK:        UnresolvedStaticFieldGet field_type:PrimInt
+  /// CHECK:        UnresolvedStaticFieldGet field_type:PrimLong
+  /// CHECK:        UnresolvedStaticFieldGet field_type:PrimFloat
+  /// CHECK:        UnresolvedStaticFieldGet field_type:PrimDouble
+  /// CHECK:        UnresolvedStaticFieldGet field_type:PrimNot
+  static public void callUnresolvedStaticFieldAccess() {
+    Object o = new Object();
+    UnresolvedClass.staticByte = (byte)1;
+    UnresolvedClass.staticChar = '1';
+    UnresolvedClass.staticInt = 123456789;
+    UnresolvedClass.staticLong = 123456789123456789l;
+    UnresolvedClass.staticFloat = 123456789123456789f;
+    UnresolvedClass.staticDouble = 123456789123456789d;
+    UnresolvedClass.staticObject = o;
+
+    expectEquals((byte)1, UnresolvedClass.staticByte);
+    expectEquals('1', UnresolvedClass.staticChar);
+    expectEquals(123456789, UnresolvedClass.staticInt);
+    expectEquals(123456789123456789l, UnresolvedClass.staticLong);
+    expectEquals(123456789123456789f, UnresolvedClass.staticFloat);
+    expectEquals(123456789123456789d, UnresolvedClass.staticDouble);
+    expectEquals(o, UnresolvedClass.staticObject);
+  }
+
+  /// CHECK-START: void Main.callUnresolvedInstanceFieldAccess(UnresolvedClass) register (before)
+  /// CHECK:        UnresolvedInstanceFieldSet field_type:PrimByte
+  /// CHECK:        UnresolvedInstanceFieldSet field_type:PrimChar
+  /// CHECK:        UnresolvedInstanceFieldSet field_type:PrimInt
+  /// CHECK:        UnresolvedInstanceFieldSet field_type:PrimLong
+  /// CHECK:        UnresolvedInstanceFieldSet field_type:PrimFloat
+  /// CHECK:        UnresolvedInstanceFieldSet field_type:PrimDouble
+  /// CHECK:        UnresolvedInstanceFieldSet field_type:PrimNot
+
+  /// CHECK:        UnresolvedInstanceFieldGet field_type:PrimByte
+  /// CHECK:        UnresolvedInstanceFieldGet field_type:PrimChar
+  /// CHECK:        UnresolvedInstanceFieldGet field_type:PrimInt
+  /// CHECK:        UnresolvedInstanceFieldGet field_type:PrimLong
+  /// CHECK:        UnresolvedInstanceFieldGet field_type:PrimFloat
+  /// CHECK:        UnresolvedInstanceFieldGet field_type:PrimDouble
+  /// CHECK:        UnresolvedInstanceFieldGet field_type:PrimNot
+  static public void callUnresolvedInstanceFieldAccess(UnresolvedClass c) {
+    Object o = new Object();
+    c.instanceByte = (byte)1;
+    c.instanceChar = '1';
+    c.instanceInt = 123456789;
+    c.instanceLong = 123456789123456789l;
+    c.instanceFloat = 123456789123456789f;
+    c.instanceDouble = 123456789123456789d;
+    c.instanceObject = o;
+
+    expectEquals((byte)1, c.instanceByte);
+    expectEquals('1', c.instanceChar);
+    expectEquals(123456789, c.instanceInt);
+    expectEquals(123456789123456789l, c.instanceLong);
+    expectEquals(123456789123456789f, c.instanceFloat);
+    expectEquals(123456789123456789d, c.instanceDouble);
+    expectEquals(o, c.instanceObject);
+  }
+
   /// CHECK-START: void Main.main(java.lang.String[]) register (before)
   /// CHECK:        InvokeUnresolved invoke_type:direct
   static public void main(String[] args) {
@@ -52,5 +122,49 @@
     callInvokeUnresolvedVirtual(c);
     callInvokeUnresolvedInterface(c);
     callInvokeUnresolvedSuper(new Main());
+    callUnresolvedStaticFieldAccess();
+    callUnresolvedInstanceFieldAccess(c);
+  }
+
+  public static void expectEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(Object expected, Object result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
   }
 }
diff --git a/test/529-checker-unresolved/src/Unresolved.java b/test/529-checker-unresolved/src/Unresolved.java
index 5bf92dd..03ceb68 100644
--- a/test/529-checker-unresolved/src/Unresolved.java
+++ b/test/529-checker-unresolved/src/Unresolved.java
@@ -40,6 +40,22 @@
   public void interfaceMethod() {
     System.out.println("UnresolvedClass.interfaceMethod()");
   }
+
+  public static byte staticByte;
+  public static char staticChar;
+  public static int staticInt;
+  public static long staticLong;
+  public static float staticFloat;
+  public static double staticDouble;
+  public static Object staticObject;
+
+  public byte instanceByte;
+  public char instanceChar;
+  public int instanceInt;
+  public long instanceLong;
+  public float instanceFloat;
+  public double instanceDouble;
+  public Object instanceObject;
 }
 
 final class UnresolvedFinalClass {
diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java
index e518a61..58c92f1 100644
--- a/test/530-checker-loops/src/Main.java
+++ b/test/530-checker-loops/src/Main.java
@@ -22,7 +22,7 @@
   static int sResult;
 
   //
-  // Various sequence variables where bound checks can be removed from loop.
+  // Various sequence variables used in bounds checks.
   //
 
   /// CHECK-START: int Main.linear(int[]) BCE (before)
@@ -62,6 +62,19 @@
     return result;
   }
 
+  /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int linearVeryObscure(int[] x) {
+    int result = 0;
+    for (int i = 0; i < x.length; i++) {
+      int k = (-i) + (i << 5) + i - (32 * i) + 5 + (int) i;
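+      // k algebraically reduces to i + 5, so x[k - 5] is simply x[i]; BCE must see through
+      // the obfuscation.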
+      result += x[k - 5];
+    }
+    return result;
+  }
+
   /// CHECK-START: int Main.linearWhile(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWhile(int[]) BCE (after)
@@ -75,6 +88,42 @@
     return result;
   }
 
+  /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int linearThreeWayPhi(int[] x) {
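+    // The 'continue' adds a second back edge, so i is defined by a three-way phi;
+    // the bounds check is still eliminated.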
+    int result = 0;
+    for (int i = 0; i < x.length; ) {
+      if (x[i] == 5) {
+        i++;
+        continue;
+      }
+      result += x[i++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int linearFourWayPhi(int[] x) {
+    int result = 0;
+    for (int i = 0; i < x.length; ) {
+      if (x[i] == 5) {
+        i++;
+        continue;
+      } else if (x[i] == 6) {
+        i++;
+        result += 7;
+        continue;
+      }
+      result += x[i++];
+    }
+    return result;
+  }
+
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after)
@@ -90,6 +139,25 @@
     return result;
   }
 
+  /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int wrapAroundThenLinearThreeWayPhi(int[] x) {
+    // Loop with wrap around (length - 1, 0, 1, 2, ..).
+    int w = x.length - 1;
+    int result = 0;
+    for (int i = 0; i < x.length; ) {
+       if (x[w] == 1) {
+         w = i++;
+         continue;
+       }
+       result += x[w];
+       w = i++;
+    }
+    return result;
+  }
+
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after)
@@ -102,6 +170,19 @@
     return x;
   }
 
+  /// CHECK-START: int[] Main.linearCopy(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int[] Main.linearCopy(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int[] linearCopy(int x[]) {
+    int n = x.length;
+    int y[] = new int[n];
+    for (int i = 0; i < n; i++) {
+      y[i] = x[i];
+    }
+    return y;
+  }
+
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (after)
@@ -126,7 +207,7 @@
     int result = 0;
     int k = 0;
     // Range analysis has no problem with a trip-count defined by a
-    // reasonably large positive stride.
+    // reasonably large positive stride far away from upper bound.
     for (int i = 1; i <= 10 * 10000000 + 1; i += 10000000) {
       result += x[k++];
     }
@@ -143,7 +224,7 @@
     int k = 0;
     // Range analysis conservatively bails due to potential of wrap-around
     // arithmetic while computing the trip-count for this very large stride.
-    for (int i = 1; i < 2147483647; i += 195225786) {
+    for (int i = 1; i < Integer.MAX_VALUE; i += 195225786) {
       result += x[k++];
     }
     return result;
@@ -158,7 +239,7 @@
     int result = 0;
     int k = 0;
     // Range analysis has no problem with a trip-count defined by a
-    // reasonably large negative stride.
+    // reasonably large negative stride far away from lower bound.
     for (int i = -1; i >= -10 * 10000000 - 1; i -= 10000000) {
       result += x[k++];
     }
@@ -175,12 +256,80 @@
     int k = 0;
     // Range analysis conservatively bails due to potential of wrap-around
     // arithmetic while computing the trip-count for this very large stride.
-    for (int i = -2; i > -2147483648; i -= 195225786) {
+    for (int i = -2; i > Integer.MIN_VALUE; i -= 195225786) {
       result += x[k++];
     }
     return result;
   }
 
+  /// CHECK-START: int Main.linearForNEUp() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearForNEUp() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int linearForNEUp() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = 0; i != 10; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearForNEDown() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearForNEDown() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int linearForNEDown() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = 9; i != -1; i--) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearDoWhileUp() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearDoWhileUp() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int linearDoWhileUp() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    int i = 0;
+    do {
+      result += x[i++];
+    } while (i < 10);
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearDoWhileDown() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearDoWhileDown() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int linearDoWhileDown() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    int i = 9;
+    do {
+      result += x[i--];
+    } while (0 <= i);
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearShort() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearShort() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  private static int linearShort() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    // TODO: make this work
+    for (short i = 0; i < 10; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
   /// CHECK-START: int Main.periodicIdiom(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.periodicIdiom(int) BCE (after)
@@ -242,23 +391,156 @@
     return result;
   }
 
-  //
-  // Cases that actually go out of bounds. These test cases
-  // ensure the exceptions are thrown at the right places.
-  //
+  /// CHECK-START: int Main.justRightUp1() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.justRightUp1() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int justRightUp1() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = Integer.MAX_VALUE - 10, k = 0; i < Integer.MAX_VALUE; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
 
+  /// CHECK-START: int Main.justRightUp2() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.justRightUp2() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int justRightUp2() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = Integer.MAX_VALUE - 10; i < Integer.MAX_VALUE; i++) {
+      result += x[i - Integer.MAX_VALUE + 10];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.justRightUp3() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.justRightUp3() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int justRightUp3() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = Integer.MAX_VALUE - 10, k = 0; i <= Integer.MAX_VALUE - 1; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.justOOBUp() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.justOOBUp() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  private static int justOOBUp() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    // Infinite loop!
+    for (int i = Integer.MAX_VALUE - 9, k = 0; i <= Integer.MAX_VALUE; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.justRightDown1() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.justRightDown1() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int justRightDown1() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = Integer.MIN_VALUE + 10, k = 0; i > Integer.MIN_VALUE; i--) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.justRightDown2() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.justRightDown2() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int justRightDown2() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = Integer.MIN_VALUE + 10; i > Integer.MIN_VALUE; i--) {
+      result += x[Integer.MAX_VALUE + i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.justRightDown3() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.justRightDown3() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int justRightDown3() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    for (int i = Integer.MIN_VALUE + 10, k = 0; i >= Integer.MIN_VALUE + 1; i--) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.justOOBDown() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.justOOBDown() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  private static int justOOBDown() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int result = 0;
+    // Infinite loop!
+    for (int i = Integer.MIN_VALUE + 9, k = 0; i >= Integer.MIN_VALUE; i--) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: void Main.lowerOOB(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck
   private static void lowerOOB(int[] x) {
     for (int i = -1; i < x.length; i++) {
       sResult += x[i];
     }
   }
 
+  /// CHECK-START: void Main.upperOOB(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck
   private static void upperOOB(int[] x) {
     for (int i = 0; i <= x.length; i++) {
       sResult += x[i];
     }
   }
 
+  /// CHECK-START: void Main.doWhileUpOOB() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  private static void doWhileUpOOB() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int i = 0;
+    do {
+      sResult += x[i++];
+    } while (i <= x.length);
+  }
+
+  /// CHECK-START: void Main.doWhileDownOOB() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  private static void doWhileDownOOB() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+    int i = x.length - 1;
+    do {
+      sResult += x[i--];
+    } while (-1 <= i);
+  }
+
   //
   // Verifier.
   //
@@ -274,10 +556,18 @@
     expectEquals(55, linearDown(x));
     expectEquals(0, linearObscure(empty));
     expectEquals(55, linearObscure(x));
+    expectEquals(0, linearVeryObscure(empty));
+    expectEquals(55, linearVeryObscure(x));
     expectEquals(0, linearWhile(empty));
     expectEquals(55, linearWhile(x));
+    expectEquals(0, linearThreeWayPhi(empty));
+    expectEquals(50, linearThreeWayPhi(x));
+    expectEquals(0, linearFourWayPhi(empty));
+    expectEquals(51, linearFourWayPhi(x));
     expectEquals(0, wrapAroundThenLinear(empty));
     expectEquals(55, wrapAroundThenLinear(x));
+    expectEquals(0, wrapAroundThenLinearThreeWayPhi(empty));
+    expectEquals(54, wrapAroundThenLinearThreeWayPhi(x));
 
     // Linear with parameter.
     sResult = 0;
@@ -295,6 +585,16 @@
       }
     }
 
+    // Linear copy.
+    expectEquals(0, linearCopy(empty).length);
+    {
+      int[] r = linearCopy(x);
+      expectEquals(x.length, r.length);
+      for (int i = 0; i < x.length; i++) {
+        expectEquals(x[i], r[i]);
+      }
+    }
+
     // Linear with non-unit strides.
     expectEquals(56, linearWithCompoundStride());
     expectEquals(66, linearWithLargePositiveStride());
@@ -302,6 +602,13 @@
     expectEquals(66, linearWithLargeNegativeStride());
     expectEquals(66, linearWithVeryLargeNegativeStride());
 
+    // Special forms.
+    expectEquals(55, linearForNEUp());
+    expectEquals(55, linearForNEDown());
+    expectEquals(55, linearDoWhileUp());
+    expectEquals(55, linearDoWhileDown());
+    expectEquals(55, linearShort());
+
     // Periodic adds (1, 3), one at the time.
     expectEquals(0, periodicIdiom(-1));
     for (int tc = 0; tc < 32; tc++) {
@@ -326,6 +633,28 @@
       expectEquals(tc * 16, periodicSequence4(tc));
     }
 
+    // Large bounds.
+    expectEquals(55, justRightUp1());
+    expectEquals(55, justRightUp2());
+    expectEquals(55, justRightUp3());
+    expectEquals(55, justRightDown1());
+    expectEquals(55, justRightDown2());
+    expectEquals(55, justRightDown3());
+    sResult = 0;
+    try {
+      justOOBUp();
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult = 1;
+    }
+    expectEquals(1, sResult);
+    sResult = 0;
+    try {
+      justOOBDown();
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult = 1;
+    }
+    expectEquals(1, sResult);
+
     // Lower bound goes OOB.
     sResult = 0;
     try {
@@ -344,6 +673,23 @@
     }
     expectEquals(1055, sResult);
 
+    // Do while up goes OOB.
+    sResult = 0;
+    try {
+      doWhileUpOOB();
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1055, sResult);
+
+    // Do while down goes OOB.
+    sResult = 0;
+    try {
+      doWhileDownOOB();
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1055, sResult);
   }
 
   private static void expectEquals(int expected, int result) {
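The comments in linearWithVeryLargePositiveStride and linearWithVeryLargeNegativeStride above refer to wrap-around arithmetic in the trip-count computation. A minimal standalone Java sketch of that hazard, assuming the conventional ceiling formula (upper - lower + stride - 1) / stride (an illustration, not the compiler's actual code):

    public class TripCountSketch {
      public static void main(String[] args) {
        int lower = 1, upper = Integer.MAX_VALUE, stride = 195225786;
        // Numerator 2147483646 + 195225785 exceeds Integer.MAX_VALUE and wraps negative.
        int narrow = (upper - lower + stride - 1) / stride;        // -10: useless trip count
        // Widening to 64 bits avoids the wrap-around.
        long wide = ((long) upper - lower + stride - 1) / stride;  // 11 iterations
        System.out.println("32-bit: " + narrow + ", 64-bit: " + wide);
      }
    }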
diff --git a/test/532-checker-nonnull-arrayset/expected.txt b/test/532-checker-nonnull-arrayset/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/532-checker-nonnull-arrayset/expected.txt
diff --git a/test/532-checker-nonnull-arrayset/info.txt b/test/532-checker-nonnull-arrayset/info.txt
new file mode 100644
index 0000000..e1578c8
--- /dev/null
+++ b/test/532-checker-nonnull-arrayset/info.txt
@@ -0,0 +1 @@
+Test that we optimize ArraySet when the value is not null.
diff --git a/test/532-checker-nonnull-arrayset/src/Main.java b/test/532-checker-nonnull-arrayset/src/Main.java
new file mode 100644
index 0000000..7d8fff4
--- /dev/null
+++ b/test/532-checker-nonnull-arrayset/src/Main.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Check that we don't put a null check in the card marking code.
+
+  /// CHECK-START: void Main.test() instruction_simplifier (before)
+  /// CHECK:          ArraySet value_can_be_null:true
+
+  /// CHECK-START: void Main.test() instruction_simplifier (after)
+  /// CHECK:          ArraySet value_can_be_null:false
+
+  /// CHECK-START-X86: void Main.test() disassembly (after)
+  /// CHECK:          ArraySet value_can_be_null:false
+  /// CHECK-NOT:      test
+  /// CHECK:          ReturnVoid
+  public static void test() {
+    Object[] array = new Object[1];
+    Object nonNull = array[0];
+    nonNull.getClass(); // Ensure nonNull has an implicit null check.
+    array[0] = nonNull;
+  }
+
+  public static void main(String[] args) {}
+}
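For contrast with the test above, a hypothetical variant (not part of this patch): without the preceding getClass() null check the stored value may be null, so the simplifier would presumably have to keep value_can_be_null:true and leave the card-marking null test in place.

    class MayBeNullSketch {
      public static void testMayBeNull(Object maybeNull) {
        Object[] array = new Object[1];
        array[0] = maybeNull;  // no prior null check, so the value may legitimately be null
      }
    }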
diff --git a/test/533-regression-debugphi/expected.txt b/test/533-regression-debugphi/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/533-regression-debugphi/expected.txt
diff --git a/test/533-regression-debugphi/info.txt b/test/533-regression-debugphi/info.txt
new file mode 100644
index 0000000..a4d4857
--- /dev/null
+++ b/test/533-regression-debugphi/info.txt
@@ -0,0 +1,2 @@
+Test a regression where DeadPhiHandling would infinitely loop over
+complicated phi dependencies.
diff --git a/test/533-regression-debugphi/smali/TestCase.smali b/test/533-regression-debugphi/smali/TestCase.smali
new file mode 100644
index 0000000..1908e72
--- /dev/null
+++ b/test/533-regression-debugphi/smali/TestCase.smali
@@ -0,0 +1,72 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+# This is a reduced test case that used to trigger an infinite loop
+# in the DeadPhiHandling phase of the optimizing compiler (only used
+# with debuggable flag).
+.method public static testCase(IILjava/lang/Object;)V
+  .registers 5
+  const/4 v0, 0x0
+
+  :B4
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  goto :B7
+
+  :B7
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  if-nez p2, :Btmp
+  goto :B111
+
+  :Btmp
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  if-nez p2, :B9
+  goto :B110
+
+  :B13
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  add-int v0, p0, p1
+  goto :B7
+
+  :B110
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  add-int v0, p0, p1
+  goto :B111
+
+  :B111
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  goto :B4
+
+  :B9
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  if-nez p2, :B10
+
+  :B11
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  move v1, v0
+  goto :B12
+
+  :B10
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  move-object v1, p2
+  goto :B12
+
+  :B12
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  goto :B13
+
+  return-void
+.end method
diff --git a/test/533-regression-debugphi/src/Main.java b/test/533-regression-debugphi/src/Main.java
new file mode 100644
index 0000000..858770f
--- /dev/null
+++ b/test/533-regression-debugphi/src/Main.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {}
+}
diff --git a/test/534-checker-bce-deoptimization/expected.txt b/test/534-checker-bce-deoptimization/expected.txt
new file mode 100644
index 0000000..3823a29
--- /dev/null
+++ b/test/534-checker-bce-deoptimization/expected.txt
@@ -0,0 +1 @@
+finish
diff --git a/test/534-checker-bce-deoptimization/info.txt b/test/534-checker-bce-deoptimization/info.txt
new file mode 100644
index 0000000..9f097d0
--- /dev/null
+++ b/test/534-checker-bce-deoptimization/info.txt
@@ -0,0 +1,8 @@
+Checker test for testing the behavior of deoptimization generated by
+bounds check elimination.
+
+The runtime used to trip on that test because it used to deopt the
+whole stack, and the compiler was not preserving dex registers at
+call sites.
+
+We fixed the bug by doing single frame deoptimization.
diff --git a/test/534-checker-bce-deoptimization/src/Main.java b/test/534-checker-bce-deoptimization/src/Main.java
new file mode 100644
index 0000000..8cd20f6
--- /dev/null
+++ b/test/534-checker-bce-deoptimization/src/Main.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+    public static void main(String[] args) {
+        new Main().run();
+        System.out.println("finish");
+    }
+
+    public void run() {
+        double a[][] = new double[200][201];
+        double b[] = new double[200];
+        int n = 100;
+
+        foo1(a, n, b);
+    }
+
+    void foo1(double a[][], int n, double b[]) {
+        double t;
+        int i,k;
+
+        for (i = 0; i < n; i++) {
+            k = n - (i + 1);
+            b[k] /= a[k][k];
+            t = -b[k];
+            foo2(k + 1000, t, b);
+        }
+    }
+
+    void foo2(int n, double c, double b[]) {
+        try {
+            foo3(n, c, b);
+        } catch (Exception e) {
+        }
+    }
+
+    void foo3(int n, double c, double b[]) {
+        int i = 0;
+        for (i = 0; i < n; i++) {
+            b[i + 1] += c * b[i + 1];
+        }
+    }
+}
+
diff --git a/test/535-deopt-and-inlining/expected.txt b/test/535-deopt-and-inlining/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/535-deopt-and-inlining/expected.txt
diff --git a/test/535-deopt-and-inlining/info.txt b/test/535-deopt-and-inlining/info.txt
new file mode 100644
index 0000000..717612a
--- /dev/null
+++ b/test/535-deopt-and-inlining/info.txt
@@ -0,0 +1,2 @@
+Stress test for deoptimization and JIT, to ensure the
+stack visitor uses the right ArtMethod when deopting.
diff --git a/test/535-deopt-and-inlining/src/Main.java b/test/535-deopt-and-inlining/src/Main.java
new file mode 100644
index 0000000..c231bf0
--- /dev/null
+++ b/test/535-deopt-and-inlining/src/Main.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void run() {
+    // Loop enough to get JIT compilation.
+    for (int i = 0; i < 10000; ++i) {
+      doCall(new int[0]);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    run();
+  }
+
+  public static void doCall(int[] array) {
+    try {
+      deopt(array);
+    } catch (IndexOutOfBoundsException ioobe) {
+      // Expected
+    }
+  }
+
+  public static void deopt(int[] array) {
+    // Invoke `deopt` much more than `$inline$deopt` so that only `deopt` gets
+    // initially JITted.
+    if (call == 100) {
+      call = 0;
+      $inline$deopt(array);
+    } else {
+      call++;
+    }
+  }
+
+  public static void $inline$deopt(int[] array) {
+    array[0] = 1;
+    array[1] = 1;
+  }
+
+  static int call = 0;
+}
diff --git a/test/535-regression-const-val/expected.txt b/test/535-regression-const-val/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/535-regression-const-val/expected.txt
diff --git a/test/535-regression-const-val/info.txt b/test/535-regression-const-val/info.txt
new file mode 100644
index 0000000..ea3e67b
--- /dev/null
+++ b/test/535-regression-const-val/info.txt
@@ -0,0 +1,2 @@
+Test a regression where SsaChecker would fail when comparing the raw value of an
+IntConstant against that of a FloatConstant due to sign extension in a static_cast.
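A small standalone Java sketch of the sign-extension effect this note refers to (illustrative only; the checker itself is C++ and the names below are made up):

    public class RawBitsSketch {
      public static void main(String[] args) {
        int raw = Float.floatToRawIntBits(-0.25f);     // 0xbe800000, sign bit set
        long signExtended = (long) raw;                // 0xffffffffbe800000
        long zeroExtended = raw & 0xFFFFFFFFL;         // 0x00000000be800000
        // Comparing the sign-extended form against the 32-bit raw value spuriously fails.
        System.out.println(Long.toHexString(signExtended) + " vs " + Long.toHexString(zeroExtended));
      }
    }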
diff --git a/test/535-regression-const-val/smali/TestCase.smali b/test/535-regression-const-val/smali/TestCase.smali
new file mode 100644
index 0000000..f42f173
--- /dev/null
+++ b/test/535-regression-const-val/smali/TestCase.smali
@@ -0,0 +1,36 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+.method public static testCase(ZZ)I
+  .registers 5
+
+  # Create Phi [ 0.0f, -0.25f ].
+  # Binary representation of -0.25f has the most significant bit set.
+  if-eqz p0, :else
+  :then
+    const v0, 0x0
+    goto :merge
+  :else
+    const/high16 v0, 0xbe800000
+  :merge
+
+  # Now use as either float or int.
+  if-eqz p1, :return
+  float-to-int v0, v0
+  :return
+  return v0
+.end method
diff --git a/test/535-regression-const-val/src/Main.java b/test/535-regression-const-val/src/Main.java
new file mode 100644
index 0000000..858770f
--- /dev/null
+++ b/test/535-regression-const-val/src/Main.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {}
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 6568eac..17c1f00 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -1,4 +1,6 @@
 PackedSwitch
+PackedSwitch key INT_MAX
+PackedSwitch key overflow
 b/17790197
 FloatBadArgReg
 negLong
diff --git a/test/800-smali/smali/PackedSwitch.smali b/test/800-smali/smali/PackedSwitch.smali
index 6a3e5f0..95659fb 100644
--- a/test/800-smali/smali/PackedSwitch.smali
+++ b/test/800-smali/smali/PackedSwitch.smali
@@ -24,3 +24,29 @@
     goto :return
 
 .end method
+
+.method public static packedSwitch_INT_MAX(I)I
+    .registers 2
+
+    const/4 v0, 0
+    packed-switch v0, :switch_data
+    goto :default
+
+    :switch_data
+    .packed-switch 0x7FFFFFFE
+        :case1  # key = INT_MAX - 1
+        :case2  # key = INT_MAX
+    .end packed-switch
+
+    :return
+    return v1
+
+    :default
+    goto :return
+
+    :case1
+    goto :return
+    :case2
+    goto :return
+
+.end method
diff --git a/test/800-smali/smali/b_24399945.smali b/test/800-smali/smali/b_24399945.smali
new file mode 100644
index 0000000..68f59d0
--- /dev/null
+++ b/test/800-smali/smali/b_24399945.smali
@@ -0,0 +1,32 @@
+.class public Lb_24399945;
+
+.super Ljava/lang/Object;
+
+.method public static packedSwitch_overflow(I)I
+    .registers 2
+
+    const/4 v0, 0
+    packed-switch v0, :switch_data
+    goto :default
+
+    :switch_data
+    .packed-switch 0x7FFFFFFE
+        :case1 # key = INT_MAX - 1
+        :case2 # key = INT_MAX
+        :case3 # key = INT_MIN (overflow!)
+    .end packed-switch
+
+    :return
+    return v1
+
+    :default
+    goto :return
+
+    :case1
+    goto :return
+    :case2
+    goto :return
+    :case3
+    goto :return
+
+.end method
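The key arithmetic behind the expected VerifyError, as a standalone Java illustration (not part of the test): packed-switch keys are consecutive ints starting at the declared first key, so a third entry starting from 0x7FFFFFFE wraps past Integer.MAX_VALUE.

    public class PackedSwitchKeySketch {
      public static void main(String[] args) {
        int firstKey = 0x7FFFFFFE;        // Integer.MAX_VALUE - 1
        int lastValid = firstKey + 1;     // Integer.MAX_VALUE, still representable
        int overflowed = firstKey + 2;    // wraps to Integer.MIN_VALUE
        System.out.println(lastValid + " " + overflowed);  // 2147483647 -2147483648
      }
    }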
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index ba4990a..f75747d 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -51,6 +51,10 @@
         testCases = new LinkedList<TestCase>();
         testCases.add(new TestCase("PackedSwitch", "PackedSwitch", "packedSwitch",
                 new Object[]{123}, null, 123));
+        testCases.add(new TestCase("PackedSwitch key INT_MAX", "PackedSwitch",
+                "packedSwitch_INT_MAX", new Object[]{123}, null, 123));
+        testCases.add(new TestCase("PackedSwitch key overflow", "b_24399945",
+                "packedSwitch_overflow", new Object[]{123}, new VerifyError(), null));
 
         testCases.add(new TestCase("b/17790197", "B17790197", "getInt", null, null, 100));
         testCases.add(new TestCase("FloatBadArgReg", "FloatBadArgReg", "getInt",
diff --git a/test/955-lambda-smali/expected.txt b/test/955-lambda-smali/expected.txt
index 3637099..16381e4 100644
--- a/test/955-lambda-smali/expected.txt
+++ b/test/955-lambda-smali/expected.txt
@@ -16,3 +16,13 @@
 (MoveResult) testF success
 (MoveResult) testD success
 (MoveResult) testL success
+(CaptureVariables) (0-args, 1 captured variable 'Z'): value is true
+(CaptureVariables) (0-args, 1 captured variable 'B'): value is R
+(CaptureVariables) (0-args, 1 captured variable 'C'): value is ∂
+(CaptureVariables) (0-args, 1 captured variable 'S'): value is 1000
+(CaptureVariables) (0-args, 1 captured variable 'I'): value is 12345678
+(CaptureVariables) (0-args, 1 captured variable 'J'): value is 3287471278325742
+(CaptureVariables) (0-args, 1 captured variable 'F'): value is Infinity
+(CaptureVariables) (0-args, 1 captured variable 'D'): value is -Infinity
+(CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is true,R,∂,1000,12345678,3287471278325742,Infinity,-Infinity
+(CaptureVariables) Caught NPE
diff --git a/test/955-lambda-smali/smali/BoxUnbox.smali b/test/955-lambda-smali/smali/BoxUnbox.smali
index 108b5fa..915de2d 100644
--- a/test/955-lambda-smali/smali/BoxUnbox.smali
+++ b/test/955-lambda-smali/smali/BoxUnbox.smali
@@ -1,4 +1,3 @@
-#
 #  Copyright (C) 2015 The Android Open Source Project
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
@@ -36,8 +35,8 @@
 .end method
 
 #TODO: should use a closure type instead of ArtMethod.
-.method public static doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
-    .registers 3 # 1 parameters, 2 locals
+.method public static doHelloWorld(J)V
+    .registers 4 # 1 wide parameter, 2 locals
 
     const-string v0, "(BoxUnbox) Hello boxing world! (0-args, no closure)"
 
@@ -51,9 +50,9 @@
 .method private static testBox()V
     .registers 3
 
-    create-lambda v0, LBoxUnbox;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
+    create-lambda v0, LBoxUnbox;->doHelloWorld(J)V
     box-lambda v2, v0 # v2 = box(v0)
-    unbox-lambda v0, v2, Ljava/lang/reflect/ArtMethod; # v0 = unbox(v2)
+    unbox-lambda v0, v2, J # v0 = unbox(v2)
     invoke-lambda v0, {}
 
     return-void
@@ -63,7 +62,7 @@
 .method private static testBoxEquality()V
    .registers 6 # 0 parameters, 6 locals
 
-    create-lambda v0, LBoxUnbox;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
+    create-lambda v0, LBoxUnbox;->doHelloWorld(J)V
     box-lambda v2, v0 # v2 = box(v0)
     box-lambda v3, v0 # v3 = box(v0)
 
@@ -95,7 +94,7 @@
     const v0, 0  # v0 = null
     const v1, 0  # v1 = null
 :start
-    unbox-lambda v2, v0, Ljava/lang/reflect/ArtMethod;
+    unbox-lambda v2, v0, J
     # attempting to unbox a null lambda will throw NPE
 :end
     return-void
@@ -140,7 +139,7 @@
     const-string v0, "This is not a boxed lambda"
 :start
     # TODO: use \FunctionalType; here instead
-    unbox-lambda v2, v0, Ljava/lang/reflect/ArtMethod;
+    unbox-lambda v2, v0, J
     # can't use a string, expects a lambda object here. throws ClassCastException.
 :end
     return-void
diff --git a/test/955-lambda-smali/smali/CaptureVariables.smali b/test/955-lambda-smali/smali/CaptureVariables.smali
new file mode 100644
index 0000000..f18b7ff
--- /dev/null
+++ b/test/955-lambda-smali/smali/CaptureVariables.smali
@@ -0,0 +1,311 @@
+#
+#  Copyright (C) 2015 The Android Open Source Project
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+.class public LCaptureVariables;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static run()V
+.registers 8
+    # Test boolean capture
+    const v2, 1           # v2 = true
+    capture-variable v2, "Z"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_Z(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Test byte capture
+    const v2, 82       # v2 = 82, 'R'
+    capture-variable v2, "B"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_B(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Test char capture
+    const v2, 0x2202       # v2 = 0x2202, '∂'
+    capture-variable v2, "C"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_C(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Test short capture
+    const v2, 1000 # v2 = 1000
+    capture-variable v2, "S"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_S(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Test int capture
+    const v2, 12345678
+    capture-variable v2, "I"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_I(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Test long capture
+    const-wide v2, 0x0badf00dc0ffeeL # v2 = 3287471278325742
+    capture-variable v2, "J"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_J(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Test float capture
+    const v2, infinityf
+    capture-variable v2, "F"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_F(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Test double capture
+    const-wide v2, -infinity
+    capture-variable v2, "D"
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_D(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    #TODO: capture objects and lambdas once we have support for it
+
+    # Test capturing multiple variables
+    invoke-static {}, LCaptureVariables;->testMultipleCaptures()V
+
+    # Test failures
+    invoke-static {}, LCaptureVariables;->testFailures()V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_Z(J)V
+    .registers 5 # 1 wide parameter, 3 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'Z'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "Z"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(Z)V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_B(J)V
+    .registers 5 # 1 wide parameter, 3 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'B'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "B"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(C)V  # no println(B), use char instead.
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_C(J)V
+    .registers 5 # 1 wide parameter, 3 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'C'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "C"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(C)V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_S(J)V
+    .registers 5 # 1 wide parameter, 3 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'S'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "S"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(I)V  # no println(S), use int instead
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_I(J)V
+    .registers 5 # 1 wide parameter, 3 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'I'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "I"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(I)V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_J(J)V
+    .registers 6 # 1 wide parameter, 4 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'J'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "J"
+    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(J)V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_F(J)V
+    .registers 5 # 1 wide parameter, 3 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'F'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "F"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(F)V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_D(J)V
+    .registers 6 # 1 wide parameter, 4 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'D'): value is "
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "D"
+    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(D)V
+
+    return-void
+.end method
+
+# Test capturing more than one variable.
+.method private static testMultipleCaptures()V
+    .registers 4 # 0 parameters, 4 locals
+
+    const v2, 1           # v2 = true
+    capture-variable v2, "Z"
+
+    const v2, 82       # v2 = 82, 'R'
+    capture-variable v2, "B"
+
+    const v2, 0x2202       # v2 = 0x2202, '∂'
+    capture-variable v2, "C"
+
+    const v2, 1000 # v2 = 1000
+    capture-variable v2, "S"
+
+    const v2, 12345678
+    capture-variable v2, "I"
+
+    const-wide v2, 0x0badf00dc0ffeeL # v2 = 3287471278325742
+    capture-variable v2, "J"
+
+    const v2, infinityf
+    capture-variable v2, "F"
+
+    const-wide v2, -infinity
+    capture-variable v2, "D"
+
+    create-lambda v0, LCaptureVariables;->printCapturedVariable_ZBCSIJFD(J)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of a long
+.method public static printCapturedVariable_ZBCSIJFD(J)V
+    .registers 7 # 1 wide parameter, 5 locals
+
+    const-string v0, "(CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is "
+    const-string v4, ","
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "Z"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(Z)V
+    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "B"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(C)V
+    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "C"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(C)V
+    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "S"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(I)V
+    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "I"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(I)V
+    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "J"
+    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->print(J)V
+    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "F"
+    invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(F)V
+    invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    liberate-variable v2, p0, "D"
+    invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(D)V
+
+    return-void
+.end method
+
+# Test that exceptions are thrown as expected when opcodes are used incorrectly.
+.method private static testFailures()V
+    .registers 4 # 0 parameters, 4 locals
+
+    const v0, 0  # v0 = null
+    const v1, 0  # v1 = null
+:start
+    liberate-variable v0, v2, "Z" # liberating a variable from a null lambda closure shall raise an NPE
+:end
+    return-void
+
+:handler
+    const-string v2, "(CaptureVariables) Caught NPE"
+    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    return-void
+
+    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
+.end method
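A rough Java analogue of the boolean-capture case above (illustrative only; the experimental capture-variable/liberate-variable opcodes have no direct Java source form, so the mapping is approximate):

    public class CaptureSketch {
      public static void main(String[] args) {
        boolean z = true;   // the value captured under "Z"
        // create-lambda + capture-variable roughly correspond to a capturing lambda;
        // liberate-variable inside the target reads the captured value back.
        Runnable r = () ->
            System.out.println("(CaptureVariables) (0-args, 1 captured variable 'Z'): value is " + z);
        r.run();            // invoke-lambda
      }
    }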
diff --git a/test/955-lambda-smali/smali/Main.smali b/test/955-lambda-smali/smali/Main.smali
index 5d2aabb..9892d61 100644
--- a/test/955-lambda-smali/smali/Main.smali
+++ b/test/955-lambda-smali/smali/Main.smali
@@ -24,6 +24,7 @@
     invoke-static {}, LTrivialHelloWorld;->run()V
     invoke-static {}, LBoxUnbox;->run()V
     invoke-static {}, LMoveResult;->run()V
+    invoke-static {}, LCaptureVariables;->run()V
 
 # TODO: add tests when verification fails
 
diff --git a/test/955-lambda-smali/smali/MoveResult.smali b/test/955-lambda-smali/smali/MoveResult.smali
index 1725da3..52f7ba3 100644
--- a/test/955-lambda-smali/smali/MoveResult.smali
+++ b/test/955-lambda-smali/smali/MoveResult.smali
@@ -41,7 +41,7 @@
 .method public static testZ()V
     .registers 6
 
-    create-lambda v0, LMoveResult;->lambdaZ(Ljava/lang/reflect/ArtMethod;)Z
+    create-lambda v0, LMoveResult;->lambdaZ(J)Z
     invoke-lambda v0, {}
     move-result v2
     const v3, 1
@@ -61,7 +61,7 @@
 .end method
 
 # Lambda target for testZ. Always returns "true".
-.method public static lambdaZ(Ljava/lang/reflect/ArtMethod;)Z
+.method public static lambdaZ(J)Z
     .registers 3
 
     const v0, 1
@@ -73,7 +73,7 @@
 .method public static testB()V
     .registers 6
 
-    create-lambda v0, LMoveResult;->lambdaB(Ljava/lang/reflect/ArtMethod;)B
+    create-lambda v0, LMoveResult;->lambdaB(J)B
     invoke-lambda v0, {}
     move-result v2
     const v3, 15
@@ -93,7 +93,7 @@
 .end method
 
 # Lambda target for testB. Always returns "15".
-.method public static lambdaB(Ljava/lang/reflect/ArtMethod;)B
+.method public static lambdaB(J)B
     .registers 3 # 1 parameters, 2 locals
 
     const v0, 15
@@ -105,7 +105,7 @@
 .method public static testS()V
     .registers 6
 
-    create-lambda v0, LMoveResult;->lambdaS(Ljava/lang/reflect/ArtMethod;)S
+    create-lambda v0, LMoveResult;->lambdaS(J)S
     invoke-lambda v0, {}
     move-result v2
     const/16 v3, 31000
@@ -125,7 +125,7 @@
 .end method
 
 # Lambda target for testS. Always returns "31000".
-.method public static lambdaS(Ljava/lang/reflect/ArtMethod;)S
+.method public static lambdaS(J)S
     .registers 3
 
     const/16 v0, 31000
@@ -137,7 +137,7 @@
 .method public static testI()V
     .registers 6
 
-    create-lambda v0, LMoveResult;->lambdaI(Ljava/lang/reflect/ArtMethod;)I
+    create-lambda v0, LMoveResult;->lambdaI(J)I
     invoke-lambda v0, {}
     move-result v2
     const v3, 128000
@@ -157,7 +157,7 @@
 .end method
 
 # Lambda target for testI. Always returns "128000".
-.method public static lambdaI(Ljava/lang/reflect/ArtMethod;)I
+.method public static lambdaI(J)I
     .registers 3
 
     const v0, 128000
@@ -167,9 +167,9 @@
 
 # Test that chars are returned correctly via move-result.
 .method public static testC()V
-    .registers 6
+    .registers 7
 
-    create-lambda v0, LMoveResult;->lambdaC(Ljava/lang/reflect/ArtMethod;)C
+    create-lambda v0, LMoveResult;->lambdaC(J)C
     invoke-lambda v0, {}
     move-result v2
     const v3, 65535
@@ -189,7 +189,7 @@
 .end method
 
 # Lambda target for testC. Always returns "65535".
-.method public static lambdaC(Ljava/lang/reflect/ArtMethod;)C
+.method public static lambdaC(J)C
     .registers 3
 
     const v0, 65535
@@ -199,12 +199,12 @@
 
 # Test that longs are returned correctly via move-result.
 .method public static testJ()V
-    .registers 8
+    .registers 9
 
-    create-lambda v0, LMoveResult;->lambdaJ(Ljava/lang/reflect/ArtMethod;)J
+    create-lambda v0, LMoveResult;->lambdaJ(J)J
     invoke-lambda v0, {}
     move-result v2
-    const-wide v4, 0xdeadf00dc0ffee
+    const-wide v4, 0xdeadf00dc0ffeeL
 
     if-ne v4, v2, :is_not_equal
     const-string v6, "(MoveResult) testJ success"
@@ -220,11 +220,11 @@
 
 .end method
 
-# Lambda target for testC. Always returns "0xdeadf00dc0ffee".
-.method public static lambdaJ(Ljava/lang/reflect/ArtMethod;)J
-    .registers 4
+# Lambda target for testJ. Always returns "0xdeadf00dc0ffeeL".
+.method public static lambdaJ(J)J
+    .registers 5
 
-    const-wide v0, 0xdeadf00dc0ffee
+    const-wide v0, 0xdeadf00dc0ffeeL
     return-wide v0
 
 .end method
@@ -233,7 +233,7 @@
 .method public static testF()V
     .registers 6
 
-    create-lambda v0, LMoveResult;->lambdaF(Ljava/lang/reflect/ArtMethod;)F
+    create-lambda v0, LMoveResult;->lambdaF(J)F
     invoke-lambda v0, {}
     move-result v2
     const v3, infinityf
@@ -253,8 +253,8 @@
 .end method
 
 # Lambda target for testF. Always returns "infinityf".
-.method public static lambdaF(Ljava/lang/reflect/ArtMethod;)F
-    .registers 3
+.method public static lambdaF(J)F
+    .registers 4
 
     const v0, infinityf
     return v0
@@ -265,10 +265,10 @@
 .method public static testD()V
     .registers 8
 
-    create-lambda v0, LMoveResult;->lambdaD(Ljava/lang/reflect/ArtMethod;)D
+    create-lambda v0, LMoveResult;->lambdaD(J)D
     invoke-lambda v0, {}
     move-result-wide v2
-    const-wide v4, infinity
+    const-wide v4, -infinity
 
     if-ne v4, v2, :is_not_equal
     const-string v6, "(MoveResult) testD success"
@@ -285,10 +285,10 @@
 .end method
 
 # Lambda target for testD. Always returns "infinity".
-.method public static lambdaD(Ljava/lang/reflect/ArtMethod;)D
-    .registers 4
+.method public static lambdaD(J)D
+    .registers 5
 
-    const-wide v0, infinity # 123.456789
+    const-wide v0, -infinity
     return-wide v0
 
 .end method
@@ -298,7 +298,7 @@
 .method public static testL()V
     .registers 8
 
-    create-lambda v0, LMoveResult;->lambdaL(Ljava/lang/reflect/ArtMethod;)Ljava/lang/String;
+    create-lambda v0, LMoveResult;->lambdaL(J)Ljava/lang/String;
     invoke-lambda v0, {}
     move-result-object v2
     const-string v4, "Interned string"
@@ -319,8 +319,8 @@
 .end method
 
 # Lambda target for testL. Always returns "Interned string" (string).
-.method public static lambdaL(Ljava/lang/reflect/ArtMethod;)Ljava/lang/String;
-    .registers 4
+.method public static lambdaL(J)Ljava/lang/String;
+    .registers 5
 
     const-string v0, "Interned string"
     return-object v0
diff --git a/test/955-lambda-smali/smali/TrivialHelloWorld.smali b/test/955-lambda-smali/smali/TrivialHelloWorld.smali
index 38ee95a..3444b13 100644
--- a/test/955-lambda-smali/smali/TrivialHelloWorld.smali
+++ b/test/955-lambda-smali/smali/TrivialHelloWorld.smali
@@ -25,12 +25,12 @@
 .method public static run()V
 .registers 8
     # Trivial 0-arg hello world
-    create-lambda v0, LTrivialHelloWorld;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
+    create-lambda v0, LTrivialHelloWorld;->doHelloWorld(J)V
     # TODO: create-lambda should not write to both v0 and v1
     invoke-lambda v0, {}
 
     # Slightly more interesting 4-arg hello world
-    create-lambda v2, doHelloWorldArgs(Ljava/lang/reflect/ArtMethod;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
+    create-lambda v2, doHelloWorldArgs(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
     # TODO: create-lambda should not write to both v2 and v3
     const-string v4, "A"
     const-string v5, "B"
@@ -43,9 +43,9 @@
     return-void
 .end method
 
-#TODO: should use a closure type instead of ArtMethod.
-.method public static doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
-    .registers 3 # 1 parameters, 2 locals
+#TODO: should use a closure type instead of jlong.
+.method public static doHelloWorld(J)V
+    .registers 5 # 1 wide parameter, 3 locals
 
     const-string v0, "Hello world! (0-args, no closure)"
 
@@ -55,17 +55,17 @@
     return-void
 .end method
 
-#TODO: should use a closure type instead of ArtMethod.
-.method public static doHelloWorldArgs(Ljava/lang/reflect/ArtMethod;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
-    .registers 7 # 5 parameters, 2 locals
+#TODO: should use a closure type instead of jlong.
+.method public static doHelloWorldArgs(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
+    .registers 9 # 1 wide parameter, 4 narrow parameters, 3 locals
 
     const-string v0, " Hello world! (4-args, no closure)"
     sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
 
-    invoke-virtual {v1, p1}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
     invoke-virtual {v1, p2}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
     invoke-virtual {v1, p3}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
     invoke-virtual {v1, p4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+    invoke-virtual {v1, p5}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
 
     invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
 
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index 7f05a04..bffd0e0 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -19,6 +19,8 @@
 include art/build/Android.common_build.mk
 
 LIBARTTEST_COMMON_SRC_FILES := \
+  common/runtime_state.cc \
+  common/stack_inspect.cc \
   004-JniTest/jni_test.cc \
   004-SignalTest/signaltest.cc \
   004-ReferenceMap/stack_walk_refmap_jni.cc \
@@ -26,13 +28,11 @@
   004-UnsafeTest/unsafe_test.cc \
   044-proxy/native_proxy.cc \
   051-thread/thread_test.cc \
-  088-monitor-verification/stack_inspect.cc \
-  116-nodex2oat/nodex2oat.cc \
   117-nopatchoat/nopatchoat.cc \
-  118-noimage-dex2oat/noimage-dex2oat.cc \
   1337-gc-coverage/gc_coverage.cc \
   137-cfi/cfi.cc \
   139-register-natives/regnative.cc \
+  141-class-unload/jni_unload.cc \
   454-get-vreg/get_vreg_jni.cc \
   455-set-vreg/set_vreg_jni.cc \
   457-regs/regs_jni.cc \
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 439e423..a103eac 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -370,6 +370,7 @@
 # when already tracing, and writes an error message that we do not want to check for.
 TEST_ART_BROKEN_TRACING_RUN_TESTS := \
   137-cfi \
+  141-class-unload \
   802-deoptimization
 
 ifneq (,$(filter trace stream,$(TRACE_TYPES)))
@@ -417,19 +418,71 @@
 
 TEST_ART_BROKEN_DEFAULT_RUN_TESTS :=
 
-# Known broken tests for the arm64 optimizing compiler backend.
-TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS :=
+# Known broken tests for the mips32 optimizing compiler backend.
+TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \
+    441-checker-inliner \
+    442-checker-constant-folding \
+    444-checker-nce \
+    445-checker-licm \
+    446-checker-inliner2 \
+    447-checker-inliner3 \
+    449-checker-bce \
+    450-checker-types \
+    455-checker-gvn \
+    458-checker-instruction-simplification \
+    462-checker-inlining-across-dex-files \
+    463-checker-boolean-simplifier \
+    464-checker-inline-sharpen-calls \
+    465-checker-clinit-gvn \
+    468-checker-bool-simplifier-regression \
+    473-checker-inliner-constants \
+    474-checker-boolean-input \
+    476-checker-ctor-memory-barrier \
+    477-checker-bound-type \
+    478-checker-clinit-check-pruning \
+    478-checker-inliner-nested-loop \
+    480-checker-dead-blocks \
+    482-checker-loop-back-edge-use \
+    484-checker-register-hints \
+    485-checker-dce-loop-update \
+    485-checker-dce-switch \
+    486-checker-must-do-null-check \
+    487-checker-inline-calls \
+    488-checker-inline-recursive-calls \
+    490-checker-inline \
+    492-checker-inline-invoke-interface \
+    493-checker-inline-invoke-interface \
+    494-checker-instanceof-tests \
+    495-checker-checkcast-tests \
+    496-checker-inlining-and-class-loader \
+    508-checker-disassembly \
+    510-checker-try-catch \
+    517-checker-builder-fallthrough \
+    521-checker-array-set-null \
+    522-checker-regression-monitor-exit \
+    523-checker-can-throw-regression \
+    525-checker-arrays-and-fields \
+    526-checker-caller-callee-regs \
+    529-checker-unresolved \
+    530-checker-loops \
+    530-checker-regression-reftype-final \
+    532-checker-nonnull-arrayset \
+    534-checker-bce-deoptimization \
 
-ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-      optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS),64)
+ifeq (mips,$(TARGET_ARCH))
+  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
+        optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  endif
 endif
 
-TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS :=
+TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS :=
 
 # Known broken tests for the optimizing compiler.
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS :=
+TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS := \
+  455-set-vreg \
 
 ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -437,20 +490,11 @@
       $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
-# If ART_USE_OPTIMIZING_COMPILER is set to true, then the default core.art has been
-# compiled with the optimizing compiler.
-ifeq ($(ART_USE_OPTIMIZING_COMPILER),true)
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      default,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
 TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS :=
 
 # Tests that should fail when the optimizing compiler compiles them non-debuggable.
 TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS := \
   454-get-vreg \
-  455-set-vreg \
   457-regs \
 
 ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
@@ -752,12 +796,7 @@
       endif
     endif
   endif
-  ifeq ($(4),jit)
-    # Use interpreter image for JIT.
-    image_suffix := interpreter
-  else
-    image_suffix := $(4)
-  endif
+  image_suffix := $(4)
   ifeq ($(9),no-image)
     test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_IMAGE_RULES
     run_test_options += --no-image
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
new file mode 100644
index 0000000..042b03b
--- /dev/null
+++ b/test/common/runtime_state.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni.h"
+
+#include "base/logging.h"
+#include "dex_file-inl.h"
+#include "mirror/class-inl.h"
+#include "nth_caller_visitor.h"
+#include "runtime.h"
+#include "scoped_thread_state_change.h"
+#include "stack.h"
+#include "thread-inl.h"
+
+namespace art {
+
+// public static native boolean hasOatFile();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasOatFile(JNIEnv* env, jclass cls) {
+  ScopedObjectAccess soa(env);
+
+  mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
+  const DexFile& dex_file = klass->GetDexFile();
+  const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile();
+  return (oat_dex_file != nullptr) ? JNI_TRUE : JNI_FALSE;
+}
+
+// public static native boolean runtimeIsSoftFail();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_runtimeIsSoftFail(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                                  jclass cls ATTRIBUTE_UNUSED) {
+  return Runtime::Current()->IsVerificationSoftFail() ? JNI_TRUE : JNI_FALSE;
+}
+
+// public static native boolean isDex2OatEnabled();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isDex2OatEnabled(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                                 jclass cls ATTRIBUTE_UNUSED) {
+  return Runtime::Current()->IsDex2OatEnabled();
+}
+
+// public static native boolean hasImage();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasImage(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                         jclass cls ATTRIBUTE_UNUSED) {
+  return Runtime::Current()->GetHeap()->HasImageSpace();
+}
+
+// public static native boolean isImageDex2OatEnabled();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isImageDex2OatEnabled(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                                      jclass cls ATTRIBUTE_UNUSED) {
+  return Runtime::Current()->IsImageDex2OatEnabled();
+}
+
+}  // namespace art
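
For reference, a minimal sketch of the Java side these new helpers expect, following the `// public static native ...` comments in test/common/runtime_state.cc above. The run-test-style `Main` class, the `System.loadLibrary(args[0])` call, and the printed strings are illustrative assumptions, not part of this change; only the native declarations are taken from the source.

// Hypothetical run-test Main exercising the runtime-state natives.
public class Main {
  public static void main(String[] args) {
    // Assumption: run-test passes the test library name (e.g. "arttest") as args[0].
    System.loadLibrary(args[0]);
    System.out.println("hasOatFile: " + hasOatFile());
    System.out.println("runtimeIsSoftFail: " + runtimeIsSoftFail());
    System.out.println("isDex2OatEnabled: " + isDex2OatEnabled());
    System.out.println("hasImage: " + hasImage());
    System.out.println("isImageDex2OatEnabled: " + isImageDex2OatEnabled());
  }

  // Declarations mirror the comments in runtime_state.cc.
  public static native boolean hasOatFile();
  public static native boolean runtimeIsSoftFail();
  public static native boolean isDex2OatEnabled();
  public static native boolean hasImage();
  public static native boolean isImageDex2OatEnabled();
}
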
diff --git a/test/088-monitor-verification/stack_inspect.cc b/test/common/stack_inspect.cc
similarity index 64%
rename from test/088-monitor-verification/stack_inspect.cc
rename to test/common/stack_inspect.cc
index e2899c3..d22cf52 100644
--- a/test/088-monitor-verification/stack_inspect.cc
+++ b/test/common/stack_inspect.cc
@@ -27,25 +27,26 @@
 
 namespace art {
 
-// public static native void assertCallerIsInterpreted();
+// public static native boolean isCallerInterpreted();
 
-extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsInterpreted(JNIEnv* env, jclass) {
-  LOG(INFO) << "assertCallerIsInterpreted";
-
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isCallerInterpreted(JNIEnv* env, jclass) {
   ScopedObjectAccess soa(env);
   NthCallerVisitor caller(soa.Self(), 1, false);
   caller.WalkStack();
   CHECK(caller.caller != nullptr);
-  LOG(INFO) << PrettyMethod(caller.caller);
-  CHECK(caller.GetCurrentShadowFrame() != nullptr);
+  return caller.GetCurrentShadowFrame() != nullptr ? JNI_TRUE : JNI_FALSE;
 }
 
-// public static native void assertCallerIsManaged();
+// public static native void assertCallerIsInterpreted();
 
-extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsManaged(JNIEnv* env, jclass cls) {
-  // Note: needs some smarts to not fail if there is no managed code, at all.
-  LOG(INFO) << "assertCallerIsManaged";
+extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsInterpreted(JNIEnv* env, jclass klass) {
+  CHECK(Java_Main_isCallerInterpreted(env, klass));
+}
 
+
+// public static native boolean isCallerManaged();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isCallerManaged(JNIEnv* env, jclass cls) {
   ScopedObjectAccess soa(env);
 
   mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
@@ -54,28 +55,20 @@
   if (oat_dex_file == nullptr) {
     // No oat file, this must be a test configuration that doesn't compile at all. Ignore that the
     // result will be that we're running the interpreter.
-    return;
+    return JNI_FALSE;
   }
 
   NthCallerVisitor caller(soa.Self(), 1, false);
   caller.WalkStack();
   CHECK(caller.caller != nullptr);
-  LOG(INFO) << PrettyMethod(caller.caller);
 
-  if (caller.GetCurrentShadowFrame() == nullptr) {
-    // Not a shadow frame, this looks good.
-    return;
-  }
+  return caller.GetCurrentShadowFrame() != nullptr ? JNI_FALSE : JNI_TRUE;
+}
 
-  // This could be an interpret-only or a verify-at-runtime compilation, or a read-barrier variant,
-  // or... It's not really safe to just reject now. Let's look at the access flags. If the method
-  // was successfully verified, its access flags should be set to mark it preverified, except when
-  // we're running soft-fail tests.
-  if (Runtime::Current()->IsVerificationSoftFail()) {
-    // Soft-fail config. Everything should be running with interpreter access checks, potentially.
-    return;
-  }
-  CHECK(caller.caller->IsPreverified());
+// public static native void assertCallerIsManaged();
+
+extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsManaged(JNIEnv* env, jclass cls) {
+  CHECK(Java_Main_isCallerManaged(env, cls));
 }
 
 }  // namespace art
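
For reference, a minimal sketch contrasting the new boolean queries with the assert wrappers kept for existing callers, matching the `// public static native ...` comments in the renamed test/common/stack_inspect.cc. The `Main` class layout and the printed strings are illustrative assumptions; the native declarations come from the source. The design point of the change is that a test can now branch on the result instead of CHECK-failing inside native code.

// Hypothetical run-test Main using the stack-inspection natives.
public class Main {
  public static void main(String[] args) {
    // Assumption: run-test passes the test library name as args[0].
    System.loadLibrary(args[0]);
    if (isCallerInterpreted()) {
      System.out.println("caller runs in the interpreter (shadow frame present)");
      assertCallerIsInterpreted();  // CHECK-fails in native code if that ever stops holding
    } else if (isCallerManaged()) {
      System.out.println("caller runs as compiled managed code");
      assertCallerIsManaged();
    } else {
      System.out.println("no oat file: configuration does not compile at all");
    }
  }

  public static native boolean isCallerInterpreted();
  public static native void assertCallerIsInterpreted();
  public static native boolean isCallerManaged();
  public static native void assertCallerIsManaged();
}
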
diff --git a/test/run-test b/test/run-test
index 73c92d4..a5b6e92 100755
--- a/test/run-test
+++ b/test/run-test
@@ -252,7 +252,7 @@
         shift
     elif [ "x$1" = "x--jit" ]; then
         run_args="${run_args} --jit"
-        image_suffix="-interpreter"
+        image_suffix="-jit"
         shift
     elif [ "x$1" = "x--optimizing" ]; then
         run_args="${run_args} -Xcompiler-option --compiler-backend=Optimizing"
@@ -392,7 +392,7 @@
 
 # Most interesting target architecture variables are Makefile variables, not environment variables.
 # Try to map the suffix64 flag and what we find in ${ANDROID_PRODUCT_OUT}/data/art-test to an architecture name.
-function guess_arch_name() {
+function guess_target_arch_name() {
     grep32bit=`ls ${ANDROID_PRODUCT_OUT}/data/art-test | grep -E '^(arm|x86|mips)$'`
     grep64bit=`ls ${ANDROID_PRODUCT_OUT}/data/art-test | grep -E '^(arm64|x86_64|mips64)$'`
     if [ "x${suffix64}" = "x64" ]; then
@@ -402,6 +402,14 @@
     fi
 }
 
+function guess_host_arch_name() {
+    if [ "x${suffix64}" = "x64" ]; then
+        host_arch_name="x86_64"
+    else
+        host_arch_name="x86"
+    fi
+}
+
 if [ "$target_mode" = "no" ]; then
     if [ "$runtime" = "jvm" ]; then
         if [ "$prebuild_mode" = "yes" ]; then
@@ -437,10 +445,11 @@
         if [ -z "$ANDROID_HOST_OUT" ]; then
             export ANDROID_HOST_OUT=$ANDROID_BUILD_TOP/out/host/linux-x86
         fi
+        guess_host_arch_name
         run_args="${run_args} --boot ${ANDROID_HOST_OUT}/framework/core${image_suffix}${pic_image_suffix}.art"
         run_args="${run_args} --runtime-option -Djava.library.path=${ANDROID_HOST_OUT}/lib${suffix64}"
     else
-        guess_arch_name
+        guess_target_arch_name
         run_args="${run_args} --runtime-option -Djava.library.path=/data/art-test/${target_arch_name}"
         run_args="${run_args} --boot /data/art-test/core${image_suffix}${pic_image_suffix}.art"
     fi
@@ -635,7 +644,7 @@
       run_checker="yes"
       if [ "$target_mode" = "no" ]; then
         cfg_output_dir="$tmp_dir"
-        checker_arch_option=
+        checker_arch_option="--arch=${host_arch_name^^}"
       else
         cfg_output_dir="$DEX_LOCATION"
         checker_arch_option="--arch=${target_arch_name^^}"
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index eb64994..de9b35d 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -20,21 +20,11 @@
 fi
 
 common_targets="vogar vogar.jar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests"
-android_root="/data/local/tmp/system"
-linker="linker"
 mode="target"
 j_arg="-j$(nproc)"
 showcommands=
 make_command=
 
-case "$TARGET_PRODUCT" in
-  (armv8|mips64r6) linker="linker64";;
-esac
-
-if [[ "$ART_TEST_ANDROID_ROOT" != "" ]]; then
-  android_root="$ART_TEST_ANDROID_ROOT"
-fi
-
 while true; do
   if [[ "$1" == "--host" ]]; then
     mode="host"
@@ -42,16 +32,6 @@
   elif [[ "$1" == "--target" ]]; then
     mode="target"
     shift
-  elif [[ "$1" == "--32" ]]; then
-    linker="linker"
-    shift
-  elif [[ "$1" == "--64" ]]; then
-    linker="linker64"
-    shift
-  elif [[ "$1" == "--android-root" ]]; then
-    shift
-    android_root=$1
-    shift
   elif [[ "$1" == -j* ]]; then
     j_arg=$1
     shift
@@ -64,23 +44,10 @@
 done
 
 if [[ $mode == "host" ]]; then
-  make_command="make $j_arg build-art-host-tests $common_targets out/host/linux-x86/lib/libjavacoretests.so out/host/linux-x86/lib64/libjavacoretests.so"
-  echo "Executing $make_command"
-  $make_command
+  make_command="make $j_arg $showcommands build-art-host-tests $common_targets out/host/linux-x86/lib/libjavacoretests.so out/host/linux-x86/lib64/libjavacoretests.so"
 elif [[ $mode == "target" ]]; then
-  # We need to provide our own linker in case the linker on the device
-  # is out of date.
-  env="TARGET_GLOBAL_LDFLAGS=-Wl,-dynamic-linker=$android_root/bin/$linker"
-  # gcc gives a linker error, so compile with clang.
-  # TODO: investigate and fix?
-  if [[ $TARGET_PRODUCT == "mips32r2_fp" ]]; then
-    env="$env USE_CLANG_PLATFORM_BUILD=true"
-  fi
-  # Use '-e' to force the override of TARGET_GLOBAL_LDFLAGS.
-  # Also, we build extra tools that will be used by tests, so that
-  # they are compiled with our own linker.
-  make_command="make -e $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh out/host/linux-x86/bin/adb"
-  echo "Executing env $env $make_command"
-  env $env $make_command
+  make_command="make $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh out/host/linux-x86/bin/adb"
 fi
 
+echo "Executing $make_command"
+$make_command
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 26d9ad7..80f7a37 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -86,12 +86,6 @@
     # Remove the --debug from the arguments.
     vogar_args=${vogar_args/$1}
     vogar_args="$vogar_args --vm-arg -XXlib:libartd.so"
-    if [ "$emulator" = "no" ]; then
-      # Increase the timeout, as vogar cannot set individual test
-      # timeout when being asked to run packages, and some tests go above
-      # the default timeout.
-      vogar_args="$vogar_args --timeout 240"
-    fi
     shift
   elif [[ "$1" == "" ]]; then
     break
@@ -100,10 +94,10 @@
   fi
 done
 
-if [ "$emulator" = "yes" ]; then
-  # Be very patient with the emulator.
-  vogar_args="$vogar_args --timeout 480"
-fi
+# Increase the timeout, as vogar cannot set individual test
+# timeout when being asked to run packages, and some tests go above
+# the default timeout.
+vogar_args="$vogar_args --timeout 480"
 
 # Run the tests using vogar.
 echo "Running tests for the following test packages:"