Merge "Additional MIPS64 instructions needed by intrinsics code."
diff --git a/Android.mk b/Android.mk
index 8859d3a..b8ba9f2 100644
--- a/Android.mk
+++ b/Android.mk
@@ -33,7 +33,7 @@
 
 # Don't bother with tests unless there is a test-art*, build-art*, or related target.
 art_test_bother := false
-ifneq (,$(filter %tests test-art% valgrind-test-art% build-art%,$(MAKECMDGOALS)))
+ifneq (,$(filter tests test-art% valgrind-test-art% build-art% checkbuild,$(MAKECMDGOALS)))
   art_test_bother := true
 endif
 
@@ -119,6 +119,7 @@
 include $(art_path)/build/Android.common_test.mk
 include $(art_path)/build/Android.gtest.mk
 include $(art_path)/test/Android.run-test.mk
+include $(art_path)/benchmark/Android.mk
 
 # Sync test files to the target, depends upon all things that must be pushed to the target.
 .PHONY: test-art-target-sync
diff --git a/benchmark/Android.mk b/benchmark/Android.mk
new file mode 100644
index 0000000..09aca98
--- /dev/null
+++ b/benchmark/Android.mk
@@ -0,0 +1,78 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+
+include art/build/Android.common_build.mk
+
+LIBARTBENCHMARK_COMMON_SRC_FILES := \
+  jni-perf/perf_jni.cc \
+  scoped-primitive-array/scoped_primitive_array.cc
+
+# $(1): target or host
+define build-libartbenchmark
+  ifneq ($(1),target)
+    ifneq ($(1),host)
+      $$(error expected target or host for argument 1, received $(1))
+    endif
+  endif
+
+  art_target_or_host := $(1)
+
+  include $(CLEAR_VARS)
+  LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+  LOCAL_MODULE := libartbenchmark
+  ifeq ($$(art_target_or_host),target)
+    LOCAL_MODULE_TAGS := tests
+  endif
+  LOCAL_SRC_FILES := $(LIBARTBENCHMARK_COMMON_SRC_FILES)
+  LOCAL_SHARED_LIBRARIES += libart libbacktrace libnativehelper
+  LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
+  LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
+  LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
+  ifeq ($$(art_target_or_host),target)
+    $(call set-target-local-clang-vars)
+    $(call set-target-local-cflags-vars,debug)
+    LOCAL_SHARED_LIBRARIES += libdl
+    LOCAL_MULTILIB := both
+    # LOCAL_MODULE_PATH_32 := $(ART_TARGET_OUT)/$(ART_TARGET_ARCH_32)
+    # LOCAL_MODULE_PATH_64 := $(ART_TARGET_OUT)/$(ART_TARGET_ARCH_64)
+    LOCAL_MODULE_TARGET_ARCH := $(ART_SUPPORTED_ARCH)
+    include $(BUILD_SHARED_LIBRARY)
+  else # host
+    LOCAL_CLANG := $(ART_HOST_CLANG)
+    LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
+    LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
+    LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
+    LOCAL_IS_HOST_MODULE := true
+    LOCAL_MULTILIB := both
+    include $(BUILD_HOST_SHARED_LIBRARY)
+  endif
+
+  # Clear locally used variables.
+  art_target_or_host :=
+endef
+
+ifeq ($(ART_BUILD_TARGET),true)
+  $(eval $(call build-libartbenchmark,target))
+endif
+ifeq ($(ART_BUILD_HOST),true)
+  $(eval $(call build-libartbenchmark,host))
+endif
+
+# Clear locally used variables.
+LOCAL_PATH :=
+LIBARTBENCHMARK_COMMON_SRC_FILES :=
diff --git a/test/999-jni-perf/info.txt b/benchmark/jni-perf/info.txt
similarity index 100%
rename from test/999-jni-perf/info.txt
rename to benchmark/jni-perf/info.txt
diff --git a/test/999-jni-perf/perf-jni.cc b/benchmark/jni-perf/perf_jni.cc
similarity index 71%
rename from test/999-jni-perf/perf-jni.cc
rename to benchmark/jni-perf/perf_jni.cc
index 51eeb83..cd8d520 100644
--- a/test/999-jni-perf/perf-jni.cc
+++ b/benchmark/jni-perf/perf_jni.cc
@@ -24,18 +24,14 @@
 
 namespace {
 
-extern "C" JNIEXPORT jint JNICALL Java_Main_perfJniEmptyCall(JNIEnv*, jobject) {
-  return 0;
+extern "C" JNIEXPORT void JNICALL Java_JniPerfBenchmark_perfJniEmptyCall(JNIEnv*, jobject) {}
+
+extern "C" JNIEXPORT void JNICALL Java_JniPerfBenchmark_perfSOACall(JNIEnv* env, jobject) {
+  ScopedObjectAccess soa(env);
 }
 
-extern "C" JNIEXPORT jint JNICALL Java_Main_perfSOACall(JNIEnv*, jobject) {
-  ScopedObjectAccess soa(Thread::Current());
-  return 0;
-}
-
-extern "C" JNIEXPORT jint JNICALL Java_Main_perfSOAUncheckedCall(JNIEnv*, jobject) {
+extern "C" JNIEXPORT void JNICALL Java_JniPerfBenchmark_perfSOAUncheckedCall(JNIEnv*, jobject) {
   ScopedObjectAccessUnchecked soa(Thread::Current());
-  return 0;
 }
 
 }  // namespace
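
Note on the renames above: with System.loadLibrary and no explicit RegisterNatives call, the runtime binds each Java native method to an exported symbol derived from the declaring class, so moving the declarations from class Main to class JniPerfBenchmark is what forces the Java_Main_* entry points to become Java_JniPerfBenchmark_*. A minimal sketch of the naming convention (illustrative only, not part of the patch):

    #include <jni.h>

    // For a class in the default package the pattern is
    //   Java_<ClassName>_<methodName>;
    // package components, when present, are prepended and separated by
    // underscores (com.example.Foo.bar -> Java_com_example_Foo_bar).
    extern "C" JNIEXPORT void JNICALL
    Java_JniPerfBenchmark_perfJniEmptyCall(JNIEnv*, jobject) {
      // Matches `native void perfJniEmptyCall();` on JniPerfBenchmark.
    }
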
diff --git a/benchmark/jni-perf/src/JniPerfBenchmark.java b/benchmark/jni-perf/src/JniPerfBenchmark.java
new file mode 100644
index 0000000..b1b21ce
--- /dev/null
+++ b/benchmark/jni-perf/src/JniPerfBenchmark.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.google.caliper.SimpleBenchmark;
+
+public class JniPerfBenchmark extends SimpleBenchmark {
+  private static final String MSG = "ABCDE";
+
+  native void perfJniEmptyCall();
+  native void perfSOACall();
+  native void perfSOAUncheckedCall();
+
+  public void timeFastJNI(int N) {
+    // TODO: This might be an intrinsic.
+    for (long i = 0; i < N; i++) {
+      char c = MSG.charAt(2);
+    }
+  }
+
+  public void timeEmptyCall(int N) {
+    for (long i = 0; i < N; i++) {
+      perfJniEmptyCall();
+    }
+  }
+
+  public void timeSOACall(int N) {
+    for (long i = 0; i < N; i++) {
+      perfSOACall();
+    }
+  }
+
+  public void timeSOAUncheckedCall(int N) {
+    for (long i = 0; i < N; i++) {
+      perfSOAUncheckedCall();
+    }
+  }
+
+  {
+    System.loadLibrary("artbenchmark");
+  }
+}
diff --git a/benchmark/scoped-primitive-array/info.txt b/benchmark/scoped-primitive-array/info.txt
new file mode 100644
index 0000000..93abb7c
--- /dev/null
+++ b/benchmark/scoped-primitive-array/info.txt
@@ -0,0 +1 @@
+Tests for measuring performance of ScopedPrimitiveArray.
diff --git a/benchmark/scoped-primitive-array/scoped_primitive_array.cc b/benchmark/scoped-primitive-array/scoped_primitive_array.cc
new file mode 100644
index 0000000..1664157
--- /dev/null
+++ b/benchmark/scoped-primitive-array/scoped_primitive_array.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni.h"
+#include "ScopedPrimitiveArray.h"
+
+extern "C" JNIEXPORT jlong JNICALL Java_ScopedPrimitiveArrayBenchmark_measureByteArray(
+    JNIEnv* env, jclass, int reps, jbyteArray arr) {
+  jlong ret = 0;
+  for (jint i = 0; i < reps; ++i) {
+    ScopedByteArrayRO sc(env, arr);
+    ret += sc[0] + sc[sc.size() - 1];
+  }
+  return ret;
+}
+
+extern "C" JNIEXPORT jlong JNICALL Java_ScopedPrimitiveArrayBenchmark_measureShortArray(
+    JNIEnv* env, jclass, int reps, jshortArray arr) {
+  jlong ret = 0;
+  for (jint i = 0; i < reps; ++i) {
+    ScopedShortArrayRO sc(env, arr);
+    ret += sc[0] + sc[sc.size() - 1];
+  }
+  return ret;
+}
+
+extern "C" JNIEXPORT jlong JNICALL Java_ScopedPrimitiveArrayBenchmark_measureIntArray(
+    JNIEnv* env, jclass, int reps, jintArray arr) {
+  jlong ret = 0;
+  for (jint i = 0; i < reps; ++i) {
+    ScopedIntArrayRO sc(env, arr);
+    ret += sc[0] + sc[sc.size() - 1];
+  }
+  return ret;
+}
+
+extern "C" JNIEXPORT jlong JNICALL Java_ScopedPrimitiveArrayBenchmark_measureLongArray(
+    JNIEnv* env, jclass, int reps, jlongArray arr) {
+  jlong ret = 0;
+  for (jint i = 0; i < reps; ++i) {
+    ScopedLongArrayRO sc(env, arr);
+    ret += sc[0] + sc[sc.size() - 1];
+  }
+  return ret;
+}
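
The Scoped*ArrayRO helpers used above come from libnativehelper's ScopedPrimitiveArray.h: each is an RAII wrapper that obtains the array elements on construction and releases them on destruction, and since the RO variants never write back they can release with JNI_ABORT. A simplified sketch of the idea, not the actual libnativehelper implementation:

    #include <cstddef>
    #include "jni.h"

    // Rough model of ScopedByteArrayRO: read-only, scope-bound access
    // to a jbyteArray's elements (error handling omitted).
    class ScopedByteArrayROSketch {
     public:
      ScopedByteArrayROSketch(JNIEnv* env, jbyteArray array)
          : env_(env), array_(array),
            elements_(env->GetByteArrayElements(array, nullptr)),
            size_(static_cast<size_t>(env->GetArrayLength(array))) {}
      ~ScopedByteArrayROSketch() {
        // JNI_ABORT frees any copy without writing changes back,
        // which is safe because access is read-only.
        env_->ReleaseByteArrayElements(array_, elements_, JNI_ABORT);
      }
      jbyte operator[](size_t i) const { return elements_[i]; }
      size_t size() const { return size_; }
     private:
      JNIEnv* const env_;
      const jbyteArray array_;
      jbyte* const elements_;
      const size_t size_;
    };
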
diff --git a/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java b/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java
new file mode 100644
index 0000000..be276fe
--- /dev/null
+++ b/benchmark/scoped-primitive-array/src/ScopedPrimitiveArrayBenchmark.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.google.caliper.SimpleBenchmark;
+
+public class ScopedPrimitiveArrayBenchmark extends SimpleBenchmark {
+  // Measure adds the first and last element of the array by using ScopedPrimitiveArray.
+  static native long measureByteArray(int reps, byte[] arr);
+  static native long measureShortArray(int reps, short[] arr);
+  static native long measureIntArray(int reps, int[] arr);
+  static native long measureLongArray(int reps, long[] arr);
+
+  static final int smallLength = 16;
+  static final int mediumLength = 256;
+  static final int largeLength = 8096;
+  static byte[] smallBytes = new byte[smallLength];
+  static byte[] mediumBytes = new byte[mediumLength];
+  static byte[] largeBytes = new byte[largeLength];
+  static short[] smallShorts = new short[smallLength];
+  static short[] mediumShorts = new short[mediumLength];
+  static short[] largeShorts = new short[largeLength];
+  static int[] smallInts = new int[smallLength];
+  static int[] mediumInts = new int[mediumLength];
+  static int[] largeInts = new int[largeLength];
+  static long[] smallLongs = new long[smallLength];
+  static long[] mediumLongs = new long[mediumLength];
+  static long[] largeLongs = new long[largeLength];
+
+  public void timeSmallBytes(int reps) {
+    measureByteArray(reps, smallBytes);
+  }
+
+  public void timeMediumBytes(int reps) {
+    measureByteArray(reps, mediumBytes);
+  }
+
+  public void timeLargeBytes(int reps) {
+    measureByteArray(reps, largeBytes);
+  }
+
+  public void timeSmallShorts(int reps) {
+    measureShortArray(reps, smallShorts);
+  }
+
+  public void timeMediumShorts(int reps) {
+    measureShortArray(reps, mediumShorts);
+  }
+
+  public void timeLargeShorts(int reps) {
+    measureShortArray(reps, largeShorts);
+  }
+
+  public void timeSmallInts(int reps) {
+    measureIntArray(reps, smallInts);
+  }
+
+  public void timeMediumInts(int reps) {
+    measureIntArray(reps, mediumInts);
+  }
+
+  public void timeLargeInts(int reps) {
+    measureIntArray(reps, largeInts);
+  }
+
+  public void timeSmallLongs(int reps) {
+    measureLongArray(reps, smallLongs);
+  }
+
+  public void timeMediumLongs(int reps) {
+    measureLongArray(reps, mediumLongs);
+  }
+
+  public void timeLargeLongs(int reps) {
+    measureLongArray(reps, largeLongs);
+  }
+
+  {
+    System.loadLibrary("artbenchmark");
+  }
+}
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index acce68b..a443487 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -348,16 +348,6 @@
 ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default $(art_host_cflags)
 ART_HOST_ASFLAGS += $(art_asflags)
 
-# Disable -Wpessimizing-move: triggered for art/runtime/base/variant_map.h:261
-# Adding this flag to art_clang_cflags doesn't work because -Wall gets added to
-# ART_HOST_CFLAGS (as a part of art_cflags) after
-# -Wno-pessimizing-move.  Instead, add the flag here to both
-# ART_TARGET_CLANG_CFLAGS and ART_HOST_CFLAGS
-ifeq ($(ART_HOST_CLANG),true)
-ART_HOST_CFLAGS += -Wno-pessimizing-move
-endif
-ART_TARGET_CLANG_CFLAGS += -Wno-pessimizing-move
-
 # The latest clang update trips over many of the files in art and never finishes
 # compiling for aarch64 with -O3 (or -O2). Drop back to -O1 while we investigate
 # to stop punishing the build server.
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 326a92b..71a55bb 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -249,6 +249,7 @@
   compiler/optimizing/graph_test.cc \
   compiler/optimizing/gvn_test.cc \
   compiler/optimizing/induction_var_analysis_test.cc \
+  compiler/optimizing/induction_var_range_test.cc \
   compiler/optimizing/licm_test.cc \
   compiler/optimizing/live_interval_test.cc \
   compiler/optimizing/nodes_test.cc \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index ce9e367..41e9744 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -72,6 +72,7 @@
 	optimizing/graph_visualizer.cc \
 	optimizing/gvn.cc \
 	optimizing/induction_var_analysis.cc \
+	optimizing/induction_var_range.cc \
 	optimizing/inliner.cc \
 	optimizing/instruction_simplifier.cc \
 	optimizing/intrinsics.cc \
diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc
index c8aa990..f2c2e22 100644
--- a/compiler/dex/global_value_numbering_test.cc
+++ b/compiler/dex/global_value_numbering_test.cc
@@ -202,7 +202,7 @@
         for (size_t j = 0u; j != def->num_successors; ++j) {
           SuccessorBlockInfo* successor_block_info =
               static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo),
-                                                               kArenaAllocSuccessor));
+                                                               kArenaAllocSuccessors));
           successor_block_info->block = j;
           successor_block_info->key = 0u;  // Not used by class init check elimination.
           bb->successor_blocks.push_back(successor_block_info);
@@ -474,7 +474,7 @@
   BasicBlock* check_bb = cu_.mir_graph->GetBasicBlock(3u);
   check_bb->successor_block_list_type = kCatch;
   SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
-      (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor));
+      (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessors));
   successor_block_info->block = catch_handler->id;
   check_bb->successor_blocks.push_back(successor_block_info);
 }
@@ -2284,7 +2284,7 @@
   BasicBlock* check_bb = cu_.mir_graph->GetBasicBlock(3u);
   check_bb->successor_block_list_type = kCatch;
   SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
-      (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor));
+      (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessors));
   successor_block_info->block = catch_handler->id;
   check_bb->successor_blocks.push_back(successor_block_info);
   BasicBlock* merge_block = cu_.mir_graph->GetBasicBlock(4u);
diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc
index 4df0a8b..28c61a8 100644
--- a/compiler/dex/gvn_dead_code_elimination_test.cc
+++ b/compiler/dex/gvn_dead_code_elimination_test.cc
@@ -209,7 +209,7 @@
         for (size_t j = 0u; j != def->num_successors; ++j) {
           SuccessorBlockInfo* successor_block_info =
               static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo),
-                                                               kArenaAllocSuccessor));
+                                                               kArenaAllocSuccessors));
           successor_block_info->block = j;
           successor_block_info->key = 0u;  // Not used by class init check elimination.
           bb->successor_blocks.push_back(successor_block_info);
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 7976a9a..4efe4af 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -572,7 +572,7 @@
     DCHECK(case_block != nullptr);
     SuccessorBlockInfo* successor_block_info =
         static_cast<SuccessorBlockInfo*>(arena_->Alloc(sizeof(SuccessorBlockInfo),
-                                                       kArenaAllocSuccessor));
+                                                       kArenaAllocSuccessors));
     successor_block_info->block = case_block->id;
     successor_block_info->key =
         (insn->dalvikInsn.opcode == Instruction::PACKED_SWITCH) ?
@@ -627,7 +627,7 @@
         catches_.insert(catch_block->start_offset);
       }
       SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
-          (arena_->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor));
+          (arena_->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessors));
       successor_block_info->block = catch_block->id;
       successor_block_info->key = iterator.GetHandlerTypeIndex();
       cur_block->successor_blocks.push_back(successor_block_info);
@@ -2178,7 +2178,7 @@
     result_bb->successor_blocks.reserve(successor_blocks.size());
     for (SuccessorBlockInfo* sbi_old : successor_blocks) {
       SuccessorBlockInfo* sbi_new = static_cast<SuccessorBlockInfo*>(
-          arena->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor));
+          arena->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessors));
       memcpy(sbi_new, sbi_old, sizeof(SuccessorBlockInfo));
       result_bb->successor_blocks.push_back(sbi_new);
     }
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 1df6a4f..097abdc 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -379,7 +379,7 @@
         terminated_by_return(), dominates_return(), use_lvn(), first_mir_insn(),
         last_mir_insn(), data_flow_info(), dominators(), i_dominated(), dom_frontier(),
         predecessors(allocator->Adapter(kArenaAllocBBPredecessors)),
-        successor_blocks(allocator->Adapter(kArenaAllocSuccessor)) {
+        successor_blocks(allocator->Adapter(kArenaAllocSuccessors)) {
   }
   BasicBlockId id;
   BasicBlockId dfs_id;
diff --git a/compiler/dex/mir_graph_test.cc b/compiler/dex/mir_graph_test.cc
index 49b7511..7858681 100644
--- a/compiler/dex/mir_graph_test.cc
+++ b/compiler/dex/mir_graph_test.cc
@@ -79,7 +79,7 @@
         for (size_t j = 0u; j != def->num_successors; ++j) {
           SuccessorBlockInfo* successor_block_info =
               static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo),
-                                                               kArenaAllocSuccessor));
+                                                               kArenaAllocSuccessors));
           successor_block_info->block = j;
           successor_block_info->key = 0u;  // Not used by class init check elimination.
           bb->successor_blocks.push_back(successor_block_info);
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 47123ba..a0cedff 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -118,7 +118,7 @@
         for (size_t j = 0u; j != def->num_successors; ++j) {
           SuccessorBlockInfo* successor_block_info =
               static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo),
-                                                               kArenaAllocSuccessor));
+                                                               kArenaAllocSuccessors));
           successor_block_info->block = j;
           successor_block_info->key = 0u;  // Not used by class init check elimination.
           bb->successor_blocks.push_back(successor_block_info);
@@ -244,7 +244,7 @@
     BasicBlock* check_bb = cu_.mir_graph->GetBasicBlock(3u);
     check_bb->successor_block_list_type = kCatch;
     SuccessorBlockInfo* successor_block_info = reinterpret_cast<SuccessorBlockInfo*>
-        (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor));
+        (cu_.arena.Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessors));
     successor_block_info->block = catch_handler->id;
     check_bb->successor_blocks.push_back(successor_block_info);
   }
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index eb8730c..868d9a4 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -547,27 +547,28 @@
     cfi_.RestoreMany(DwarfFpReg(0), fp_spill_mask_);
   }
   bool unspill_LR_to_PC = (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0;
+  uint32_t core_unspill_mask = core_spill_mask_;
   if (unspill_LR_to_PC) {
-    core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum());
-    core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum());
+    core_unspill_mask &= ~(1 << rs_rARM_LR.GetRegNum());
+    core_unspill_mask |= (1 << rs_rARM_PC.GetRegNum());
   }
-  if (core_spill_mask_ != 0u) {
-    if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) {
+  if (core_unspill_mask != 0u) {
+    if ((core_unspill_mask & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) {
       // Unspilling only low regs and/or PC, use 16-bit POP.
       constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8;
       NewLIR1(kThumbPop,
-              (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) |
-              ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift));
-    } else if (IsPowerOfTwo(core_spill_mask_)) {
+              (core_unspill_mask & ~(1u << rs_rARM_PC.GetRegNum())) |
+              ((core_unspill_mask & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift));
+    } else if (IsPowerOfTwo(core_unspill_mask)) {
       // kThumb2Pop cannot be used to unspill a single register.
-      NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_));
+      NewLIR1(kThumb2Pop1, CTZ(core_unspill_mask));
     } else {
-      NewLIR1(kThumb2Pop, core_spill_mask_);
+      NewLIR1(kThumb2Pop, core_unspill_mask);
     }
     // If we pop to PC, there is no further epilogue code.
     if (!unspill_LR_to_PC) {
       cfi_.AdjustCFAOffset(-num_core_spills_ * kArmPointerSize);
-      cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_);
+      cfi_.RestoreMany(DwarfCoreReg(0), core_unspill_mask);
       DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0);  // empty stack.
     }
   }
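
The point of the hunk above: GenExitSequence used to rewrite core_spill_mask_ itself when folding LR into PC for the pop, leaving the member permanently altered even though it is meant to record what the prologue actually spilled. Computing the pop mask in a local core_unspill_mask keeps the member intact for any later reader. The general shape of the fix, as a toy sketch:

    #include <cstdint>

    constexpr uint32_t kPcBit = 1u << 15;
    constexpr uint32_t kLrBit = 1u << 14;

    struct FrameInfo {
      uint32_t core_spill_mask;  // written once by the prologue

      // Before: derives the pop mask by mutating shared state.
      uint32_t PopMaskBuggy() {
        core_spill_mask = (core_spill_mask & ~kLrBit) | kPcBit;
        return core_spill_mask;  // member no longer matches the prologue
      }

      // After: derives the pop mask from a local copy.
      uint32_t PopMaskFixed() const {
        uint32_t unspill = core_spill_mask;
        return (unspill & ~kLrBit) | kPcBit;
      }
    };
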
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 42b792c..af93aab 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -39,6 +39,9 @@
     true,   // kIntrinsicReverseBits
     true,   // kIntrinsicReverseBytes
     true,   // kIntrinsicNumberOfLeadingZeros
+    true,   // kIntrinsicNumberOfTrailingZeros
+    true,   // kIntrinsicRotateRight
+    true,   // kIntrinsicRotateLeft
     true,   // kIntrinsicAbsInt
     true,   // kIntrinsicAbsLong
     true,   // kIntrinsicAbsFloat
@@ -79,6 +82,10 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBytes], "ReverseBytes must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfLeadingZeros],
               "NumberOfLeadingZeros must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfTrailingZeros],
+              "NumberOfTrailingZeros must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicRotateRight], "RotateRight must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicRotateLeft], "RotateLeft must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsInt], "AbsInt must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsLong], "AbsLong must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsFloat], "AbsFloat must be static");
@@ -232,6 +239,9 @@
     "putOrderedObject",      // kNameCachePutOrderedObject
     "arraycopy",             // kNameCacheArrayCopy
     "numberOfLeadingZeros",  // kNameCacheNumberOfLeadingZeros
+    "numberOfTrailingZeros",  // kNameCacheNumberOfTrailingZeros
+    "rotateRight",           // kNameCacheRotateRight
+    "rotateLeft",            // kNameCacheRotateLeft
 };
 
 const DexFileMethodInliner::ProtoDef DexFileMethodInliner::kProtoCacheDefs[] = {
@@ -289,6 +299,8 @@
     { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheShort } },
     // kProtoCacheObject_Z
     { kClassCacheBoolean, 1, { kClassCacheJavaLangObject } },
+    // kProtoCacheJI_J
+    { kClassCacheLong, 2, { kClassCacheLong, kClassCacheInt } },
     // kProtoCacheObjectJII_Z
     { kClassCacheBoolean, 4, { kClassCacheJavaLangObject, kClassCacheLong,
         kClassCacheInt, kClassCacheInt } },
@@ -379,6 +391,8 @@
 
     INTRINSIC(JavaLangInteger, NumberOfLeadingZeros, I_I, kIntrinsicNumberOfLeadingZeros, k32),
     INTRINSIC(JavaLangLong, NumberOfLeadingZeros, J_I, kIntrinsicNumberOfLeadingZeros, k64),
+    INTRINSIC(JavaLangInteger, NumberOfTrailingZeros, I_I, kIntrinsicNumberOfTrailingZeros, k32),
+    INTRINSIC(JavaLangLong, NumberOfTrailingZeros, J_I, kIntrinsicNumberOfTrailingZeros, k64),
 
     INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
     INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
@@ -468,6 +482,11 @@
     INTRINSIC(JavaLangSystem, ArrayCopy, CharArrayICharArrayII_V , kIntrinsicSystemArrayCopyCharArray,
               0),
 
+    INTRINSIC(JavaLangInteger, RotateRight, II_I, kIntrinsicRotateRight, k32),
+    INTRINSIC(JavaLangLong, RotateRight, JI_J, kIntrinsicRotateRight, k64),
+    INTRINSIC(JavaLangInteger, RotateLeft, II_I, kIntrinsicRotateLeft, k32),
+    INTRINSIC(JavaLangLong, RotateLeft, JI_J, kIntrinsicRotateLeft, k64),
+
 #undef INTRINSIC
 
 #define SPECIAL(c, n, p, o, d) \
@@ -631,7 +650,10 @@
     case kIntrinsicSystemArrayCopyCharArray:
       return backend->GenInlinedArrayCopyCharArray(info);
     case kIntrinsicNumberOfLeadingZeros:
-      return false;  // not implemented in quick
+    case kIntrinsicNumberOfTrailingZeros:
+    case kIntrinsicRotateRight:
+    case kIntrinsicRotateLeft:
+      return false;  // not implemented in quick.
     default:
       LOG(FATAL) << "Unexpected intrinsic opcode: " << intrinsic.opcode;
       return false;  // avoid warning "control reaches end of non-void function"
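
These inliner entries only teach Quick to recognize rotateLeft, rotateRight, and numberOfTrailingZeros on Integer and Long (all static, with the new JI_J proto covering the long rotates, which take an int distance); the shared "return false" deliberately leaves them uninlined in Quick, so only the Optimizing backends generate code for them. For reference, a C++ sketch of the Java semantics a backend must match:

    #include <cstdint>

    // Integer.rotateLeft(i, n): bits shifted out on the left re-enter
    // on the right. Java masks the distance to the low 5 bits for int
    // (low 6 for long), which the `& 31` below reproduces while also
    // avoiding an undefined shift by 32.
    uint32_t RotateLeft32(uint32_t x, int n) {
      n &= 31;
      return (x << n) | (x >> ((32 - n) & 31));
    }

    // Integer.numberOfTrailingZeros(i): index of the lowest set bit,
    // or 32 when the input is zero.
    int NumberOfTrailingZeros32(uint32_t x) {
      if (x == 0) return 32;
      int n = 0;
      while ((x & 1u) == 0) { x >>= 1; ++n; }
      return n;
    }
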
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index d6c8bfb..8458806 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -208,6 +208,9 @@
       kNameCachePutOrderedObject,
       kNameCacheArrayCopy,
       kNameCacheNumberOfLeadingZeros,
+      kNameCacheNumberOfTrailingZeros,
+      kNameCacheRotateRight,
+      kNameCacheRotateLeft,
       kNameCacheLast
     };
 
@@ -245,6 +248,7 @@
       kProtoCacheJJ_V,
       kProtoCacheJS_V,
       kProtoCacheObject_Z,
+      kProtoCacheJI_J,
       kProtoCacheObjectJII_Z,
       kProtoCacheObjectJJJ_Z,
       kProtoCacheObjectJObjectObject_Z,
diff --git a/compiler/dex/type_inference_test.cc b/compiler/dex/type_inference_test.cc
index 872a8d6..528a18c 100644
--- a/compiler/dex/type_inference_test.cc
+++ b/compiler/dex/type_inference_test.cc
@@ -322,7 +322,7 @@
         for (size_t j = 0u; j != def->num_successors; ++j) {
           SuccessorBlockInfo* successor_block_info =
               static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo),
-                                                               kArenaAllocSuccessor));
+                                                               kArenaAllocSuccessors));
           successor_block_info->block = j;
           successor_block_info->key = 0u;  // Not used by class init check elimination.
           bb->successor_blocks.push_back(successor_block_info);
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 9f05e64..4c1408a 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1140,23 +1140,20 @@
   if (IsImage()) {
     TimingLogger::ScopedTiming t("UpdateImageClasses", timings);
 
-    Runtime* current = Runtime::Current();
+    Runtime* runtime = Runtime::Current();
 
     // Suspend all threads.
-    current->GetThreadList()->SuspendAll(__FUNCTION__);
+    ScopedSuspendAll ssa(__FUNCTION__);
 
     std::string error_msg;
     std::unique_ptr<ClinitImageUpdate> update(ClinitImageUpdate::Create(image_classes_.get(),
                                                                         Thread::Current(),
-                                                                        current->GetClassLinker(),
+                                                                        runtime->GetClassLinker(),
                                                                         &error_msg));
     CHECK(update.get() != nullptr) << error_msg;  // TODO: Soft failure?
 
     // Do the marking.
     update->Walk();
-
-    // Resume threads.
-    current->GetThreadList()->ResumeAll();
   }
 }
 
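The replacement above swaps a manual SuspendAll/ResumeAll pair for a ScopedSuspendAll RAII object, so the resume happens automatically when the scope exits and cannot be skipped by an early return added later. A sketch of the shape of that class, assuming it simply wraps the same thread-list calls the old code made (ART's real definition lives in the runtime, not in this patch):

    // Presumes ART's runtime headers (Runtime, ThreadList) from
    // runtime/runtime.h and runtime/thread_list.h.
    class ScopedSuspendAllSketch {
     public:
      explicit ScopedSuspendAllSketch(const char* cause) {
        Runtime::Current()->GetThreadList()->SuspendAll(cause);
      }
      ~ScopedSuspendAllSketch() {
        Runtime::Current()->GetThreadList()->ResumeAll();
      }
    };
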
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index 84201c3..b0e83b0 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -42,9 +42,9 @@
 // successor and the successor can only be reached from them.
 static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
   if (!block1->IsSingleGoto() || !block2->IsSingleGoto()) return false;
-  HBasicBlock* succ1 = block1->GetSuccessors().Get(0);
-  HBasicBlock* succ2 = block2->GetSuccessors().Get(0);
-  return succ1 == succ2 && succ1->GetPredecessors().Size() == 2u;
+  HBasicBlock* succ1 = block1->GetSuccessor(0);
+  HBasicBlock* succ2 = block2->GetSuccessor(0);
+  return succ1 == succ2 && succ1->GetPredecessors().size() == 2u;
 }
 
 // Returns true if the outcome of the branching matches the boolean value of
@@ -108,7 +108,7 @@
   if (!BlocksDoMergeTogether(true_block, false_block)) {
     return;
   }
-  HBasicBlock* merge_block = true_block->GetSuccessors().Get(0);
+  HBasicBlock* merge_block = true_block->GetSuccessor(0);
   if (!merge_block->HasSinglePhi()) {
     return;
   }
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index ebc0adc..0d95390 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -16,6 +16,7 @@
 
 #include "base/arena_containers.h"
 #include "bounds_check_elimination.h"
+#include "induction_var_range.h"
 #include "nodes.h"
 
 namespace art {
@@ -126,14 +127,17 @@
     return instruction_ == bound.instruction_ && constant_ == bound.constant_;
   }
 
-  static HInstruction* FromArrayLengthToArray(HInstruction* instruction) {
-    DCHECK(instruction->IsArrayLength() || instruction->IsNewArray());
-    if (instruction->IsArrayLength()) {
-      HInstruction* input = instruction->InputAt(0);
-      if (input->IsNullCheck()) {
-        input = input->AsNullCheck()->InputAt(0);
-      }
-      return input;
+  /*
+   * Hunt "under the hood" of array lengths (leading to array references),
+   * null checks (also leading to array references), and new arrays
+   * (leading to the actual length). This makes it more likely related
+   * instructions become actually comparable.
+   */
+  static HInstruction* HuntForDeclaration(HInstruction* instruction) {
+    while (instruction->IsArrayLength() ||
+           instruction->IsNullCheck() ||
+           instruction->IsNewArray()) {
+      instruction = instruction->InputAt(0);
     }
     return instruction;
   }
@@ -142,16 +146,11 @@
     if (instruction1 == instruction2) {
       return true;
     }
-
     if (instruction1 == nullptr || instruction2 == nullptr) {
       return false;
     }
-
-    // Some bounds are created with HNewArray* as the instruction instead
-    // of HArrayLength*. They are treated the same.
-    // HArrayLength with the same array input are considered equal also.
-    instruction1 = FromArrayLengthToArray(instruction1);
-    instruction2 = FromArrayLengthToArray(instruction2);
+    instruction1 = HuntForDeclaration(instruction1);
+    instruction2 = HuntForDeclaration(instruction2);
     return instruction1 == instruction2;
   }
 
@@ -275,9 +274,8 @@
       // Loop header of loop_info. Exiting loop is normal.
       return false;
     }
-    const GrowableArray<HBasicBlock*>& successors = block->GetSuccessors();
-    for (size_t i = 0; i < successors.Size(); i++) {
-      if (!loop_info->Contains(*successors.Get(i))) {
+    for (HBasicBlock* successor : block->GetSuccessors()) {
+      if (!loop_info->Contains(*successor)) {
         // One of the successors exits the loop.
         return true;
       }
@@ -797,8 +795,8 @@
     HBasicBlock* new_pre_header = header->GetDominator();
     DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader());
     HBasicBlock* if_block = new_pre_header->GetDominator();
-    HBasicBlock* dummy_block = if_block->GetSuccessors().Get(0);  // True successor.
-    HBasicBlock* deopt_block = if_block->GetSuccessors().Get(1);  // False successor.
+    HBasicBlock* dummy_block = if_block->GetSuccessor(0);  // True successor.
+    HBasicBlock* deopt_block = if_block->GetSuccessor(1);  // False successor.
 
     dummy_block->AddInstruction(new (graph->GetArena()) HGoto());
     deopt_block->AddInstruction(new (graph->GetArena()) HGoto());
@@ -845,14 +843,14 @@
     DCHECK(header->IsLoopHeader());
     HBasicBlock* pre_header = header->GetDominator();
     if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessors().Get(0) == pre_header);
+      DCHECK(deopt_block->GetSuccessor(0) == pre_header);
     } else {
       DCHECK(deopt_block == pre_header);
     }
     HGraph* graph = header->GetGraph();
     HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
     if (loop_entry_test_block_added) {
-      DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors().Get(1));
+      DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessor(1));
     }
 
     HIntConstant* const_instr = graph->GetIntConstant(constant);
@@ -926,7 +924,7 @@
     DCHECK(header->IsLoopHeader());
     HBasicBlock* pre_header = header->GetDominator();
     if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessors().Get(0) == pre_header);
+      DCHECK(deopt_block->GetSuccessor(0) == pre_header);
     } else {
       DCHECK(deopt_block == pre_header);
     }
@@ -1109,9 +1107,12 @@
     return block->GetBlockId() >= initial_block_size_;
   }
 
-  explicit BCEVisitor(HGraph* graph)
-      : HGraphVisitor(graph), maps_(graph->GetBlocks().Size()),
-        need_to_revisit_block_(false), initial_block_size_(graph->GetBlocks().Size()) {}
+  BCEVisitor(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+      : HGraphVisitor(graph),
+        maps_(graph->GetBlocks().Size()),
+        need_to_revisit_block_(false),
+        initial_block_size_(graph->GetBlocks().Size()),
+        induction_range_(induction_analysis) {}
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
     DCHECK(!IsAddedBlock(block));
@@ -1160,6 +1161,23 @@
     return nullptr;
   }
 
+  // Return the range resulting from induction variable analysis of "instruction" when the value
+  // is used from "context", for example, an index used from a bounds-check inside a loop body.
+  ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) {
+    InductionVarRange::Value v1 = induction_range_.GetMinInduction(context, instruction);
+    InductionVarRange::Value v2 = induction_range_.GetMaxInduction(context, instruction);
+    if ((v1.a_constant == 0 || v1.a_constant == 1) && v1.b_constant != INT_MIN &&
+        (v2.a_constant == 0 || v2.a_constant == 1) && v2.b_constant != INT_MAX) {
+      DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
+      DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
+      ValueBound low = ValueBound(v1.instruction, v1.b_constant);
+      ValueBound up = ValueBound(v2.instruction, v2.b_constant);
+      return new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), low, up);
+    }
+    // Didn't find anything useful.
+    return nullptr;
+  }
+
   // Narrow the value range of `instruction` at the end of `basic_block` with `range`,
   // and push the narrowed value range to `successor`.
   void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block,
@@ -1256,11 +1274,11 @@
 
     HBasicBlock* true_successor = instruction->IfTrueSuccessor();
     // There should be no critical edge at this point.
-    DCHECK_EQ(true_successor->GetPredecessors().Size(), 1u);
+    DCHECK_EQ(true_successor->GetPredecessors().size(), 1u);
 
     HBasicBlock* false_successor = instruction->IfFalseSuccessor();
     // There should be no critical edge at this point.
-    DCHECK_EQ(false_successor->GetPredecessors().Size(), 1u);
+    DCHECK_EQ(false_successor->GetPredecessors().size(), 1u);
 
     ValueRange* left_range = LookupValueRange(left, block);
     MonotonicValueRange* left_monotonic_range = nullptr;
@@ -1391,16 +1409,20 @@
     }
 
     if (!index->IsIntConstant()) {
+      ValueBound lower = ValueBound(nullptr, 0);        // constant 0
+      ValueBound upper = ValueBound(array_length, -1);  // array_length - 1
+      ValueRange array_range(GetGraph()->GetArena(), lower, upper);
+      // Try range obtained by local analysis.
       ValueRange* index_range = LookupValueRange(index, block);
-      if (index_range != nullptr) {
-        ValueBound lower = ValueBound(nullptr, 0);        // constant 0
-        ValueBound upper = ValueBound(array_length, -1);  // array_length - 1
-        ValueRange* array_range = new (GetGraph()->GetArena())
-            ValueRange(GetGraph()->GetArena(), lower, upper);
-        if (index_range->FitsIn(array_range)) {
-          ReplaceBoundsCheck(bounds_check, index);
-          return;
-        }
+      if (index_range != nullptr && index_range->FitsIn(&array_range)) {
+        ReplaceBoundsCheck(bounds_check, index);
+        return;
+      }
+      // Try range obtained by induction variable analysis.
+      index_range = LookupInductionRange(bounds_check, index);
+      if (index_range != nullptr && index_range->FitsIn(&array_range)) {
+        ReplaceBoundsCheck(bounds_check, index);
+        return;
       }
     } else {
       int32_t constant = index->AsIntConstant()->GetValue();
@@ -1468,10 +1490,10 @@
     // Start with input 1. Input 0 is from the incoming block.
     HInstruction* input1 = phi->InputAt(1);
     DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
-        *phi->GetBlock()->GetPredecessors().Get(1)));
+        *phi->GetBlock()->GetPredecessor(1)));
     for (size_t i = 2, e = phi->InputCount(); i < e; ++i) {
       DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
-          *phi->GetBlock()->GetPredecessors().Get(i)));
+          *phi->GetBlock()->GetPredecessor(i)));
       if (input1 != phi->InputAt(i)) {
         return false;
       }
@@ -1832,6 +1854,9 @@
   // Initial number of blocks.
   int32_t initial_block_size_;
 
+  // Range analysis based on induction variables.
+  InductionVarRange induction_range_;
+
   DISALLOW_COPY_AND_ASSIGN(BCEVisitor);
 };
 
@@ -1840,7 +1865,7 @@
     return;
   }
 
-  BCEVisitor visitor(graph_);
+  BCEVisitor visitor(graph_, induction_analysis_);
   // Reverse post order guarantees a node's dominators are visited first.
   // We want to visit in the dominator-based order since if a value is known to
   // be bounded by a range at one instruction, it must be true that all uses of
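
A note on LookupInductionRange introduced above: InductionVarRange::Value encodes a bound as a_constant * instruction + b_constant, and the guard admits only the forms BCE's ValueBound can express, either a plain constant (a_constant == 0) or instruction plus offset (a_constant == 1). The comparisons against INT_MIN and INT_MAX read as "no useful bound" sentinels from the analysis; that reading is inferred from the guard, not stated in the patch. The accept test in isolation:

    #include <climits>

    // Mirrors the shape of InductionVarRange::Value in this patch.
    struct Value {
      const void* instruction;  // nullptr for a pure constant
      int a_constant;           // multiplier on instruction
      int b_constant;           // additive constant
    };

    // A lower bound is usable when it is `instruction + constant` or a
    // constant, and is not the "unknown" sentinel.
    bool IsUsableMin(const Value& v) {
      return (v.a_constant == 0 || v.a_constant == 1) &&
             v.b_constant != INT_MIN;
    }

    // Symmetric test for the upper bound.
    bool IsUsableMax(const Value& v) {
      return (v.a_constant == 0 || v.a_constant == 1) &&
             v.b_constant != INT_MAX;
    }
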
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h
index 24b8ea7..cdff3ca 100644
--- a/compiler/optimizing/bounds_check_elimination.h
+++ b/compiler/optimizing/bounds_check_elimination.h
@@ -21,16 +21,21 @@
 
 namespace art {
 
+class HInductionVarAnalysis;
+
 class BoundsCheckElimination : public HOptimization {
  public:
-  explicit BoundsCheckElimination(HGraph* graph)
-      : HOptimization(graph, kBoundsCheckEliminiationPassName) {}
+  BoundsCheckElimination(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+      : HOptimization(graph, kBoundsCheckEliminiationPassName),
+        induction_analysis_(induction_analysis) {}
 
   void Run() OVERRIDE;
 
   static constexpr const char* kBoundsCheckEliminiationPassName = "BCE";
 
  private:
+  HInductionVarAnalysis* induction_analysis_;
+
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckElimination);
 };
 
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 4701bdd..08e1e36 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -18,6 +18,7 @@
 #include "bounds_check_elimination.h"
 #include "builder.h"
 #include "gvn.h"
+#include "induction_var_analysis.h"
 #include "instruction_simplifier.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
@@ -27,101 +28,122 @@
 
 namespace art {
 
-static void RunSimplifierAndGvn(HGraph* graph) {
-  InstructionSimplifier simplify(graph);
-  simplify.Run();
-  SideEffectsAnalysis side_effects(graph);
-  side_effects.Run();
-  GVNOptimization(graph, side_effects).Run();
-}
+/**
+ * Fixture class for the BoundsCheckElimination tests.
+ */
+class BoundsCheckEliminationTest : public testing::Test {
+ public:
+  BoundsCheckEliminationTest() : pool_(), allocator_(&pool_) {
+    graph_ = CreateGraph(&allocator_);
+    graph_->SetHasBoundsChecks(true);
+  }
+
+  ~BoundsCheckEliminationTest() { }
+
+  void RunBCE() {
+    graph_->BuildDominatorTree();
+    graph_->AnalyzeNaturalLoops();
+
+    InstructionSimplifier(graph_).Run();
+
+    SideEffectsAnalysis side_effects(graph_);
+    side_effects.Run();
+
+    GVNOptimization(graph_, side_effects).Run();
+
+    HInductionVarAnalysis induction(graph_);
+    induction.Run();
+
+    BoundsCheckElimination(graph_, &induction).Run();
+  }
+
+  ArenaPool pool_;
+  ArenaAllocator allocator_;
+  HGraph* graph_;
+};
+
 
 // if (i < 0) { array[i] = 1; // Can't eliminate. }
 // else if (i >= array.length) { array[i] = 1; // Can't eliminate. }
 // else { array[i] = 1; // Can eliminate. }
-TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-
-  HGraph* graph = CreateGraph(&allocator);
-  graph->SetHasBoundsChecks(true);
-
-  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(entry);
-  graph->SetEntryBlock(entry);
-  HInstruction* parameter1 = new (&allocator)
+TEST_F(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) {
+  HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(entry);
+  graph_->SetEntryBlock(entry);
+  HInstruction* parameter1 = new (&allocator_)
       HParameterValue(0, Primitive::kPrimNot);  // array
-  HInstruction* parameter2 = new (&allocator)
+  HInstruction* parameter2 = new (&allocator_)
       HParameterValue(0, Primitive::kPrimInt);  // i
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
 
-  HInstruction* constant_1 = graph->GetIntConstant(1);
-  HInstruction* constant_0 = graph->GetIntConstant(0);
+  HInstruction* constant_1 = graph_->GetIntConstant(1);
+  HInstruction* constant_0 = graph_->GetIntConstant(0);
 
-  HBasicBlock* block1 = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(block1);
-  HInstruction* cmp = new (&allocator) HGreaterThanOrEqual(parameter2, constant_0);
-  HIf* if_inst = new (&allocator) HIf(cmp);
+  HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block1);
+  HInstruction* cmp = new (&allocator_) HGreaterThanOrEqual(parameter2, constant_0);
+  HIf* if_inst = new (&allocator_) HIf(cmp);
   block1->AddInstruction(cmp);
   block1->AddInstruction(if_inst);
   entry->AddSuccessor(block1);
 
-  HBasicBlock* block2 = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(block2);
-  HNullCheck* null_check = new (&allocator) HNullCheck(parameter1, 0);
-  HArrayLength* array_length = new (&allocator) HArrayLength(null_check);
-  HBoundsCheck* bounds_check2 = new (&allocator)
+  HBasicBlock* block2 = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block2);
+  HNullCheck* null_check = new (&allocator_) HNullCheck(parameter1, 0);
+  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check);
+  HBoundsCheck* bounds_check2 = new (&allocator_)
       HBoundsCheck(parameter2, array_length, 0);
-  HArraySet* array_set = new (&allocator) HArraySet(
+  HArraySet* array_set = new (&allocator_) HArraySet(
     null_check, bounds_check2, constant_1, Primitive::kPrimInt, 0);
   block2->AddInstruction(null_check);
   block2->AddInstruction(array_length);
   block2->AddInstruction(bounds_check2);
   block2->AddInstruction(array_set);
 
-  HBasicBlock* block3 = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(block3);
-  null_check = new (&allocator) HNullCheck(parameter1, 0);
-  array_length = new (&allocator) HArrayLength(null_check);
-  cmp = new (&allocator) HLessThan(parameter2, array_length);
-  if_inst = new (&allocator) HIf(cmp);
+  HBasicBlock* block3 = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block3);
+  null_check = new (&allocator_) HNullCheck(parameter1, 0);
+  array_length = new (&allocator_) HArrayLength(null_check);
+  cmp = new (&allocator_) HLessThan(parameter2, array_length);
+  if_inst = new (&allocator_) HIf(cmp);
   block3->AddInstruction(null_check);
   block3->AddInstruction(array_length);
   block3->AddInstruction(cmp);
   block3->AddInstruction(if_inst);
 
-  HBasicBlock* block4 = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(block4);
-  null_check = new (&allocator) HNullCheck(parameter1, 0);
-  array_length = new (&allocator) HArrayLength(null_check);
-  HBoundsCheck* bounds_check4 = new (&allocator)
+  HBasicBlock* block4 = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block4);
+  null_check = new (&allocator_) HNullCheck(parameter1, 0);
+  array_length = new (&allocator_) HArrayLength(null_check);
+  HBoundsCheck* bounds_check4 = new (&allocator_)
       HBoundsCheck(parameter2, array_length, 0);
-  array_set = new (&allocator) HArraySet(
+  array_set = new (&allocator_) HArraySet(
     null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0);
   block4->AddInstruction(null_check);
   block4->AddInstruction(array_length);
   block4->AddInstruction(bounds_check4);
   block4->AddInstruction(array_set);
 
-  HBasicBlock* block5 = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(block5);
-  null_check = new (&allocator) HNullCheck(parameter1, 0);
-  array_length = new (&allocator) HArrayLength(null_check);
-  HBoundsCheck* bounds_check5 = new (&allocator)
+  HBasicBlock* block5 = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block5);
+  null_check = new (&allocator_) HNullCheck(parameter1, 0);
+  array_length = new (&allocator_) HArrayLength(null_check);
+  HBoundsCheck* bounds_check5 = new (&allocator_)
       HBoundsCheck(parameter2, array_length, 0);
-  array_set = new (&allocator) HArraySet(
+  array_set = new (&allocator_) HArraySet(
     null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0);
   block5->AddInstruction(null_check);
   block5->AddInstruction(array_length);
   block5->AddInstruction(bounds_check5);
   block5->AddInstruction(array_set);
 
-  HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(exit);
+  HBasicBlock* exit = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(exit);
   block2->AddSuccessor(exit);
   block4->AddSuccessor(exit);
   block5->AddSuccessor(exit);
-  exit->AddInstruction(new (&allocator) HExit());
+  exit->AddInstruction(new (&allocator_) HExit());
 
   block1->AddSuccessor(block3);  // True successor
   block1->AddSuccessor(block2);  // False successor
@@ -129,10 +151,8 @@
   block3->AddSuccessor(block5);  // True successor
   block3->AddSuccessor(block4);  // False successor
 
-  graph->BuildDominatorTree();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination(graph);
-  bounds_check_elimination.Run();
+  RunBCE();
+
   ASSERT_FALSE(IsRemoved(bounds_check2));
   ASSERT_FALSE(IsRemoved(bounds_check4));
   ASSERT_TRUE(IsRemoved(bounds_check5));
@@ -143,230 +163,203 @@
 //   int j = i + Integer.MAX_VALUE;
 //   if (j < array.length) array[j] = 1;  // Can't eliminate.
 // }
-TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-
-  HGraph* graph = CreateGraph(&allocator);
-  graph->SetHasBoundsChecks(true);
-
-  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(entry);
-  graph->SetEntryBlock(entry);
-  HInstruction* parameter1 = new (&allocator)
+TEST_F(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) {
+  HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(entry);
+  graph_->SetEntryBlock(entry);
+  HInstruction* parameter1 = new (&allocator_)
       HParameterValue(0, Primitive::kPrimNot);  // array
-  HInstruction* parameter2 = new (&allocator)
+  HInstruction* parameter2 = new (&allocator_)
       HParameterValue(0, Primitive::kPrimInt);  // i
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
 
-  HInstruction* constant_1 = graph->GetIntConstant(1);
-  HInstruction* constant_0 = graph->GetIntConstant(0);
-  HInstruction* constant_max_int = graph->GetIntConstant(INT_MAX);
+  HInstruction* constant_1 = graph_->GetIntConstant(1);
+  HInstruction* constant_0 = graph_->GetIntConstant(0);
+  HInstruction* constant_max_int = graph_->GetIntConstant(INT_MAX);
 
-  HBasicBlock* block1 = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(block1);
-  HInstruction* cmp = new (&allocator) HLessThanOrEqual(parameter2, constant_0);
-  HIf* if_inst = new (&allocator) HIf(cmp);
+  HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block1);
+  HInstruction* cmp = new (&allocator_) HLessThanOrEqual(parameter2, constant_0);
+  HIf* if_inst = new (&allocator_) HIf(cmp);
   block1->AddInstruction(cmp);
   block1->AddInstruction(if_inst);
   entry->AddSuccessor(block1);
 
-  HBasicBlock* block2 = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(block2);
-  HInstruction* add = new (&allocator) HAdd(Primitive::kPrimInt, parameter2, constant_max_int);
-  HNullCheck* null_check = new (&allocator) HNullCheck(parameter1, 0);
-  HArrayLength* array_length = new (&allocator) HArrayLength(null_check);
-  HInstruction* cmp2 = new (&allocator) HGreaterThanOrEqual(add, array_length);
-  if_inst = new (&allocator) HIf(cmp2);
+  HBasicBlock* block2 = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block2);
+  HInstruction* add = new (&allocator_) HAdd(Primitive::kPrimInt, parameter2, constant_max_int);
+  HNullCheck* null_check = new (&allocator_) HNullCheck(parameter1, 0);
+  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check);
+  HInstruction* cmp2 = new (&allocator_) HGreaterThanOrEqual(add, array_length);
+  if_inst = new (&allocator_) HIf(cmp2);
   block2->AddInstruction(add);
   block2->AddInstruction(null_check);
   block2->AddInstruction(array_length);
   block2->AddInstruction(cmp2);
   block2->AddInstruction(if_inst);
 
-  HBasicBlock* block3 = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(block3);
-  HBoundsCheck* bounds_check = new (&allocator)
+  HBasicBlock* block3 = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block3);
+  HBoundsCheck* bounds_check = new (&allocator_)
       HBoundsCheck(add, array_length, 0);
-  HArraySet* array_set = new (&allocator) HArraySet(
+  HArraySet* array_set = new (&allocator_) HArraySet(
     null_check, bounds_check, constant_1, Primitive::kPrimInt, 0);
   block3->AddInstruction(bounds_check);
   block3->AddInstruction(array_set);
 
-  HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(exit);
-  exit->AddInstruction(new (&allocator) HExit());
+  HBasicBlock* exit = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(exit);
+  exit->AddInstruction(new (&allocator_) HExit());
   block1->AddSuccessor(exit);    // true successor
   block1->AddSuccessor(block2);  // false successor
   block2->AddSuccessor(exit);    // true successor
   block2->AddSuccessor(block3);  // false successor
   block3->AddSuccessor(exit);
 
-  graph->BuildDominatorTree();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination(graph);
-  bounds_check_elimination.Run();
+  RunBCE();
+
   ASSERT_FALSE(IsRemoved(bounds_check));
 }
 
 // if (i < array.length) {
 //   int j = i - Integer.MAX_VALUE;
-//   j = j - Integer.MAX_VALUE;  // j is (i+2) after substracting MAX_INT twice
+//   j = j - Integer.MAX_VALUE;  // j is (i+2) after subtracting MAX_INT twice
 //   if (j > 0) array[j] = 1;    // Can't eliminate.
 // }
-TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-
-  HGraph* graph = CreateGraph(&allocator);
-  graph->SetHasBoundsChecks(true);
-
-  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(entry);
-  graph->SetEntryBlock(entry);
-  HInstruction* parameter1 = new (&allocator)
+TEST_F(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) {
+  HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(entry);
+  graph_->SetEntryBlock(entry);
+  HInstruction* parameter1 = new (&allocator_)
       HParameterValue(0, Primitive::kPrimNot);  // array
-  HInstruction* parameter2 = new (&allocator)
+  HInstruction* parameter2 = new (&allocator_)
       HParameterValue(0, Primitive::kPrimInt);  // i
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
 
-  HInstruction* constant_1 = graph->GetIntConstant(1);
-  HInstruction* constant_0 = graph->GetIntConstant(0);
-  HInstruction* constant_max_int = graph->GetIntConstant(INT_MAX);
+  HInstruction* constant_1 = graph_->GetIntConstant(1);
+  HInstruction* constant_0 = graph_->GetIntConstant(0);
+  HInstruction* constant_max_int = graph_->GetIntConstant(INT_MAX);
 
-  HBasicBlock* block1 = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(block1);
-  HNullCheck* null_check = new (&allocator) HNullCheck(parameter1, 0);
-  HArrayLength* array_length = new (&allocator) HArrayLength(null_check);
-  HInstruction* cmp = new (&allocator) HGreaterThanOrEqual(parameter2, array_length);
-  HIf* if_inst = new (&allocator) HIf(cmp);
+  HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block1);
+  HNullCheck* null_check = new (&allocator_) HNullCheck(parameter1, 0);
+  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check);
+  HInstruction* cmp = new (&allocator_) HGreaterThanOrEqual(parameter2, array_length);
+  HIf* if_inst = new (&allocator_) HIf(cmp);
   block1->AddInstruction(null_check);
   block1->AddInstruction(array_length);
   block1->AddInstruction(cmp);
   block1->AddInstruction(if_inst);
   entry->AddSuccessor(block1);
 
-  HBasicBlock* block2 = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(block2);
-  HInstruction* sub1 = new (&allocator) HSub(Primitive::kPrimInt, parameter2, constant_max_int);
-  HInstruction* sub2 = new (&allocator) HSub(Primitive::kPrimInt, sub1, constant_max_int);
-  HInstruction* cmp2 = new (&allocator) HLessThanOrEqual(sub2, constant_0);
-  if_inst = new (&allocator) HIf(cmp2);
+  HBasicBlock* block2 = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block2);
+  HInstruction* sub1 = new (&allocator_) HSub(Primitive::kPrimInt, parameter2, constant_max_int);
+  HInstruction* sub2 = new (&allocator_) HSub(Primitive::kPrimInt, sub1, constant_max_int);
+  HInstruction* cmp2 = new (&allocator_) HLessThanOrEqual(sub2, constant_0);
+  if_inst = new (&allocator_) HIf(cmp2);
   block2->AddInstruction(sub1);
   block2->AddInstruction(sub2);
   block2->AddInstruction(cmp2);
   block2->AddInstruction(if_inst);
 
-  HBasicBlock* block3 = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(block3);
-  HBoundsCheck* bounds_check = new (&allocator)
+  HBasicBlock* block3 = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block3);
+  HBoundsCheck* bounds_check = new (&allocator_)
       HBoundsCheck(sub2, array_length, 0);
-  HArraySet* array_set = new (&allocator) HArraySet(
+  HArraySet* array_set = new (&allocator_) HArraySet(
     null_check, bounds_check, constant_1, Primitive::kPrimInt, 0);
   block3->AddInstruction(bounds_check);
   block3->AddInstruction(array_set);
 
-  HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(exit);
-  exit->AddInstruction(new (&allocator) HExit());
+  HBasicBlock* exit = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(exit);
+  exit->AddInstruction(new (&allocator_) HExit());
   block1->AddSuccessor(exit);    // true successor
   block1->AddSuccessor(block2);  // false successor
   block2->AddSuccessor(exit);    // true successor
   block2->AddSuccessor(block3);  // false successor
   block3->AddSuccessor(exit);
 
-  graph->BuildDominatorTree();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination(graph);
-  bounds_check_elimination.Run();
+  RunBCE();
+
   ASSERT_FALSE(IsRemoved(bounds_check));
 }
 
 // array[6] = 1; // Can't eliminate.
 // array[5] = 1; // Can eliminate.
 // array[4] = 1; // Can eliminate.
-TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-
-  HGraph* graph = CreateGraph(&allocator);
-  graph->SetHasBoundsChecks(true);
-
-  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(entry);
-  graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+TEST_F(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) {
+  HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(entry);
+  graph_->SetEntryBlock(entry);
+  HInstruction* parameter = new (&allocator_) HParameterValue(0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
-  HInstruction* constant_5 = graph->GetIntConstant(5);
-  HInstruction* constant_4 = graph->GetIntConstant(4);
-  HInstruction* constant_6 = graph->GetIntConstant(6);
-  HInstruction* constant_1 = graph->GetIntConstant(1);
+  HInstruction* constant_5 = graph_->GetIntConstant(5);
+  HInstruction* constant_4 = graph_->GetIntConstant(4);
+  HInstruction* constant_6 = graph_->GetIntConstant(6);
+  HInstruction* constant_1 = graph_->GetIntConstant(1);
 
-  HBasicBlock* block = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(block);
+  HBasicBlock* block = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block);
   entry->AddSuccessor(block);
 
-  HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0);
-  HArrayLength* array_length = new (&allocator) HArrayLength(null_check);
-  HBoundsCheck* bounds_check6 = new (&allocator)
+  HNullCheck* null_check = new (&allocator_) HNullCheck(parameter, 0);
+  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check);
+  HBoundsCheck* bounds_check6 = new (&allocator_)
       HBoundsCheck(constant_6, array_length, 0);
-  HInstruction* array_set = new (&allocator) HArraySet(
+  HInstruction* array_set = new (&allocator_) HArraySet(
     null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0);
   block->AddInstruction(null_check);
   block->AddInstruction(array_length);
   block->AddInstruction(bounds_check6);
   block->AddInstruction(array_set);
 
-  null_check = new (&allocator) HNullCheck(parameter, 0);
-  array_length = new (&allocator) HArrayLength(null_check);
-  HBoundsCheck* bounds_check5 = new (&allocator)
+  null_check = new (&allocator_) HNullCheck(parameter, 0);
+  array_length = new (&allocator_) HArrayLength(null_check);
+  HBoundsCheck* bounds_check5 = new (&allocator_)
       HBoundsCheck(constant_5, array_length, 0);
-  array_set = new (&allocator) HArraySet(
+  array_set = new (&allocator_) HArraySet(
     null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0);
   block->AddInstruction(null_check);
   block->AddInstruction(array_length);
   block->AddInstruction(bounds_check5);
   block->AddInstruction(array_set);
 
-  null_check = new (&allocator) HNullCheck(parameter, 0);
-  array_length = new (&allocator) HArrayLength(null_check);
-  HBoundsCheck* bounds_check4 = new (&allocator)
+  null_check = new (&allocator_) HNullCheck(parameter, 0);
+  array_length = new (&allocator_) HArrayLength(null_check);
+  HBoundsCheck* bounds_check4 = new (&allocator_)
       HBoundsCheck(constant_4, array_length, 0);
-  array_set = new (&allocator) HArraySet(
+  array_set = new (&allocator_) HArraySet(
     null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0);
   block->AddInstruction(null_check);
   block->AddInstruction(array_length);
   block->AddInstruction(bounds_check4);
   block->AddInstruction(array_set);
 
-  block->AddInstruction(new (&allocator) HGoto());
+  block->AddInstruction(new (&allocator_) HGoto());
 
-  HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(exit);
+  HBasicBlock* exit = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(exit);
   block->AddSuccessor(exit);
-  exit->AddInstruction(new (&allocator) HExit());
+  exit->AddInstruction(new (&allocator_) HExit());
 
-  graph->BuildDominatorTree();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination(graph);
-  bounds_check_elimination.Run();
+  RunBCE();
+
   ASSERT_FALSE(IsRemoved(bounds_check6));
   ASSERT_TRUE(IsRemoved(bounds_check5));
   ASSERT_TRUE(IsRemoved(bounds_check4));
 }
 
 // for (int i=initial; i<array.length; i+=increment) { array[i] = 10; }
-static HGraph* BuildSSAGraph1(ArenaAllocator* allocator,
-                              HInstruction** bounds_check,
-                              int initial,
-                              int increment,
-                              IfCondition cond = kCondGE) {
-  HGraph* graph = CreateGraph(allocator);
-  graph->SetHasBoundsChecks(true);
-
+static HInstruction* BuildSSAGraph1(HGraph* graph,
+                                    ArenaAllocator* allocator,
+                                    int initial,
+                                    int increment,
+                                    IfCondition cond = kCondGE) {
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -414,14 +407,14 @@
 
   null_check = new (allocator) HNullCheck(parameter, 0);
   array_length = new (allocator) HArrayLength(null_check);
-  *bounds_check = new (allocator) HBoundsCheck(phi, array_length, 0);
+  HInstruction* bounds_check = new (allocator) HBoundsCheck(phi, array_length, 0);
   HInstruction* array_set = new (allocator) HArraySet(
-      null_check, *bounds_check, constant_10, Primitive::kPrimInt, 0);
+      null_check, bounds_check, constant_10, Primitive::kPrimInt, 0);
 
   HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_increment);
   loop_body->AddInstruction(null_check);
   loop_body->AddInstruction(array_length);
-  loop_body->AddInstruction(*bounds_check);
+  loop_body->AddInstruction(bounds_check);
   loop_body->AddInstruction(array_set);
   loop_body->AddInstruction(add);
   loop_body->AddInstruction(new (allocator) HGoto());
@@ -429,79 +422,58 @@
 
   exit->AddInstruction(new (allocator) HExit());
 
-  return graph;
+  return bounds_check;
 }
 
-TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination1a) {
   // for (int i=0; i<array.length; i++) { array[i] = 10; // Can eliminate with gvn. }
-  HInstruction* bounds_check = nullptr;
-  HGraph* graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination(graph);
-  bounds_check_elimination.Run();
+  HInstruction* bounds_check = BuildSSAGraph1(graph_, &allocator_, 0, 1);
+  RunBCE();
   ASSERT_TRUE(IsRemoved(bounds_check));
+}
 
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination1b) {
   // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. }
-  graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
-  bounds_check_elimination_with_initial_1.Run();
+  HInstruction* bounds_check = BuildSSAGraph1(graph_, &allocator_, 1, 1);
+  RunBCE();
   ASSERT_TRUE(IsRemoved(bounds_check));
+}
 
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination1c) {
   // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. }
-  graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph);
-  bounds_check_elimination_with_initial_minus_1.Run();
+  HInstruction* bounds_check = BuildSSAGraph1(graph_, &allocator_, -1, 1);
+  RunBCE();
   ASSERT_FALSE(IsRemoved(bounds_check));
+}
 
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination1d) {
   // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. }
-  graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
-  bounds_check_elimination_with_greater_than.Run();
+  HInstruction* bounds_check = BuildSSAGraph1(graph_, &allocator_, 0, 1, kCondGT);
+  RunBCE();
   ASSERT_FALSE(IsRemoved(bounds_check));
+}
 
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination1e) {
   // for (int i=0; i<array.length; i += 2) {
   //   array[i] = 10; // Can't eliminate due to overflow concern. }
-  graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination_with_increment_2(graph);
-  bounds_check_elimination_with_increment_2.Run();
+  HInstruction* bounds_check = BuildSSAGraph1(graph_, &allocator_, 0, 2);
+  RunBCE();
   ASSERT_FALSE(IsRemoved(bounds_check));
+}
 
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination1f) {
   // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. }
-  graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph);
-  bounds_check_elimination_with_increment_2_from_1.Run();
+  HInstruction* bounds_check = BuildSSAGraph1(graph_, &allocator_, 1, 2);
+  RunBCE();
   ASSERT_TRUE(IsRemoved(bounds_check));
 }
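
One reading of the 1e/1f contrast (reasoning inferred, not stated in the patch): with step 2 from an even start, i can legally reach INT_MAX - 1 under the i < array.length guard, and the next increment wraps negative, so BCE cannot prove 0 <= i; from an odd start the largest reachable value is INT_MAX - 2, and i + 2 never wraps. A hypothetical demo of the wrap in 32-bit two's-complement arithmetic:

    #include <climits>
    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t i = INT32_MAX - 1;  // even; still passes an i < length guard
      // i += 2 computed with explicit wrap-around semantics:
      int32_t next = static_cast<int32_t>(static_cast<uint32_t>(i) + 2u);
      printf("%d\n", next);  // prints -2147483648: a negative array index
      return 0;
    }
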
 
 // for (int i=array.length; i>0; i+=increment) { array[i-1] = 10; }
-static HGraph* BuildSSAGraph2(ArenaAllocator* allocator,
-                              HInstruction** bounds_check,
-                              int initial,
-                              int increment = -1,
-                              IfCondition cond = kCondLE) {
-  HGraph* graph = CreateGraph(allocator);
-  graph->SetHasBoundsChecks(true);
-
+static HInstruction* BuildSSAGraph2(HGraph* graph,
+                                    ArenaAllocator* allocator,
+                                    int initial,
+                                    int increment = -1,
+                                    IfCondition cond = kCondLE) {
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -551,14 +523,14 @@
   HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_minus_1);
   null_check = new (allocator) HNullCheck(parameter, 0);
   array_length = new (allocator) HArrayLength(null_check);
-  *bounds_check = new (allocator) HBoundsCheck(add, array_length, 0);
+  HInstruction* bounds_check = new (allocator) HBoundsCheck(add, array_length, 0);
   HInstruction* array_set = new (allocator) HArraySet(
-      null_check, *bounds_check, constant_10, Primitive::kPrimInt, 0);
+      null_check, bounds_check, constant_10, Primitive::kPrimInt, 0);
   HInstruction* add_phi = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_increment);
   loop_body->AddInstruction(add);
   loop_body->AddInstruction(null_check);
   loop_body->AddInstruction(array_length);
-  loop_body->AddInstruction(*bounds_check);
+  loop_body->AddInstruction(bounds_check);
   loop_body->AddInstruction(array_set);
   loop_body->AddInstruction(add_phi);
   loop_body->AddInstruction(new (allocator) HGoto());
@@ -566,70 +538,51 @@
 
   exit->AddInstruction(new (allocator) HExit());
 
-  return graph;
+  return bounds_check;
 }
 
-TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination2a) {
   // for (int i=array.length; i>0; i--) { array[i-1] = 10; // Can eliminate with gvn. }
-  HInstruction* bounds_check = nullptr;
-  HGraph* graph = BuildSSAGraph2(&allocator, &bounds_check, 0);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination(graph);
-  bounds_check_elimination.Run();
+  HInstruction* bounds_check = BuildSSAGraph2(graph_, &allocator_, 0);
+  RunBCE();
   ASSERT_TRUE(IsRemoved(bounds_check));
+}
 
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination2b) {
   // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. }
-  graph = BuildSSAGraph2(&allocator, &bounds_check, 1);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
-  bounds_check_elimination_with_initial_1.Run();
+  HInstruction* bounds_check = BuildSSAGraph2(graph_, &allocator_, 1);
+  RunBCE();
   ASSERT_TRUE(IsRemoved(bounds_check));
+}
 
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination2c) {
   // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. }
-  graph = BuildSSAGraph2(&allocator, &bounds_check, -1);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph);
-  bounds_check_elimination_with_initial_minus_1.Run();
+  HInstruction* bounds_check = BuildSSAGraph2(graph_, &allocator_, -1);
+  RunBCE();
   ASSERT_FALSE(IsRemoved(bounds_check));
+}
 
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination2d) {
   // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. }
-  graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination_with_less_than(graph);
-  bounds_check_elimination_with_less_than.Run();
+  HInstruction* bounds_check = BuildSSAGraph2(graph_, &allocator_, 0, -1, kCondLT);
+  RunBCE();
   ASSERT_FALSE(IsRemoved(bounds_check));
+}
 
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination2e) {
   // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. }
-  graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph);
-  bounds_check_elimination_increment_minus_2.Run();
+  HInstruction* bounds_check = BuildSSAGraph2(graph_, &allocator_, 0, -2);
+  RunBCE();
   ASSERT_TRUE(IsRemoved(bounds_check));
 }
 
 // int[] array = new int[10];
 // for (int i=0; i<10; i+=increment) { array[i] = 10; }
-static HGraph* BuildSSAGraph3(ArenaAllocator* allocator,
-                              HInstruction** bounds_check,
-                              int initial,
-                              int increment,
-                              IfCondition cond) {
-  HGraph* graph = CreateGraph(allocator);
-  graph->SetHasBoundsChecks(true);
-
+static HInstruction* BuildSSAGraph3(HGraph* graph,
+                                    ArenaAllocator* allocator,
+                                    int initial,
+                                    int increment,
+                                    IfCondition cond) {
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -679,13 +632,13 @@
 
   HNullCheck* null_check = new (allocator) HNullCheck(new_array, 0);
   HArrayLength* array_length = new (allocator) HArrayLength(null_check);
-  *bounds_check = new (allocator) HBoundsCheck(phi, array_length, 0);
+  HInstruction* bounds_check = new (allocator) HBoundsCheck(phi, array_length, 0);
   HInstruction* array_set = new (allocator) HArraySet(
-      null_check, *bounds_check, constant_10, Primitive::kPrimInt, 0);
+      null_check, bounds_check, constant_10, Primitive::kPrimInt, 0);
   HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_increment);
   loop_body->AddInstruction(null_check);
   loop_body->AddInstruction(array_length);
-  loop_body->AddInstruction(*bounds_check);
+  loop_body->AddInstruction(bounds_check);
   loop_body->AddInstruction(array_set);
   loop_body->AddInstruction(add);
   loop_body->AddInstruction(new (allocator) HGoto());
@@ -693,63 +646,46 @@
 
   exit->AddInstruction(new (allocator) HExit());
 
-  return graph;
+  return bounds_check;
 }
 
-TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination3a) {
   // int[] array = new int[10];
   // for (int i=0; i<10; i++) { array[i] = 10; // Can eliminate. }
-  HInstruction* bounds_check = nullptr;
-  HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination(graph);
-  bounds_check_elimination.Run();
+  HInstruction* bounds_check = BuildSSAGraph3(graph_, &allocator_, 0, 1, kCondGE);
+  RunBCE();
   ASSERT_TRUE(IsRemoved(bounds_check));
+}
 
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination3b) {
   // int[] array = new int[10];
   // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. }
-  graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
-  bounds_check_elimination_with_initial_1.Run();
+  HInstruction* bounds_check = BuildSSAGraph3(graph_, &allocator_, 1, 1, kCondGE);
+  RunBCE();
   ASSERT_TRUE(IsRemoved(bounds_check));
+}
 
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination3c) {
   // int[] array = new int[10];
   // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. }
-  graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
-  bounds_check_elimination_with_greater_than.Run();
+  HInstruction* bounds_check = BuildSSAGraph3(graph_, &allocator_, 0, 1, kCondGT);
+  RunBCE();
   ASSERT_FALSE(IsRemoved(bounds_check));
+}
 
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination3d) {
   // int[] array = new int[10];
   // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. }
-  graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination_increment_8(graph);
-  bounds_check_elimination_increment_8.Run();
+  HInstruction* bounds_check = BuildSSAGraph3(graph_, &allocator_, 1, 8, kCondGE);
+  RunBCE();
   ASSERT_TRUE(IsRemoved(bounds_check));
 }
 
 // for (int i=initial; i<array.length; i++) { array[array.length-i-1] = 10; }
-static HGraph* BuildSSAGraph4(ArenaAllocator* allocator,
-                              HInstruction** bounds_check,
-                              int initial,
-                              IfCondition cond = kCondGE) {
-  HGraph* graph = CreateGraph(allocator);
-  graph->SetHasBoundsChecks(true);
-
+static HInstruction* BuildSSAGraph4(HGraph* graph,
+                                    ArenaAllocator* allocator,
+                                    int initial,
+                                    IfCondition cond = kCondGE) {
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
@@ -800,15 +736,15 @@
   HInstruction* sub = new (allocator) HSub(Primitive::kPrimInt, array_length, phi);
   HInstruction* add_minus_1 = new (allocator)
       HAdd(Primitive::kPrimInt, sub, constant_minus_1);
-  *bounds_check = new (allocator) HBoundsCheck(add_minus_1, array_length, 0);
+  HInstruction* bounds_check = new (allocator) HBoundsCheck(add_minus_1, array_length, 0);
   HInstruction* array_set = new (allocator) HArraySet(
-      null_check, *bounds_check, constant_10, Primitive::kPrimInt, 0);
+      null_check, bounds_check, constant_10, Primitive::kPrimInt, 0);
   HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_1);
   loop_body->AddInstruction(null_check);
   loop_body->AddInstruction(array_length);
   loop_body->AddInstruction(sub);
   loop_body->AddInstruction(add_minus_1);
-  loop_body->AddInstruction(*bounds_check);
+  loop_body->AddInstruction(bounds_check);
   loop_body->AddInstruction(array_set);
   loop_body->AddInstruction(add);
   loop_body->AddInstruction(new (allocator) HGoto());
@@ -816,39 +752,27 @@
 
   exit->AddInstruction(new (allocator) HExit());
 
-  return graph;
+  return bounds_check;
 }
 
-TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination4a) {
   // for (int i=0; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate with gvn. }
-  HInstruction* bounds_check = nullptr;
-  HGraph* graph = BuildSSAGraph4(&allocator, &bounds_check, 0);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination(graph);
-  bounds_check_elimination.Run();
+  HInstruction* bounds_check = BuildSSAGraph4(graph_, &allocator_, 0);
+  RunBCE();
   ASSERT_TRUE(IsRemoved(bounds_check));
+}
 
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination4b) {
   // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. }
-  graph = BuildSSAGraph4(&allocator, &bounds_check, 1);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
-  bounds_check_elimination_with_initial_1.Run();
+  HInstruction* bounds_check = BuildSSAGraph4(graph_, &allocator_, 1);
+  RunBCE();
   ASSERT_TRUE(IsRemoved(bounds_check));
+}
 
+TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination4c) {
   // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. }
-  graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT);
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
-  bounds_check_elimination_with_greater_than.Run();
+  HInstruction* bounds_check = BuildSSAGraph4(graph_, &allocator_, 0, kCondGT);
+  RunBCE();
   ASSERT_FALSE(IsRemoved(bounds_check));
 }
 
@@ -863,40 +787,34 @@
 //     }
 //  }
 // }
-TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) {
-  ArenaPool pool;
-  ArenaAllocator allocator(&pool);
-
-  HGraph* graph = CreateGraph(&allocator);
-  graph->SetHasBoundsChecks(true);
-
-  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(entry);
-  graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+TEST_F(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) {
+  HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(entry);
+  graph_->SetEntryBlock(entry);
+  HInstruction* parameter = new (&allocator_) HParameterValue(0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
-  HInstruction* constant_0 = graph->GetIntConstant(0);
-  HInstruction* constant_minus_1 = graph->GetIntConstant(-1);
-  HInstruction* constant_1 = graph->GetIntConstant(1);
+  HInstruction* constant_0 = graph_->GetIntConstant(0);
+  HInstruction* constant_minus_1 = graph_->GetIntConstant(-1);
+  HInstruction* constant_1 = graph_->GetIntConstant(1);
 
-  HBasicBlock* block = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(block);
+  HBasicBlock* block = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(block);
   entry->AddSuccessor(block);
-  block->AddInstruction(new (&allocator) HGoto());
+  block->AddInstruction(new (&allocator_) HGoto());
 
-  HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(exit);
-  exit->AddInstruction(new (&allocator) HExit());
+  HBasicBlock* exit = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(exit);
+  exit->AddInstruction(new (&allocator_) HExit());
 
-  HBasicBlock* outer_header = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(outer_header);
-  HPhi* phi_i = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt);
-  HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0);
-  HArrayLength* array_length = new (&allocator) HArrayLength(null_check);
-  HAdd* add = new (&allocator) HAdd(Primitive::kPrimInt, array_length, constant_minus_1);
-  HInstruction* cmp = new (&allocator) HGreaterThanOrEqual(phi_i, add);
-  HIf* if_inst = new (&allocator) HIf(cmp);
+  HBasicBlock* outer_header = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(outer_header);
+  HPhi* phi_i = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt);
+  HNullCheck* null_check = new (&allocator_) HNullCheck(parameter, 0);
+  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check);
+  HAdd* add = new (&allocator_) HAdd(Primitive::kPrimInt, array_length, constant_minus_1);
+  HInstruction* cmp = new (&allocator_) HGreaterThanOrEqual(phi_i, add);
+  HIf* if_inst = new (&allocator_) HIf(cmp);
   outer_header->AddPhi(phi_i);
   outer_header->AddInstruction(null_check);
   outer_header->AddInstruction(array_length);
@@ -905,15 +823,15 @@
   outer_header->AddInstruction(if_inst);
   phi_i->AddInput(constant_0);
 
-  HBasicBlock* inner_header = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(inner_header);
-  HPhi* phi_j = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt);
-  null_check = new (&allocator) HNullCheck(parameter, 0);
-  array_length = new (&allocator) HArrayLength(null_check);
-  HSub* sub = new (&allocator) HSub(Primitive::kPrimInt, array_length, phi_i);
-  add = new (&allocator) HAdd(Primitive::kPrimInt, sub, constant_minus_1);
-  cmp = new (&allocator) HGreaterThanOrEqual(phi_j, add);
-  if_inst = new (&allocator) HIf(cmp);
+  HBasicBlock* inner_header = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(inner_header);
+  HPhi* phi_j = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt);
+  null_check = new (&allocator_) HNullCheck(parameter, 0);
+  array_length = new (&allocator_) HArrayLength(null_check);
+  HSub* sub = new (&allocator_) HSub(Primitive::kPrimInt, array_length, phi_i);
+  add = new (&allocator_) HAdd(Primitive::kPrimInt, sub, constant_minus_1);
+  cmp = new (&allocator_) HGreaterThanOrEqual(phi_j, add);
+  if_inst = new (&allocator_) HIf(cmp);
   inner_header->AddPhi(phi_j);
   inner_header->AddInstruction(null_check);
   inner_header->AddInstruction(array_length);
@@ -923,25 +841,25 @@
   inner_header->AddInstruction(if_inst);
   phi_j->AddInput(constant_0);
 
-  HBasicBlock* inner_body_compare = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(inner_body_compare);
-  null_check = new (&allocator) HNullCheck(parameter, 0);
-  array_length = new (&allocator) HArrayLength(null_check);
-  HBoundsCheck* bounds_check1 = new (&allocator) HBoundsCheck(phi_j, array_length, 0);
-  HArrayGet* array_get_j = new (&allocator)
+  HBasicBlock* inner_body_compare = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(inner_body_compare);
+  null_check = new (&allocator_) HNullCheck(parameter, 0);
+  array_length = new (&allocator_) HArrayLength(null_check);
+  HBoundsCheck* bounds_check1 = new (&allocator_) HBoundsCheck(phi_j, array_length, 0);
+  HArrayGet* array_get_j = new (&allocator_)
       HArrayGet(null_check, bounds_check1, Primitive::kPrimInt);
   inner_body_compare->AddInstruction(null_check);
   inner_body_compare->AddInstruction(array_length);
   inner_body_compare->AddInstruction(bounds_check1);
   inner_body_compare->AddInstruction(array_get_j);
-  HInstruction* j_plus_1 = new (&allocator) HAdd(Primitive::kPrimInt, phi_j, constant_1);
-  null_check = new (&allocator) HNullCheck(parameter, 0);
-  array_length = new (&allocator) HArrayLength(null_check);
-  HBoundsCheck* bounds_check2 = new (&allocator) HBoundsCheck(j_plus_1, array_length, 0);
-  HArrayGet* array_get_j_plus_1 = new (&allocator)
+  HInstruction* j_plus_1 = new (&allocator_) HAdd(Primitive::kPrimInt, phi_j, constant_1);
+  null_check = new (&allocator_) HNullCheck(parameter, 0);
+  array_length = new (&allocator_) HArrayLength(null_check);
+  HBoundsCheck* bounds_check2 = new (&allocator_) HBoundsCheck(j_plus_1, array_length, 0);
+  HArrayGet* array_get_j_plus_1 = new (&allocator_)
       HArrayGet(null_check, bounds_check2, Primitive::kPrimInt);
-  cmp = new (&allocator) HGreaterThanOrEqual(array_get_j, array_get_j_plus_1);
-  if_inst = new (&allocator) HIf(cmp);
+  cmp = new (&allocator_) HGreaterThanOrEqual(array_get_j, array_get_j_plus_1);
+  if_inst = new (&allocator_) HIf(cmp);
   inner_body_compare->AddInstruction(j_plus_1);
   inner_body_compare->AddInstruction(null_check);
   inner_body_compare->AddInstruction(array_length);
@@ -950,14 +868,14 @@
   inner_body_compare->AddInstruction(cmp);
   inner_body_compare->AddInstruction(if_inst);
 
-  HBasicBlock* inner_body_swap = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(inner_body_swap);
-  j_plus_1 = new (&allocator) HAdd(Primitive::kPrimInt, phi_j, constant_1);
+  HBasicBlock* inner_body_swap = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(inner_body_swap);
+  j_plus_1 = new (&allocator_) HAdd(Primitive::kPrimInt, phi_j, constant_1);
   // temp = array[j+1]
-  null_check = new (&allocator) HNullCheck(parameter, 0);
-  array_length = new (&allocator) HArrayLength(null_check);
-  HInstruction* bounds_check3 = new (&allocator) HBoundsCheck(j_plus_1, array_length, 0);
-  array_get_j_plus_1 = new (&allocator)
+  null_check = new (&allocator_) HNullCheck(parameter, 0);
+  array_length = new (&allocator_) HArrayLength(null_check);
+  HInstruction* bounds_check3 = new (&allocator_) HBoundsCheck(j_plus_1, array_length, 0);
+  array_get_j_plus_1 = new (&allocator_)
       HArrayGet(null_check, bounds_check3, Primitive::kPrimInt);
   inner_body_swap->AddInstruction(j_plus_1);
   inner_body_swap->AddInstruction(null_check);
@@ -965,48 +883,48 @@
   inner_body_swap->AddInstruction(bounds_check3);
   inner_body_swap->AddInstruction(array_get_j_plus_1);
   // array[j+1] = array[j]
-  null_check = new (&allocator) HNullCheck(parameter, 0);
-  array_length = new (&allocator) HArrayLength(null_check);
-  HInstruction* bounds_check4 = new (&allocator) HBoundsCheck(phi_j, array_length, 0);
-  array_get_j = new (&allocator)
+  null_check = new (&allocator_) HNullCheck(parameter, 0);
+  array_length = new (&allocator_) HArrayLength(null_check);
+  HInstruction* bounds_check4 = new (&allocator_) HBoundsCheck(phi_j, array_length, 0);
+  array_get_j = new (&allocator_)
       HArrayGet(null_check, bounds_check4, Primitive::kPrimInt);
   inner_body_swap->AddInstruction(null_check);
   inner_body_swap->AddInstruction(array_length);
   inner_body_swap->AddInstruction(bounds_check4);
   inner_body_swap->AddInstruction(array_get_j);
-  null_check = new (&allocator) HNullCheck(parameter, 0);
-  array_length = new (&allocator) HArrayLength(null_check);
-  HInstruction* bounds_check5 = new (&allocator) HBoundsCheck(j_plus_1, array_length, 0);
-  HArraySet* array_set_j_plus_1 = new (&allocator)
+  null_check = new (&allocator_) HNullCheck(parameter, 0);
+  array_length = new (&allocator_) HArrayLength(null_check);
+  HInstruction* bounds_check5 = new (&allocator_) HBoundsCheck(j_plus_1, array_length, 0);
+  HArraySet* array_set_j_plus_1 = new (&allocator_)
       HArraySet(null_check, bounds_check5, array_get_j, Primitive::kPrimInt, 0);
   inner_body_swap->AddInstruction(null_check);
   inner_body_swap->AddInstruction(array_length);
   inner_body_swap->AddInstruction(bounds_check5);
   inner_body_swap->AddInstruction(array_set_j_plus_1);
   // array[j] = temp
-  null_check = new (&allocator) HNullCheck(parameter, 0);
-  array_length = new (&allocator) HArrayLength(null_check);
-  HInstruction* bounds_check6 = new (&allocator) HBoundsCheck(phi_j, array_length, 0);
-  HArraySet* array_set_j = new (&allocator)
+  null_check = new (&allocator_) HNullCheck(parameter, 0);
+  array_length = new (&allocator_) HArrayLength(null_check);
+  HInstruction* bounds_check6 = new (&allocator_) HBoundsCheck(phi_j, array_length, 0);
+  HArraySet* array_set_j = new (&allocator_)
       HArraySet(null_check, bounds_check6, array_get_j_plus_1, Primitive::kPrimInt, 0);
   inner_body_swap->AddInstruction(null_check);
   inner_body_swap->AddInstruction(array_length);
   inner_body_swap->AddInstruction(bounds_check6);
   inner_body_swap->AddInstruction(array_set_j);
-  inner_body_swap->AddInstruction(new (&allocator) HGoto());
+  inner_body_swap->AddInstruction(new (&allocator_) HGoto());
 
-  HBasicBlock* inner_body_add = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(inner_body_add);
-  add = new (&allocator) HAdd(Primitive::kPrimInt, phi_j, constant_1);
+  HBasicBlock* inner_body_add = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(inner_body_add);
+  add = new (&allocator_) HAdd(Primitive::kPrimInt, phi_j, constant_1);
   inner_body_add->AddInstruction(add);
-  inner_body_add->AddInstruction(new (&allocator) HGoto());
+  inner_body_add->AddInstruction(new (&allocator_) HGoto());
   phi_j->AddInput(add);
 
-  HBasicBlock* outer_body_add = new (&allocator) HBasicBlock(graph);
-  graph->AddBlock(outer_body_add);
-  add = new (&allocator) HAdd(Primitive::kPrimInt, phi_i, constant_1);
+  HBasicBlock* outer_body_add = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(outer_body_add);
+  add = new (&allocator_) HAdd(Primitive::kPrimInt, phi_i, constant_1);
   outer_body_add->AddInstruction(add);
-  outer_body_add->AddInstruction(new (&allocator) HGoto());
+  outer_body_add->AddInstruction(new (&allocator_) HGoto());
   phi_i->AddInput(add);
 
   block->AddSuccessor(outer_header);
@@ -1020,19 +938,8 @@
   inner_body_add->AddSuccessor(inner_header);
   outer_body_add->AddSuccessor(outer_header);
 
-  graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
-  RunSimplifierAndGvn(graph);
-  // gvn should remove the same bounds check.
-  ASSERT_FALSE(IsRemoved(bounds_check1));
-  ASSERT_FALSE(IsRemoved(bounds_check2));
-  ASSERT_TRUE(IsRemoved(bounds_check3));
-  ASSERT_TRUE(IsRemoved(bounds_check4));
-  ASSERT_TRUE(IsRemoved(bounds_check5));
-  ASSERT_TRUE(IsRemoved(bounds_check6));
+  RunBCE();  // GVN already removes the duplicated bounds checks.
 
-  BoundsCheckElimination bounds_check_elimination(graph);
-  bounds_check_elimination.Run();
   ASSERT_TRUE(IsRemoved(bounds_check1));
   ASSERT_TRUE(IsRemoved(bounds_check2));
   ASSERT_TRUE(IsRemoved(bounds_check3));
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 7a3aa58..0a3f083 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -398,8 +398,8 @@
 
     // Find predecessors which are not covered by the same TryItem range. Such
     // edges enter the try block and will have a TryBoundary inserted.
-    for (size_t i = 0; i < try_block->GetPredecessors().Size(); ++i) {
-      HBasicBlock* predecessor = try_block->GetPredecessors().Get(i);
+    for (size_t i = 0; i < try_block->GetPredecessors().size(); ++i) {
+      HBasicBlock* predecessor = try_block->GetPredecessor(i);
       if (predecessor->IsSingleTryBoundary()) {
         // The edge was already split because of an exit from a neighbouring
         // TryItem. We split it again and insert an entry point.
@@ -426,8 +426,7 @@
 
     // Find successors which are not covered by the same TryItem range. Such
     // edges exit the try block and will have a TryBoundary inserted.
-    for (size_t i = 0; i < try_block->GetSuccessors().Size(); ++i) {
-      HBasicBlock* successor = try_block->GetSuccessors().Get(i);
+    for (HBasicBlock* successor : try_block->GetSuccessors()) {
       if (successor->IsCatchBlock()) {
         // A catch block is always considered an entry point into its TryItem.
         // We therefore assume this is an exit point, regardless of whether
@@ -479,6 +478,8 @@
   graph_->SetEntryBlock(entry_block_);
   graph_->SetExitBlock(exit_block_);
 
+  graph_->SetHasTryCatch(code_item.tries_size_ != 0);
+
   InitializeLocals(code_item.registers_size_);
   graph_->SetMaximumNumberOfOutVRegs(code_item.outs_size_);
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 1097adb..3bbff6a 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -171,7 +171,7 @@
 
 HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const {
   while (block->IsSingleJump()) {
-    block = block->GetSuccessors().Get(0);
+    block = block->GetSuccessor(0);
   }
   return block;
 }
@@ -248,6 +248,12 @@
 
   GenerateSlowPaths();
 
+  // Emit catch stack maps at the end of the stack map stream as expected by the
+  // runtime exception handler.
+  if (!is_baseline && graph_->HasTryCatch()) {
+    RecordCatchBlockInfo();
+  }
+
   // Finalize instructions in assembler.
   Finalize(allocator);
 }
@@ -805,6 +811,73 @@
   stack_map_stream_.EndStackMapEntry();
 }
 
+void CodeGenerator::RecordCatchBlockInfo() {
+  ArenaAllocator* arena = graph_->GetArena();
+
+  for (size_t i = 0, e = block_order_->Size(); i < e; ++i) {
+    HBasicBlock* block = block_order_->Get(i);
+    if (!block->IsCatchBlock()) {
+      continue;
+    }
+
+    uint32_t dex_pc = block->GetDexPc();
+    uint32_t num_vregs = graph_->GetNumberOfVRegs();
+    uint32_t inlining_depth = 0;  // Inlining of catch blocks is not supported at the moment.
+    uint32_t native_pc = GetAddressOf(block);
+    uint32_t register_mask = 0;   // Not used.
+
+    // The stack mask is not used, so we leave it empty.
+    ArenaBitVector* stack_mask = new (arena) ArenaBitVector(arena, 0, /* expandable */ true);
+
+    stack_map_stream_.BeginStackMapEntry(dex_pc,
+                                         native_pc,
+                                         register_mask,
+                                         stack_mask,
+                                         num_vregs,
+                                         inlining_depth);
+
+    HInstruction* current_phi = block->GetFirstPhi();
+    for (size_t vreg = 0; vreg < num_vregs; ++vreg) {
+      while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) {
+        HInstruction* next_phi = current_phi->GetNext();
+        DCHECK(next_phi == nullptr ||
+               current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber())
+            << "Phis need to be sorted by vreg number to keep this a linear-time loop.";
+        current_phi = next_phi;
+      }
+
+      if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) {
+        stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0);
+      } else {
+        Location location = current_phi->GetLiveInterval()->ToLocation();
+        switch (location.GetKind()) {
+          case Location::kStackSlot: {
+            stack_map_stream_.AddDexRegisterEntry(
+                DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
+            break;
+          }
+          case Location::kDoubleStackSlot: {
+            stack_map_stream_.AddDexRegisterEntry(
+                DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
+            stack_map_stream_.AddDexRegisterEntry(
+                DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize));
+            ++vreg;
+            DCHECK_LT(vreg, num_vregs);
+            break;
+          }
+          default: {
+            // All catch phis must be allocated to a stack slot.
+            LOG(FATAL) << "Unexpected kind " << location.GetKind();
+            UNREACHABLE();
+          }
+        }
+      }
+    }
+
+    stack_map_stream_.EndStackMapEntry();
+  }
+}
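
RecordCatchBlockInfo walks all vregs with a single phi cursor; the DCHECK documents the invariant that keeps this linear: catch phis arrive sorted by vreg number, so the cursor only ever advances. A standalone sketch of the two-pointer walk (types hypothetical, not ART's):

    #include <cstddef>
    #include <vector>

    struct Phi { size_t reg_number; };

    void ScanCatchPhis(const std::vector<Phi>& sorted_phis, size_t num_vregs) {
      size_t cursor = 0;
      for (size_t vreg = 0; vreg < num_vregs; ++vreg) {
        // Skip phis below the current vreg; the cursor never rewinds.
        while (cursor < sorted_phis.size() && sorted_phis[cursor].reg_number < vreg) {
          ++cursor;
        }
        if (cursor < sorted_phis.size() && sorted_phis[cursor].reg_number == vreg) {
          // vreg has a catch phi: record its stack-slot location here.
        } else {
          // No phi for this vreg: record DexRegisterLocation::Kind::kNone.
        }
      }
    }
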
+
 void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path) {
   if (environment == nullptr) return;
 
@@ -975,6 +1048,13 @@
   }
 }
 
+bool CodeGenerator::IsImplicitNullCheckAllowed(HNullCheck* null_check) const {
+  return compiler_options_.GetImplicitNullChecks() &&
+         // Null checks which might throw into a catch block need to save live
+         // registers and therefore cannot be done implicitly.
+         !null_check->CanThrowIntoCatchBlock();
+}
+
 bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) {
   HInstruction* first_next_not_move = null_check->GetNextDisregardingMoves();
 
@@ -990,10 +1070,6 @@
     return;
   }
 
-  if (!compiler_options_.GetImplicitNullChecks()) {
-    return;
-  }
-
   if (!instr->CanDoImplicitNullCheckOn(instr->InputAt(0))) {
     return;
   }
@@ -1005,9 +1081,11 @@
   // and needs to record the pc.
   if (first_prev_not_move != nullptr && first_prev_not_move->IsNullCheck()) {
     HNullCheck* null_check = first_prev_not_move->AsNullCheck();
-    // TODO: The parallel moves modify the environment. Their changes need to be reverted
-    // otherwise the stack maps at the throw point will not be correct.
-    RecordPcInfo(null_check, null_check->GetDexPc());
+    if (IsImplicitNullCheckAllowed(null_check)) {
+      // TODO: The parallel moves modify the environment. Their changes need to be
+      // reverted otherwise the stack maps at the throw point will not be correct.
+      RecordPcInfo(null_check, null_check->GetDexPc());
+    }
   }
 }
 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index b3c4d72..a93d07a 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -237,6 +237,17 @@
   bool CanMoveNullCheckToUser(HNullCheck* null_check);
   void MaybeRecordImplicitNullCheck(HInstruction* instruction);
 
+  // Records a stack map which the runtime might use to set catch phi values
+  // during exception delivery.
+  // TODO: Replace with a catch-entering instruction that records the environment.
+  void RecordCatchBlockInfo();
+
+  // Returns true if implicit null checks are allowed in the compiler options
+  // and if the null check is not inside a try block. We currently cannot do
+  // implicit null checks in that case because we need the NullCheckSlowPath to
+  // save live registers, which may be needed by the runtime to set catch phis.
+  bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;
+
   void AddSlowPath(SlowPathCode* slow_path) {
     slow_paths_.Add(slow_path);
   }
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 438ef69..b3e38f0 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -48,7 +48,7 @@
 // with baseline.
 static constexpr Register kCoreSavedRegisterForBaseline = R5;
 static constexpr Register kCoreCalleeSaves[] =
-    { R5, R6, R7, R8, R10, R11, PC };
+    { R5, R6, R7, R8, R10, R11, LR };
 static constexpr SRegister kFpuCalleeSaves[] =
     { S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31 };
 
@@ -66,6 +66,10 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this);
   }
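
This guard is the recurring shape of the back-end changes: every throwing slow path (null check, div-zero, bounds check) in every code generator gains the same conditional spill, because exception delivery reads catch phi values out of stack slots that live registers would otherwise still own. A simplified sketch of the pattern, condensed from the hunks in this diff:

    void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
      __ Bind(GetEntryLabel());
      if (instruction_->CanThrowIntoCatchBlock()) {
        // Live registers will be restored in the catch block if caught.
        SaveLiveRegisters(codegen, instruction_->GetLocations());
      }
      // ... invoke the runtime entrypoint that throws ...
    }
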
@@ -86,6 +90,10 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this);
   }
@@ -150,6 +158,10 @@
     LocationSummary* locations = instruction_->GetLocations();
 
     __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
@@ -409,8 +421,8 @@
       method_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()),
       call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()),
       relative_call_patches_(graph->GetArena()->Adapter()) {
-  // Save the PC register to mimic Quick.
-  AddAllocatedRegister(Location::RegisterLocation(PC));
+  // Always save the LR register to mimic Quick.
+  AddAllocatedRegister(Location::RegisterLocation(LR));
 }
 
 void CodeGeneratorARM::Finalize(CodeAllocator* allocator) {
@@ -599,12 +611,9 @@
     RecordPcInfo(nullptr, 0);
   }
 
-  // PC is in the list of callee-save to mimic Quick, but we need to push
-  // LR at entry instead.
-  uint32_t push_mask = (core_spill_mask_ & (~(1 << PC))) | 1 << LR;
-  __ PushList(push_mask);
-  __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(push_mask));
-  __ cfi().RelOffsetForMany(DWARFReg(kMethodRegisterArgument), 0, push_mask, kArmWordSize);
+  __ PushList(core_spill_mask_);
+  __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_));
+  __ cfi().RelOffsetForMany(DWARFReg(kMethodRegisterArgument), 0, core_spill_mask_, kArmWordSize);
   if (fpu_spill_mask_ != 0) {
     SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
     __ vpushs(start_register, POPCOUNT(fpu_spill_mask_));
@@ -632,7 +641,10 @@
     __ cfi().AdjustCFAOffset(-kArmPointerSize * POPCOUNT(fpu_spill_mask_));
     __ cfi().RestoreMany(DWARFReg(SRegister(0)), fpu_spill_mask_);
   }
-  __ PopList(core_spill_mask_);
+  // Pop LR into PC to return.
+  DCHECK_NE(core_spill_mask_ & (1 << LR), 0U);
+  uint32_t pop_mask = (core_spill_mask_ & (~(1 << LR))) | 1 << PC;
+  __ PopList(pop_mask);
   __ cfi().RestoreState();
   __ cfi().DefCFAOffset(GetFrameSize());
 }
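
Net effect of the ARM frame change: LR replaces PC in the callee-save set, so the prologue now pushes core_spill_mask_ unmodified, and the mask juggling moves to the epilogue, where popping LR's slot into PC restores registers and returns in one instruction. The two mask expressions, recomposed from the deleted and added lines:

    // Old prologue: PC sat in core_spill_mask_, swapped for LR at push time.
    uint32_t push_mask = (core_spill_mask_ & ~(1u << PC)) | (1u << LR);
    // New epilogue: LR sits in core_spill_mask_, swapped for PC at pop time.
    uint32_t pop_mask = (core_spill_mask_ & ~(1u << LR)) | (1u << PC);
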
@@ -2741,8 +2753,10 @@
 }
 
 void LocationsBuilderARM::VisitDivZeroCheck(HDivZeroCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
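
The same three-line CallKind selection repeats for every throwing check in every back-end below (ARM, ARM64, MIPS64, x86). A hypothetical helper capturing the pattern, not part of the patch:

    static LocationSummary::CallKind ThrowingCheckCallKind(HInstruction* instruction) {
      return instruction->CanThrowIntoCatchBlock()
          ? LocationSummary::kCallOnSlowPath  // the slow path must save live registers
          : LocationSummary::kNoCall;
    }
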
@@ -3495,8 +3509,10 @@
 }
 
 void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
@@ -3524,7 +3540,7 @@
 }
 
 void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) {
-  if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) {
+  if (codegen_->IsImplicitNullCheckAllowed(instruction)) {
     GenerateImplicitNullCheck(instruction);
   } else {
     GenerateExplicitNullCheck(instruction);
@@ -3863,8 +3879,10 @@
 }
 
 void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   if (instruction->HasUses()) {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 6b1457b..5094f67 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -198,6 +198,10 @@
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
 
     __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
@@ -226,6 +230,10 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     arm64_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
@@ -338,6 +346,10 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     arm64_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
@@ -1580,8 +1592,10 @@
 }
 
 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
   if (instruction->HasUses()) {
@@ -1977,8 +1991,10 @@
 }
 
 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
@@ -2875,8 +2891,10 @@
 }
 
 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
@@ -2905,7 +2923,7 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
-  if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) {
+  if (codegen_->IsImplicitNullCheckAllowed(instruction)) {
     GenerateImplicitNullCheck(instruction);
   } else {
     GenerateExplicitNullCheck(instruction);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 10942ef..8d60026 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -118,6 +118,10 @@
     LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
@@ -151,6 +155,10 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
                                   instruction_,
                                   instruction_->GetDexPc(),
@@ -269,6 +277,10 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
                                   instruction_,
                                   instruction_->GetDexPc(),
@@ -1566,8 +1578,10 @@
 }
 
 void LocationsBuilderMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   if (instruction->HasUses()) {
@@ -1862,8 +1876,10 @@
 }
 
 void LocationsBuilderMIPS64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
@@ -2824,8 +2840,10 @@
 }
 
 void LocationsBuilderMIPS64::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
@@ -2852,7 +2870,7 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNullCheck(HNullCheck* instruction) {
-  if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) {
+  if (codegen_->IsImplicitNullCheckAllowed(instruction)) {
     GenerateImplicitNullCheck(instruction);
   } else {
     GenerateExplicitNullCheck(instruction);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a5ad226..9713d6a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -56,6 +56,10 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
                                instruction_,
                                instruction_->GetDexPc(),
@@ -78,6 +82,10 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
                                instruction_,
                                instruction_->GetDexPc(),
@@ -125,6 +133,10 @@
     __ Bind(GetEntryLabel());
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     InvokeRuntimeCallingConvention calling_convention;
     x86_codegen->EmitParallelMoves(
         locations->InAt(0),
@@ -1300,7 +1312,7 @@
   }
 
   // Convert the jumps into the result.
-  Label done_label;
+  NearLabel done_label;
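+  // NearLabel allows the assembler to emit short jumps with 8-bit displacements.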
 
   // False case: result = 0.
   __ Bind(&false_label);
@@ -1956,7 +1968,7 @@
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           Register output = out.AsRegister<Register>();
           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-          Label done, nan;
+          NearLabel done, nan;
 
           __ movl(output, Immediate(kPrimIntMax));
           // temp = int-to-float(output)
@@ -1981,7 +1993,7 @@
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           Register output = out.AsRegister<Register>();
           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-          Label done, nan;
+          NearLabel done, nan;
 
           __ movl(output, Immediate(kPrimIntMax));
           // temp = int-to-double(output)
@@ -2640,7 +2652,7 @@
   PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide);
 
   // Loop doing FPREM until we stabilize.
-  Label retry;
+  NearLabel retry;
   __ Bind(&retry);
   __ fprem();
 
@@ -2754,8 +2766,8 @@
   int shift;
   CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
 
-  Label ndiv;
-  Label end;
+  NearLabel ndiv;
+  NearLabel end;
   // If numerator is 0, the result is 0, no computation needed.
   __ testl(eax, eax);
   __ j(kNotEqual, &ndiv);
@@ -3039,8 +3051,10 @@
 }
 
 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   switch (instruction->GetType()) {
     case Primitive::kPrimByte:
     case Primitive::kPrimChar:
@@ -3229,7 +3243,7 @@
 }
 
 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
-  Label done;
+  NearLabel done;
   __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
   __ shll(loc.AsRegisterPairLow<Register>(), shifter);
   __ testl(shifter, Immediate(32));
@@ -3261,7 +3275,7 @@
 }
 
 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
-  Label done;
+  NearLabel done;
   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
   __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
   __ testl(shifter, Immediate(32));
@@ -3296,7 +3310,7 @@
 }
 
 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
-  Label done;
+  NearLabel done;
   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
   __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
   __ testl(shifter, Immediate(32));
@@ -3471,7 +3485,7 @@
   Location left = locations->InAt(0);
   Location right = locations->InAt(1);
 
-  Label less, greater, done;
+  NearLabel less, greater, done;
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
       Register left_low = left.AsRegisterPairLow<Register>();
@@ -3695,7 +3709,7 @@
                                   Register object,
                                   Register value,
                                   bool value_can_be_null) {
-  Label is_null;
+  NearLabel is_null;
   if (value_can_be_null) {
     __ testl(value, value);
     __ j(kEqual, &is_null);
@@ -3984,9 +3998,11 @@
 }
 
 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
       ? Location::RequiresRegister()
       : Location::Any();
   locations->SetInAt(0, loc);
@@ -4019,7 +4035,7 @@
     __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
   } else {
     DCHECK(obj.IsConstant()) << obj;
-    DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0);
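+    // A constant null reference is represented by HNullConstant, not by an integer 0.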
+    DCHECK(obj.GetConstant()->IsNullConstant());
     __ jmp(slow_path->GetEntryLabel());
     return;
   }
@@ -4027,7 +4043,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
-  if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) {
+  if (codegen_->IsImplicitNullCheckAllowed(instruction)) {
     GenerateImplicitNullCheck(instruction);
   } else {
     GenerateExplicitNullCheck(instruction);
@@ -4432,8 +4448,10 @@
 }
 
 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (instruction->HasUses()) {
@@ -4928,7 +4946,7 @@
   Location cls = locations->InAt(1);
   Register out = locations->Out().AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Label done, zero;
+  NearLabel done, zero;
   SlowPathCodeX86* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 0f3eb74..43a3e52 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -57,6 +57,10 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
                                instruction_,
                                instruction_->GetDexPc(),
@@ -79,6 +83,10 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
                                instruction_,
                                instruction_->GetDexPc(),
@@ -177,6 +185,10 @@
     LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
@@ -1312,7 +1324,7 @@
   }
 
   // Convert the jumps into the result.
-  Label done_label;
+  NearLabel done_label;
 
   // False case: result = 0.
   __ Bind(&false_label);
@@ -1401,7 +1413,7 @@
   Location left = locations->InAt(0);
   Location right = locations->InAt(1);
 
-  Label less, greater, done;
+  NearLabel less, greater, done;
   Primitive::Type type = compare->InputAt(0)->GetType();
   switch (type) {
     case Primitive::kPrimLong: {
@@ -2111,7 +2123,7 @@
           // Processing a Dex `float-to-int' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          Label done, nan;
+          NearLabel done, nan;
 
           __ movl(output, Immediate(kPrimIntMax));
           // if input >= (float)INT_MAX goto done
@@ -2133,7 +2145,7 @@
           // Processing a Dex `double-to-int' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          Label done, nan;
+          NearLabel done, nan;
 
           __ movl(output, Immediate(kPrimIntMax));
           // if input >= (double)INT_MAX goto done
@@ -2175,7 +2187,7 @@
           // Processing a Dex `float-to-long' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          Label done, nan;
+          NearLabel done, nan;
 
           codegen_->Load64BitValue(output, kPrimLongMax);
           // if input >= (float)LONG_MAX goto done
@@ -2197,7 +2209,7 @@
           // Processing a Dex `double-to-long' instruction.
           XmmRegister input = in.AsFpuRegister<XmmRegister>();
           CpuRegister output = out.AsRegister<CpuRegister>();
-          Label done, nan;
+          NearLabel done, nan;
 
           codegen_->Load64BitValue(output, kPrimLongMax);
           // if input >= (double)LONG_MAX goto done
@@ -2760,7 +2772,7 @@
   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
 
   // Loop doing FPREM until we stabilize.
-  Label retry;
+  NearLabel retry;
   __ Bind(&retry);
   __ fprem();
 
@@ -2914,8 +2926,8 @@
 
     __ movl(numerator, eax);
 
-    Label no_div;
-    Label end;
+    NearLabel no_div;
+    NearLabel end;
     __ testl(eax, eax);
     __ j(kNotEqual, &no_div);
 
@@ -3194,8 +3206,10 @@
 }
 
 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::Any());
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
@@ -3748,9 +3762,11 @@
 }
 
 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
       ? Location::RequiresRegister()
       : Location::Any();
   locations->SetInAt(0, loc);
@@ -3783,7 +3799,7 @@
     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
   } else {
     DCHECK(obj.IsConstant()) << obj;
-    DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0);
+    DCHECK(obj.GetConstant()->IsNullConstant());
     __ jmp(slow_path->GetEntryLabel());
     return;
   }
@@ -3791,7 +3807,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
-  if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) {
+  if (codegen_->IsImplicitNullCheckAllowed(instruction)) {
     GenerateImplicitNullCheck(instruction);
   } else {
     GenerateExplicitNullCheck(instruction);
@@ -4175,8 +4191,10 @@
 }
 
 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (instruction->HasUses()) {
@@ -4229,7 +4247,7 @@
                                      CpuRegister object,
                                      CpuRegister value,
                                      bool value_can_be_null) {
-  Label is_null;
+  NearLabel is_null;
   if (value_can_be_null) {
     __ testl(value, value);
     __ j(kEqual, &is_null);
@@ -4656,7 +4674,7 @@
   Location cls = locations->InAt(1);
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Label done, zero;
+  NearLabel done, zero;
   SlowPathCodeX86_64* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 4fbb51d..72c67f5 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -561,7 +561,7 @@
   ASSERT_FALSE(equal->NeedsMaterialization());
 
   auto hook_before_codegen = [](HGraph* graph_in) {
-    HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors().Get(0);
+    HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessor(0);
     HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
     block->InsertInstructionBefore(move, block->GetLastInstruction());
   };
@@ -667,7 +667,7 @@
     code_block->AddInstruction(&ret);
 
     auto hook_before_codegen = [](HGraph* graph_in) {
-      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors().Get(0);
+      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessor(0);
       HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
       block->InsertInstructionBefore(move, block->GetLastInstruction());
     };
@@ -733,7 +733,7 @@
     if_false_block->AddInstruction(&ret_ge);
 
     auto hook_before_codegen = [](HGraph* graph_in) {
-      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors().Get(0);
+      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessor(0);
       HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
       block->InsertInstructionBefore(move, block->GetLastInstruction());
     };
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 50cbf5c..509478c 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -42,8 +42,8 @@
       MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited);
     }
   } else {
-    for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) {
-      MarkReachableBlocks(block->GetSuccessors().Get(i), visited);
+    for (HBasicBlock* successor : block->GetSuccessors()) {
+      MarkReachableBlocks(successor, visited);
     }
   }
 }
@@ -99,12 +99,12 @@
   // Connect successive blocks created by dead branches. Order does not matter.
   for (HReversePostOrderIterator it(*graph_); !it.Done();) {
     HBasicBlock* block  = it.Current();
-    if (block->IsEntryBlock() || block->GetSuccessors().Size() != 1u) {
+    if (block->IsEntryBlock() || block->GetSuccessors().size() != 1u) {
       it.Advance();
       continue;
     }
-    HBasicBlock* successor = block->GetSuccessors().Get(0);
-    if (successor->IsExitBlock() || successor->GetPredecessors().Size() != 1u) {
+    HBasicBlock* successor = block->GetSuccessor(0);
+    if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
       it.Advance();
       continue;
     }
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 847d5a4..074ed71 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -29,19 +29,16 @@
   current_block_ = block;
 
   // Check consistency with respect to predecessors of `block`.
-  const GrowableArray<HBasicBlock*>& predecessors = block->GetPredecessors();
   std::map<HBasicBlock*, size_t> predecessors_count;
-  for (size_t i = 0, e = predecessors.Size(); i < e; ++i) {
-    HBasicBlock* p = predecessors.Get(i);
+  for (HBasicBlock* p : block->GetPredecessors()) {
     ++predecessors_count[p];
   }
   for (auto& pc : predecessors_count) {
     HBasicBlock* p = pc.first;
     size_t p_count_in_block_predecessors = pc.second;
-    const GrowableArray<HBasicBlock*>& p_successors = p->GetSuccessors();
     size_t block_count_in_p_successors = 0;
-    for (size_t j = 0, f = p_successors.Size(); j < f; ++j) {
-      if (p_successors.Get(j) == block) {
+    for (HBasicBlock* p_successor : p->GetSuccessors()) {
+      if (p_successor == block) {
         ++block_count_in_p_successors;
       }
     }
@@ -55,19 +52,16 @@
   }
 
   // Check consistency with respect to successors of `block`.
-  const GrowableArray<HBasicBlock*>& successors = block->GetSuccessors();
   std::map<HBasicBlock*, size_t> successors_count;
-  for (size_t i = 0, e = successors.Size(); i < e; ++i) {
-    HBasicBlock* s = successors.Get(i);
+  for (HBasicBlock* s : block->GetSuccessors()) {
     ++successors_count[s];
   }
   for (auto& sc : successors_count) {
     HBasicBlock* s = sc.first;
     size_t s_count_in_block_successors = sc.second;
-    const GrowableArray<HBasicBlock*>& s_predecessors = s->GetPredecessors();
     size_t block_count_in_s_predecessors = 0;
-    for (size_t j = 0, f = s_predecessors.Size(); j < f; ++j) {
-      if (s_predecessors.Get(j) == block) {
+    for (HBasicBlock* s_predecessor : s->GetPredecessors()) {
+      if (s_predecessor == block) {
         ++block_count_in_s_predecessors;
       }
     }
@@ -92,8 +86,7 @@
   // Ensure that only Return(Void) and Throw jump to Exit. An exiting
   // TryBoundary may be between a Throw and the Exit if the Throw is in a try.
   if (block->IsExitBlock()) {
-    for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
-      HBasicBlock* predecessor = block->GetPredecessors().Get(i);
+    for (HBasicBlock* predecessor : block->GetPredecessors()) {
       if (predecessor->IsSingleTryBoundary()
           && !predecessor->GetLastInstruction()->AsTryBoundary()->IsEntry()) {
         HBasicBlock* real_predecessor = predecessor->GetSinglePredecessor();
@@ -178,8 +171,7 @@
                             try_boundary->GetId(),
                             handler->GetBlockId()));
     }
-    if (current_block_->GetSuccessors().Contains(
-            handler, /* start_from */ it.CurrentSuccessorIndex() + 1)) {
+    if (current_block_->HasSuccessor(handler, it.CurrentSuccessorIndex() + 1)) {
       AddError(StringPrintf("Exception handler block %d of %s:%d is listed multiple times.",
                             handler->GetBlockId(),
                             try_boundary->DebugName(),
@@ -359,15 +351,15 @@
   // never exceptional successors.
   const size_t num_normal_successors = block->NumberOfNormalSuccessors();
   for (size_t j = 0; j < num_normal_successors; ++j) {
-    HBasicBlock* successor = block->GetSuccessors().Get(j);
+    HBasicBlock* successor = block->GetSuccessor(j);
     if (successor->IsCatchBlock()) {
       AddError(StringPrintf("Catch block %d is a normal successor of block %d.",
                             successor->GetBlockId(),
                             block->GetBlockId()));
     }
   }
-  for (size_t j = num_normal_successors, e = block->GetSuccessors().Size(); j < e; ++j) {
-    HBasicBlock* successor = block->GetSuccessors().Get(j);
+  for (size_t j = num_normal_successors, e = block->GetSuccessors().size(); j < e; ++j) {
+    HBasicBlock* successor = block->GetSuccessor(j);
     if (!successor->IsCatchBlock()) {
       AddError(StringPrintf("Normal block %d is an exceptional successor of block %d.",
                             successor->GetBlockId(),
@@ -381,8 +373,8 @@
   // not accounted for.
   if (block->NumberOfNormalSuccessors() > 1) {
     for (size_t j = 0, e = block->NumberOfNormalSuccessors(); j < e; ++j) {
-      HBasicBlock* successor = block->GetSuccessors().Get(j);
-      if (successor->GetPredecessors().Size() > 1) {
+      HBasicBlock* successor = block->GetSuccessor(j);
+      if (successor->GetPredecessors().size() > 1) {
         AddError(StringPrintf("Critical edge between blocks %d and %d.",
                               block->GetBlockId(),
                               successor->GetBlockId()));
@@ -390,17 +382,6 @@
     }
   }
 
-  // Check Phi uniqueness (no two Phis with the same type refer to the same register).
-  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-    HPhi* phi = it.Current()->AsPhi();
-    if (phi->GetNextEquivalentPhiWithSameType() != nullptr) {
-      std::stringstream type_str;
-      type_str << phi->GetType();
-      AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s",
-          phi->GetId(), phi->GetRegNumber(), type_str.str().c_str()));
-    }
-  }
-
   // Ensure try membership information is consistent.
   if (block->IsCatchBlock()) {
     if (block->IsTryBlock()) {
@@ -417,8 +398,7 @@
                             block->GetBlockId()));
     }
   } else {
-    for (size_t i = 0; i < block->GetPredecessors().Size(); ++i) {
-      HBasicBlock* predecessor = block->GetPredecessors().Get(i);
+    for (HBasicBlock* predecessor : block->GetPredecessors()) {
       const HTryBoundary* incoming_try_entry = predecessor->ComputeTryEntryOfSuccessors();
       if (block->IsTryBlock()) {
         const HTryBoundary& stored_try_entry = block->GetTryCatchInformation()->GetTryEntry();
@@ -469,21 +449,21 @@
 
   // Ensure the loop header has only one incoming branch and the remaining
   // predecessors are back edges.
-  size_t num_preds = loop_header->GetPredecessors().Size();
+  size_t num_preds = loop_header->GetPredecessors().size();
   if (num_preds < 2) {
     AddError(StringPrintf(
         "Loop header %d has less than two predecessors: %zu.",
         id,
         num_preds));
   } else {
-    HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0);
+    HBasicBlock* first_predecessor = loop_header->GetPredecessor(0);
     if (loop_information->IsBackEdge(*first_predecessor)) {
       AddError(StringPrintf(
           "First predecessor of loop header %d is a back edge.",
           id));
     }
-    for (size_t i = 1, e = loop_header->GetPredecessors().Size(); i < e; ++i) {
-      HBasicBlock* predecessor = loop_header->GetPredecessors().Get(i);
+    for (size_t i = 1, e = loop_header->GetPredecessors().size(); i < e; ++i) {
+      HBasicBlock* predecessor = loop_header->GetPredecessor(i);
       if (!loop_information->IsBackEdge(*predecessor)) {
         AddError(StringPrintf(
             "Loop header %d has multiple incoming (non back edge) blocks.",
@@ -586,6 +566,35 @@
   }
 }
 
+static bool IsSameSizeConstant(HInstruction* insn1, HInstruction* insn2) {
+  return insn1->IsConstant()
+      && insn2->IsConstant()
+      && Primitive::Is64BitType(insn1->GetType()) == Primitive::Is64BitType(insn2->GetType());
+}
+
+static bool IsConstantEquivalent(HInstruction* insn1, HInstruction* insn2, BitVector* visited) {
+  if (insn1->IsPhi() &&
+      insn1->AsPhi()->IsVRegEquivalentOf(insn2) &&
+      insn1->InputCount() == insn2->InputCount()) {
+    // Testing only one of the two inputs for recursion is sufficient.
+    if (visited->IsBitSet(insn1->GetId())) {
+      return true;
+    }
+    visited->SetBit(insn1->GetId());
+
+    for (size_t i = 0, e = insn1->InputCount(); i < e; ++i) {
+      if (!IsConstantEquivalent(insn1->InputAt(i), insn2->InputAt(i), visited)) {
+        return false;
+      }
+    }
+    return true;
+  } else if (IsSameSizeConstant(insn1, insn2)) {
+    return insn1->AsConstant()->GetValueAsUint64() == insn2->AsConstant()->GetValueAsUint64();
+  } else {
+    return false;
+  }
+}
+
 void SSAChecker::VisitPhi(HPhi* phi) {
   VisitInstruction(phi);
 
@@ -621,20 +630,19 @@
   } else {
     // Ensure the number of inputs of a non-catch phi is the same as the number
     // of its predecessors.
-    const GrowableArray<HBasicBlock*>& predecessors =
-        phi->GetBlock()->GetPredecessors();
-    if (phi->InputCount() != predecessors.Size()) {
+    const ArenaVector<HBasicBlock*>& predecessors = phi->GetBlock()->GetPredecessors();
+    if (phi->InputCount() != predecessors.size()) {
       AddError(StringPrintf(
           "Phi %d in block %d has %zu inputs, "
           "but block %d has %zu predecessors.",
           phi->GetId(), phi->GetBlock()->GetBlockId(), phi->InputCount(),
-          phi->GetBlock()->GetBlockId(), predecessors.Size()));
+          phi->GetBlock()->GetBlockId(), predecessors.size()));
     } else {
       // Ensure phi input at index I either comes from the Ith
       // predecessor or from a block that dominates this predecessor.
       for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
         HInstruction* input = phi->InputAt(i);
-        HBasicBlock* predecessor = predecessors.Get(i);
+        HBasicBlock* predecessor = predecessors[i];
         if (!(input->GetBlock() == predecessor
               || input->GetBlock()->Dominates(predecessor))) {
           AddError(StringPrintf(
@@ -646,6 +654,45 @@
       }
     }
   }
+
+  // Ensure that catch phis are sorted by their vreg number, as required by
+  // the register allocator and code generator. This does not apply to normal
+  // phis, which can be constructed artificially.
+  if (phi->IsCatchPhi()) {
+    HInstruction* next_phi = phi->GetNext();
+    if (next_phi != nullptr && phi->GetRegNumber() > next_phi->AsPhi()->GetRegNumber()) {
+      AddError(StringPrintf("Catch phis %d and %d in block %d are not sorted by their "
+                            "vreg numbers.",
+                            phi->GetId(),
+                            next_phi->GetId(),
+                            phi->GetBlock()->GetBlockId()));
+    }
+  }
+
+  // Test phi equivalents. There should not be two of the same type and they
+  // should only be created for constants which were untyped in DEX.
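+  // (e.g. an untyped DEX constant feeding both an int phi and a float phi).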
+  for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+    HPhi* other_phi = phi_it.Current()->AsPhi();
+    if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) {
+      if (phi->GetType() == other_phi->GetType()) {
+        std::stringstream type_str;
+        type_str << phi->GetType();
+        AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.",
+                              phi->GetId(),
+                              phi->GetRegNumber(),
+                              type_str.str().c_str()));
+      } else {
+        ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true);
+        if (!IsConstantEquivalent(phi, other_phi, &visited)) {
+          AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they "
+                                "are not equivalents of constants.",
+                                phi->GetId(),
+                                other_phi->GetId(),
+                                phi->GetRegNumber()));
+        }
+      }
+    }
+  }
 }
 
 void SSAChecker::HandleBooleanInput(HInstruction* instruction, size_t input_index) {
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index 59d5092..7968e88 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -99,7 +99,7 @@
   ASSERT_NE(false_block, return_block);
 
   // Ensure the new block branches to the join block.
-  ASSERT_EQ(false_block->GetSuccessors().Get(0), return_block);
+  ASSERT_EQ(false_block->GetSuccessor(0), return_block);
 }
 
 // Test that the successors of an if block stay consistent after a SimplifyCFG.
@@ -134,7 +134,7 @@
   ASSERT_NE(true_block, return_block);
 
   // Ensure the new block branches to the join block.
-  ASSERT_EQ(true_block->GetSuccessors().Get(0), return_block);
+  ASSERT_EQ(true_block->GetSuccessor(0), return_block);
 }
 
 // Test that the successors of an if block stay consistent after a SimplifyCFG.
@@ -163,12 +163,12 @@
   ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor(), return_block);
 
   // Ensure there is only one back edge.
-  ASSERT_EQ(if_block->GetPredecessors().Size(), 2u);
-  ASSERT_EQ(if_block->GetPredecessors().Get(0), entry_block);
-  ASSERT_NE(if_block->GetPredecessors().Get(1), if_block);
+  ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
+  ASSERT_EQ(if_block->GetPredecessor(0), entry_block);
+  ASSERT_NE(if_block->GetPredecessor(1), if_block);
 
   // Ensure the new block is the back edge.
-  ASSERT_EQ(if_block->GetPredecessors().Get(1),
+  ASSERT_EQ(if_block->GetPredecessor(1),
             if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor());
 }
 
@@ -198,12 +198,12 @@
   ASSERT_EQ(if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor(), return_block);
 
   // Ensure there is only one back edge.
-  ASSERT_EQ(if_block->GetPredecessors().Size(), 2u);
-  ASSERT_EQ(if_block->GetPredecessors().Get(0), entry_block);
-  ASSERT_NE(if_block->GetPredecessors().Get(1), if_block);
+  ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
+  ASSERT_EQ(if_block->GetPredecessor(0), entry_block);
+  ASSERT_NE(if_block->GetPredecessor(1), if_block);
 
   // Ensure the new block is the back edge.
-  ASSERT_EQ(if_block->GetPredecessors().Get(1),
+  ASSERT_EQ(if_block->GetPredecessor(1),
             if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor());
 }
 
@@ -238,11 +238,11 @@
   ASSERT_EQ(if_instr->IfFalseSuccessor(), return_block);
 
   // Ensure there is only one pre header.
-  ASSERT_EQ(loop_block->GetPredecessors().Size(), 2u);
+  ASSERT_EQ(loop_block->GetPredecessors().size(), 2u);
 
   // Ensure the new block is the successor of the true block.
-  ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessors().Size(), 1u);
-  ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessors().Get(0),
+  ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessors().size(), 1u);
+  ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessor(0),
             loop_block->GetLoopInformation()->GetPreHeader());
 }
 
@@ -276,11 +276,11 @@
   ASSERT_EQ(if_instr->IfTrueSuccessor(), return_block);
 
   // Ensure there is only one pre header.
-  ASSERT_EQ(loop_block->GetPredecessors().Size(), 2u);
+  ASSERT_EQ(loop_block->GetPredecessors().size(), 2u);
 
   // Ensure the new block is the successor of the false block.
-  ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessors().Size(), 1u);
-  ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessors().Get(0),
+  ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessors().size(), 1u);
+  ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessor(0),
             loop_block->GetLoopInformation()->GetPreHeader());
 }
 
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 069a7a4..5b8e386 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -240,8 +240,7 @@
   void PrintPredecessors(HBasicBlock* block) {
     AddIndent();
     output_ << "predecessors";
-    for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
-      HBasicBlock* predecessor = block->GetPredecessors().Get(i);
+    for (HBasicBlock* predecessor : block->GetPredecessors()) {
       output_ << " \"B" << predecessor->GetBlockId() << "\" ";
     }
     if (block->IsEntryBlock() && (disasm_info_ != nullptr)) {
@@ -254,7 +253,7 @@
     AddIndent();
     output_ << "successors";
     for (size_t i = 0; i < block->NumberOfNormalSuccessors(); ++i) {
-      HBasicBlock* successor = block->GetSuccessors().Get(i);
+      HBasicBlock* successor = block->GetSuccessor(i);
       output_ << " \"B" << successor->GetBlockId() << "\" ";
     }
     output_<< std::endl;
@@ -263,8 +262,8 @@
   void PrintExceptionHandlers(HBasicBlock* block) {
     AddIndent();
     output_ << "xhandlers";
-    for (size_t i = block->NumberOfNormalSuccessors(); i < block->GetSuccessors().Size(); ++i) {
-      HBasicBlock* handler = block->GetSuccessors().Get(i);
+    for (size_t i = block->NumberOfNormalSuccessors(); i < block->GetSuccessors().size(); ++i) {
+      HBasicBlock* handler = block->GetSuccessor(i);
       output_ << " \"B" << handler->GetBlockId() << "\" ";
     }
     if (block->IsExitBlock() &&
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 833dfb0..5bb4e8e 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -340,8 +340,8 @@
 
 void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
   ValueSet* set = nullptr;
-  const GrowableArray<HBasicBlock*>& predecessors = block->GetPredecessors();
-  if (predecessors.Size() == 0 || predecessors.Get(0)->IsEntryBlock()) {
+  const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
+  if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) {
     // The entry block should only accumulate constant instructions, and
     // the builder puts constants only in the entry block.
     // Therefore, there is no need to propagate the value set to the next block.
@@ -349,8 +349,8 @@
   } else {
     HBasicBlock* dominator = block->GetDominator();
     ValueSet* dominator_set = sets_.Get(dominator->GetBlockId());
-    if (dominator->GetSuccessors().Size() == 1) {
-      DCHECK_EQ(dominator->GetSuccessors().Get(0), block);
+    if (dominator->GetSuccessors().size() == 1) {
+      DCHECK_EQ(dominator->GetSuccessor(0), block);
       set = dominator_set;
     } else {
       // We have to copy if the dominator has other successors, or `block` is not a successor
@@ -361,9 +361,9 @@
       if (block->IsLoopHeader()) {
         DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
         set->Kill(side_effects_.GetLoopEffects(block));
-      } else if (predecessors.Size() > 1) {
-        for (size_t i = 0, e = predecessors.Size(); i < e; ++i) {
-          set->IntersectWith(sets_.Get(predecessors.Get(i)->GetBlockId()));
+      } else if (predecessors.size() > 1) {
+        for (HBasicBlock* predecessor : predecessors) {
+          set->IntersectWith(sets_.Get(predecessor->GetBlockId()));
           if (set->IsEmpty()) {
             break;
           }
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 3f5a6e7..92c732c 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -15,6 +15,7 @@
  */
 
 #include "induction_var_analysis.h"
+#include "induction_var_range.h"
 
 namespace art {
 
@@ -42,6 +43,40 @@
       instruction->GetBlock() == loop->GetHeader();
 }
 
+/**
+ * Since graph traversal may enter a SCC at any position, an initial representation may be rotated,
+ * along dependences, viz. any of (a, b, c, d), (d, a, b, c), (c, d, a, b), (b, c, d, a) assuming
+ * a chain of dependences (mutually independent items may occur in arbitrary order). For proper
+ * classification, the lexicographically first entry-phi is rotated to the front.
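+ * For example, when a is the entry-phi, each of the rotations above is brought back to (a, b, c, d).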
+ */
+static void RotateEntryPhiFirst(HLoopInformation* loop,
+                                ArenaVector<HInstruction*>* scc,
+                                ArenaVector<HInstruction*>* new_scc) {
+  // Find very first entry-phi.
+  const HInstructionList& phis = loop->GetHeader()->GetPhis();
+  HInstruction* phi = nullptr;
+  size_t phi_pos = -1;
+  const size_t size = scc->size();
+  for (size_t i = 0; i < size; i++) {
+    if (IsEntryPhi(loop, scc->at(i)) && (phi == nullptr || phis.FoundBefore(scc->at(i), phi))) {
+      phi = scc->at(i);
+      phi_pos = i;
+    }
+  }
+
+  // If found, bring that entry-phi to front.
+  if (phi != nullptr) {
+    new_scc->clear();
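+    // Copy the cycle starting at the entry-phi, wrapping around at the end.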
+    for (size_t i = 0; i < size; i++) {
+      DCHECK_LT(phi_pos, size);
+      new_scc->push_back(scc->at(phi_pos));
+      if (++phi_pos >= size) phi_pos = 0;
+    }
+    DCHECK_EQ(size, new_scc->size());
+    scc->swap(*new_scc);
+  }
+}
+
 //
 // Class methods.
 //
@@ -203,7 +238,15 @@
 void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) {
   const size_t size = scc_.size();
   DCHECK_GE(size, 1u);
-  HInstruction* phi = scc_[size - 1];
+
+  // Rotate proper entry-phi to front.
+  if (size > 1) {
+    ArenaVector<HInstruction*> other(graph_->GetArena()->Adapter());
+    RotateEntryPhiFirst(loop, &scc_, &other);
+  }
+
+  // Analyze from phi onwards.
+  HInstruction* phi = scc_[0];
   if (!IsEntryPhi(loop, phi)) {
     return;
   }
@@ -225,7 +268,7 @@
 
   // Inspect remainder of the cycle that resides in scc_. The cycle_ mapping assigns
   // temporary meaning to its nodes, seeded from the phi instruction and back.
-  for (size_t i = 0; i < size - 1; i++) {
+  for (size_t i = 1; i < size; i++) {
     HInstruction* instruction = scc_[i];
     InductionInfo* update = nullptr;
     if (instruction->IsPhi()) {
@@ -249,19 +292,19 @@
     InductionInfo* induction = it->second;
     switch (induction->induction_class) {
       case kInvariant:
-        // Classify phi (last element in scc_) and then the rest of the cycle "on-demand".
-        // Statements are scanned in the Tarjan SCC order, with phi first.
+        // Classify first phi and then the rest of the cycle "on-demand".
+        // Statements are scanned in order.
         AssignInfo(loop, phi, CreateInduction(kLinear, induction, initial));
-        for (size_t i = 0; i < size - 1; i++) {
+        for (size_t i = 1; i < size; i++) {
           ClassifyTrivial(loop, scc_[i]);
         }
         break;
       case kPeriodic:
-        // Classify all elements in the cycle with the found periodic induction while rotating
-        // each first element to the end. Lastly, phi (last element in scc_) is classified.
-        // Statements are scanned in the reverse Tarjan SCC order, with phi last.
-        for (size_t i = 2; i <= size; i++) {
-          AssignInfo(loop, scc_[size - i], induction);
+        // Classify all elements in the cycle with the found periodic induction while
+        // rotating each first element to the end. Lastly, phi is classified.
+        // Statements are scanned in reverse order.
+        for (size_t i = size - 1; i >= 1; i--) {
+          AssignInfo(loop, scc_[i], induction);
           induction = RotatePeriodicInduction(induction->op_b, induction->op_a);
         }
         AssignInfo(loop, phi, induction);
@@ -511,12 +554,15 @@
     InductionInfo* stride = a->op_a;
     InductionInfo* lo_val = a->op_b;
     InductionInfo* hi_val = b;
-    int64_t value = -1;
-    if (IsIntAndGet(stride, &value)) {
-      if ((value > 0 && (cmp == kCondLT || cmp == kCondLE)) ||
-          (value < 0 && (cmp == kCondGT || cmp == kCondGE))) {
+    // Analyze the stride thoroughly, since its representation may be compound at this point.
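+    // The stride is constant iff both bounds are pure constants (a_constant == 0) and agree.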
+    InductionVarRange::Value v1 = InductionVarRange::GetMin(stride, nullptr);
+    InductionVarRange::Value v2 = InductionVarRange::GetMax(stride, nullptr);
+    if (v1.a_constant == 0 && v2.a_constant == 0 && v1.b_constant == v2.b_constant) {
+      const int32_t stride_value = v1.b_constant;
+      if ((stride_value > 0 && (cmp == kCondLT || cmp == kCondLE)) ||
+          (stride_value < 0 && (cmp == kCondGT || cmp == kCondGE))) {
         bool is_strict = cmp == kCondLT || cmp == kCondGT;
-        VisitTripCount(loop, lo_val, hi_val, stride, value, type, is_strict);
+        VisitTripCount(loop, lo_val, hi_val, stride, stride_value, type, is_strict);
       }
     }
   }
@@ -544,7 +590,7 @@
   //       least once. Otherwise TC is 0. Also, the expression assumes the loop does not
   //       have any early-exits. Otherwise, TC is an upper bound.
   //
-  bool cancels = is_strict && abs(stride_value) == 1;  // compensation cancels conversion?
+  bool cancels = is_strict && std::abs(stride_value) == 1;  // compensation cancels conversion?
   if (!cancels) {
     // Convert exclusive integral inequality into inclusive integral inequality,
     // viz. condition i < U is i <= U - 1 and condition i > U is i >= U + 1.
@@ -557,7 +603,7 @@
   }
 
   // Assign the trip-count expression to the loop control. Clients that use the information
-  // should be aware that due to the L <= U assumption, the expression is only valid in the
+  // should be aware that due to the top-test assumption, the expression is only valid in the
   // loop-body proper, and not yet in the loop-header. If the loop has any early exits, the
   // trip-count forms a conservative upper bound on the number of loop iterations.
   InductionInfo* trip_count =
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index bd90334..486e904 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -126,6 +126,7 @@
 }
 
 InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction,
+                                                     HInductionVarAnalysis::InductionInfo* trip,
                                                      int32_t fail_value) {
   // Detect constants and chase the fetch a bit deeper into the HIR tree, so that it becomes
   // more likely range analysis will compare the same instructions as terminal nodes.
@@ -134,9 +135,16 @@
     return Value(value);
   } else if (instruction->IsAdd()) {
     if (IsIntAndGet(instruction->InputAt(0), &value)) {
-      return AddValue(Value(value), GetFetch(instruction->InputAt(1), fail_value), fail_value);
+      return AddValue(Value(value),
+                      GetFetch(instruction->InputAt(1), trip, fail_value), fail_value);
     } else if (IsIntAndGet(instruction->InputAt(1), &value)) {
-      return AddValue(GetFetch(instruction->InputAt(0), fail_value), Value(value), fail_value);
+      return AddValue(GetFetch(instruction->InputAt(0), trip, fail_value),
+                      Value(value), fail_value);
+    }
+  } else if (fail_value < 0) {
+    // Special case: within the loop-body, minimum of trip-count is 1.
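+    // (the loop-body only executes once the loop has been entered, so at least one iteration occurs).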
+    if (trip != nullptr && instruction == trip->op_b->fetch) {
+      return Value(1);
     }
   }
   return Value(instruction, 1, 0);
@@ -163,7 +171,7 @@
           case HInductionVarAnalysis::kDiv:
             return GetDiv(info->op_a, info->op_b, trip, INT_MIN);
           case HInductionVarAnalysis::kFetch:
-            return GetFetch(info->fetch, INT_MIN);
+            return GetFetch(info->fetch, trip, INT_MIN);
         }
         break;
       case HInductionVarAnalysis::kLinear:
@@ -200,7 +208,7 @@
           case HInductionVarAnalysis::kDiv:
             return GetDiv(info->op_a, info->op_b, trip, INT_MAX);
           case HInductionVarAnalysis::kFetch:
-            return GetFetch(info->fetch, INT_MAX);
+            return GetFetch(info->fetch, trip, INT_MAX);
         }
         break;
       case HInductionVarAnalysis::kLinear:
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index b079076..e002e5f 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -27,7 +27,7 @@
  * API to obtain a conservative lower and upper bound value on each instruction in the HIR.
  *
  * For example, given a linear induction 2 * i + x where 0 <= i <= 10, range analysis yields lower
- * bound value x and upper bound value x + 20 for the expression, thus, the range [0, x + 20].
+ * bound value x and upper bound value x + 20 for the expression, thus, the range [x, x + 20].
  */
 class InductionVarRange {
  public:
@@ -39,7 +39,7 @@
    */
   struct Value {
     Value(HInstruction* i, int32_t a, int32_t b)
-        : instruction(a ? i : nullptr),
+        : instruction(a != 0 ? i : nullptr),
           a_constant(a),
           b_constant(b) {}
     explicit Value(int32_t b) : Value(nullptr, 0, b) {}
@@ -70,7 +70,9 @@
   HInductionVarAnalysis::InductionInfo* GetTripCount(HLoopInformation* loop,
                                                      HInstruction* context);
 
-  static Value GetFetch(HInstruction* instruction, int32_t fail_value);
+  static Value GetFetch(HInstruction* instruction,
+                        HInductionVarAnalysis::InductionInfo* trip,
+                        int32_t fail_value);
 
   static Value GetMin(HInductionVarAnalysis::InductionInfo* info,
                       HInductionVarAnalysis::InductionInfo* trip);
@@ -78,10 +80,12 @@
                       HInductionVarAnalysis::InductionInfo* trip);
   static Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
                       HInductionVarAnalysis::InductionInfo* info2,
-                      HInductionVarAnalysis::InductionInfo* trip, int32_t fail_value);
+                      HInductionVarAnalysis::InductionInfo* trip,
+                      int32_t fail_value);
   static Value GetDiv(HInductionVarAnalysis::InductionInfo* info1,
                       HInductionVarAnalysis::InductionInfo* info2,
-                      HInductionVarAnalysis::InductionInfo* trip, int32_t fail_value);
+                      HInductionVarAnalysis::InductionInfo* trip,
+                      int32_t fail_value);
 
   static Value AddValue(Value v1, Value v2, int32_t fail_value);
   static Value SubValue(Value v1, Value v2, int32_t fail_value);
@@ -93,6 +97,7 @@
   /** Results of prior induction variable analysis. */
   HInductionVarAnalysis *induction_analysis_;
 
+  friend class HInductionVarAnalysis;
   friend class InductionVarRangeTest;
 
   DISALLOW_COPY_AND_ASSIGN(InductionVarRange);
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 0547ce8..efd4fcf 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -423,8 +423,8 @@
   }
 
   bool has_throw_predecessor = false;
-  for (size_t i = 0, e = exit_block->GetPredecessors().Size(); i < e; ++i) {
-    if (exit_block->GetPredecessors().Get(i)->GetLastInstruction()->IsThrow()) {
+  for (HBasicBlock* predecessor : exit_block->GetPredecessors()) {
+    if (predecessor->GetLastInstruction()->IsThrow()) {
       has_throw_predecessor = true;
       break;
     }
@@ -506,7 +506,7 @@
       ReferenceTypeInfo::TypeHandle return_handle =
         handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size));
       return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
-         return_handle, return_handle->IsFinal() /* is_exact */));
+         return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
     }
   }
 
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 41c239d..b71fdb8 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -125,6 +125,28 @@
           LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
           UNREACHABLE();
       }
+    case kIntrinsicRotateRight:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerRotateRight;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongRotateRight;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+    case kIntrinsicRotateLeft:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerRotateLeft;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongRotateLeft;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+
+    // Misc data processing.
     case kIntrinsicNumberOfLeadingZeros:
       switch (GetType(method.d.data, true)) {
         case Primitive::kPrimInt:
@@ -135,6 +157,16 @@
           LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
           UNREACHABLE();
       }
+    case kIntrinsicNumberOfTrailingZeros:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerNumberOfTrailingZeros;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongNumberOfTrailingZeros;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
 
     // Abs.
     case kIntrinsicAbsDouble:
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 6040a40..cc8ddb6 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -266,6 +266,227 @@
   GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
 }
 
+static void GenNumberOfTrailingZeros(LocationSummary* locations,
+                                     Primitive::Type type,
+                                     ArmAssembler* assembler) {
+  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+
+  Register out = locations->Out().AsRegister<Register>();
+
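+  // Reversing the bits and counting leading zeros yields the number of trailing zeros.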
+  if (type == Primitive::kPrimLong) {
+    Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+    Label end;
+    __ rbit(out, in_reg_lo);
+    __ clz(out, out);
+    __ CompareAndBranchIfNonZero(in_reg_lo, &end);
+    __ rbit(out, in_reg_hi);
+    __ clz(out, out);
+    __ AddConstant(out, 32);
+    __ Bind(&end);
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    __ rbit(out, in);
+    __ clz(out, out);
+  }
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+static void GenIntegerRotate(LocationSummary* locations,
+                             ArmAssembler* assembler,
+                             bool is_left) {
+  Register in = locations->InAt(0).AsRegister<Register>();
+  Location rhs = locations->InAt(1);
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (rhs.IsConstant()) {
+    // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
+    // so map all rotations to a positive equivalent in that range.
+    // (e.g. a rotate left *or* right by -2 bits equals one by 30 bits in the same direction.)
+    uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue() & 0x1F;
+    if (rot != 0) {
+      // Rotate, mapping left rotations to right equivalents if necessary.
+      // (e.g. left by 2 bits == right by 30.)
+      __ Ror(out, in, is_left ? (0x20 - rot) : rot);
+    } else if (out != in) {
+      __ Mov(out, in);
+    }
+  } else {
+    if (is_left) {
+      __ rsb(out, rhs.AsRegister<Register>(), ShifterOperand(0));
+      __ Ror(out, in, out);
+    } else {
+      __ Ror(out, in, rhs.AsRegister<Register>());
+    }
+  }
+}
+
+// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
+// rotates: swap the input registers to rotate by the first 32 bits of a larger
+// rotation, and/or flip the direction to treat an over-sized right/left
+// rotation as a sub-word rotation the other way, as appropriate.
+static void GenLongRotate(LocationSummary* locations,
+                          ArmAssembler* assembler,
+                          bool is_left) {
+  Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+  Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Location rhs = locations->InAt(1);
+  Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
+  Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+  if (rhs.IsConstant()) {
+    uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue();
+    // Map all left rotations to right equivalents.
+    if (is_left) {
+      rot = 0x40 - rot;
+    }
+    // Map all rotations to +ve. equivalents on the interval [0,63].
+    rot &= 0x3F;
+    // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
+    // logic below to a simple pair of binary orr.
+    // (e.g. 34 bits == in_reg swap + 2 bits right.)
+    if (rot >= 0x20) {
+      rot -= 0x20;
+      std::swap(in_reg_hi, in_reg_lo);
+    }
+    // Rotate, or mov to out for zero or word size rotations.
+    if (rot) {
+      __ Lsr(out_reg_hi, in_reg_hi, rot);
+      __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, 0x20 - rot));
+      __ Lsr(out_reg_lo, in_reg_lo, rot);
+      __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, 0x20 - rot));
+    } else {
+      __ Mov(out_reg_lo, in_reg_lo);
+      __ Mov(out_reg_hi, in_reg_hi);
+    }
+  } else {
+    Register shift_left = locations->GetTemp(0).AsRegister<Register>();
+    Register shift_right = locations->GetTemp(1).AsRegister<Register>();
+    Label end;
+    Label right;
+
+    __ and_(shift_left, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
+    __ Lsrs(shift_right, rhs.AsRegister<Register>(), 6);
+    __ rsb(shift_right, shift_left, ShifterOperand(0x20), AL, kCcKeep);
+
+    if (is_left) {
+      __ b(&right, CS);
+    } else {
+      __ b(&right, CC);
+      std::swap(shift_left, shift_right);
+    }
+
+    // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
+    // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
+    __ Lsl(out_reg_hi, in_reg_hi, shift_left);
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ Lsr(shift_left, in_reg_hi, shift_right);
+    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
+    __ b(&end);
+
+    // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
+    // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
+    __ Bind(&right);
+    __ Lsr(out_reg_hi, in_reg_hi, shift_right);
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ Lsl(shift_right, in_reg_hi, shift_left);
+    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
+
+    __ Bind(&end);
+  }
+}
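Two observations make the lowering above easier to follow. First, the `add` instructions combining the shifted halves in the register-amount sequences behave exactly like the bitwise-or in their comments, because the two operands occupy disjoint bit ranges: the two shift amounts sum to 32, and a register-specified shift by 32 produces zero on ARM. Second, the `Lsrs` is there purely for its flag side effect, latching bit 5 of the rotation amount (rotate-by-32-or-more) into the carry flag, which the CS/CC branch then consumes. A hedged C++ sketch of the constant-path arithmetic (names are illustrative):

    #include <cstdint>
    #include <utility>

    // Rotate a 64-bit value held as two 32-bit halves right by `rot`,
    // mirroring the constant path of GenLongRotate.
    static inline void Rotr64(uint32_t* lo, uint32_t* hi, uint32_t rot) {
      rot &= 63u;
      if (rot >= 32u) {  // 'Pre-rotate' by a full word: swap the halves.
        std::swap(*lo, *hi);
        rot -= 32u;
      }
      if (rot != 0u) {  // Zero and word-sized rotations are plain moves.
        uint32_t new_hi = (*hi >> rot) | (*lo << (32u - rot));
        uint32_t new_lo = (*lo >> rot) | (*hi << (32u - rot));
        *hi = new_hi;
        *lo = new_lo;
      }
    }
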
+
+void IntrinsicLocationsBuilderARM::VisitIntegerRotateRight(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerRotateRight(HInvoke* invoke) {
+  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), false /* is_left */);
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongRotateRight(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (invoke->InputAt(1)->IsConstant()) {
+    locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongRotateRight(HInvoke* invoke) {
+  GenLongRotate(invoke->GetLocations(), GetAssembler(), false /* is_left */);
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerRotateLeft(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerRotateLeft(HInvoke* invoke) {
+  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), true /* is_left */);
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongRotateLeft(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (invoke->InputAt(1)->IsConstant()) {
+    locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongRotateLeft(HInvoke* invoke) {
+  GenLongRotate(invoke->GetLocations(), GetAssembler(), true /* is_left */);
+}
+
 static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
   Location in = locations->InAt(0);
   Location out = locations->Out();
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 1dbca34..b0cfd0d 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -41,12 +41,12 @@
 using helpers::FPRegisterFrom;
 using helpers::HeapOperand;
 using helpers::LocationFrom;
+using helpers::OperandFrom;
 using helpers::RegisterFrom;
 using helpers::SRegisterFrom;
 using helpers::WRegisterFrom;
 using helpers::XRegisterFrom;
 
-
 namespace {
 
 ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
@@ -286,6 +286,131 @@
   GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
 }
 
+static void GenNumberOfTrailingZeros(LocationSummary* locations,
+                                     Primitive::Type type,
+                                     vixl::MacroAssembler* masm) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  Location in = locations->InAt(0);
+  Location out = locations->Out();
+
+  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
+  __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
+}
+
+void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
+}
+
+static void GenRotateRight(LocationSummary* locations,
+                           Primitive::Type type,
+                           vixl::MacroAssembler* masm) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  Location in = locations->InAt(0);
+  Location out = locations->Out();
+  Operand rhs = OperandFrom(locations->InAt(1), type);
+
+  if (rhs.IsImmediate()) {
+    uint32_t shift = rhs.immediate() & (RegisterFrom(in, type).SizeInBits() - 1);
+    __ Ror(RegisterFrom(out, type),
+           RegisterFrom(in, type),
+           shift);
+  } else {
+    DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0);
+    __ Ror(RegisterFrom(out, type),
+           RegisterFrom(in, type),
+           rhs.reg());
+  }
+}
+
+void IntrinsicLocationsBuilderARM64::VisitIntegerRotateRight(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerRotateRight(HInvoke* invoke) {
+  GenRotateRight(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitLongRotateRight(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitLongRotateRight(HInvoke* invoke) {
+  GenRotateRight(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
+}
+
+static void GenRotateLeft(LocationSummary* locations,
+                          Primitive::Type type,
+                          vixl::MacroAssembler* masm) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  Location in = locations->InAt(0);
+  Location out = locations->Out();
+  Operand rhs = OperandFrom(locations->InAt(1), type);
+
+  if (rhs.IsImmediate()) {
+    uint32_t regsize = RegisterFrom(in, type).SizeInBits();
+    uint32_t shift = (regsize - rhs.immediate()) & (regsize - 1);
+    __ Ror(RegisterFrom(out, type), RegisterFrom(in, type), shift);
+  } else {
+    DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0);
+    __ Neg(RegisterFrom(out, type),
+           Operand(RegisterFrom(locations->InAt(1), type)));
+    __ Ror(RegisterFrom(out, type),
+           RegisterFrom(in, type),
+           RegisterFrom(out, type));
+  }
+}
+
+void IntrinsicLocationsBuilderARM64::VisitIntegerRotateLeft(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerRotateLeft(HInvoke* invoke) {
+  GenRotateLeft(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitLongRotateLeft(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitLongRotateLeft(HInvoke* invoke) {
+  GenRotateLeft(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
+}
+
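ARM64 has no rotate-left instruction, so the code above lowers it onto `Ror`: an immediate amount becomes a right-rotation by `(regsize - imm) & (regsize - 1)`, and a register amount is negated first, which works because the hardware only consumes the low log2(regsize) bits of the shift register. A small C++ statement of the identity (illustrative only):

    #include <cstdint>

    static inline uint64_t Rotr64(uint64_t x, uint32_t n) {
      n &= 63u;
      return (n == 0u) ? x : (x >> n) | (x << (64u - n));
    }

    // Rotate-left as rotate-right by the negated amount (mod 64), which is
    // what the Neg + Ror sequence emits for the register case.
    static inline uint64_t Rotl64(uint64_t x, uint32_t n) {
      return Rotr64(x, (64u - (n & 63u)) & 63u);
    }
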
 static void GenReverse(LocationSummary* locations,
                        Primitive::Type type,
                        vixl::MacroAssembler* masm) {
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 7e5339e..bfe5e55 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -29,9 +29,15 @@
   V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache) \
   V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache) \
   V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache) \
+  V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache) \
+  V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache) \
+  V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache) \
   V(LongReverse, kStatic, kNeedsEnvironmentOrCache) \
   V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache) \
   V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache) \
+  V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache) \
+  V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache) \
+  V(LongRotateLeft, kStatic, kNeedsEnvironmentOrCache) \
   V(ShortReverseBytes, kStatic, kNeedsEnvironmentOrCache) \
   V(MathAbsDouble, kStatic, kNeedsEnvironmentOrCache) \
   V(MathAbsFloat, kStatic, kNeedsEnvironmentOrCache) \
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index daf56d0..e302317 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -507,7 +507,7 @@
 
   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
 
-  Label nan, done, op2_label;
+  NearLabel nan, done, op2_label;
   if (is_double) {
     __ ucomisd(out, op2);
   } else {
@@ -841,7 +841,7 @@
   Register out = locations->Out().AsRegister<Register>();
   XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
-  Label done, nan;
+  NearLabel done, nan;
   X86Assembler* assembler = GetAssembler();
 
   // Generate 0.5 into inPlusPointFive.
@@ -888,9 +888,9 @@
 void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
 
-  // Location of reference to data array
+  // Location of reference to data array.
   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count
+  // Location of count.
   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
 
   Register obj = locations->InAt(0).AsRegister<Register>();
@@ -917,6 +917,183 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
+  // At least two of srcPos, destPos and length must be integer constants,
+  // or else we won't have enough free registers for the inline copy.
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+
+  int num_constants =
+      ((src_pos != nullptr) ? 1 : 0)
+      + ((dest_pos != nullptr) ? 1 : 0)
+      + ((length != nullptr) ? 1 : 0);
+
+  if (num_constants < 2) {
+    // Not enough free registers.
+    return;
+  }
+
+  // While we are checking the constants, also verify that constant src and
+  // dest positions are >= 0.
+  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
+      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
+    // We would have to fail at runtime anyway.
+    return;
+  }
+
+  // Similarly, check a constant length for negativity up front.
+  if (length != nullptr) {
+    int32_t len = length->GetValue();
+    if (len < 0) {
+      // Just call as normal.
+      return;
+    }
+  }
+
+  // Okay, it is safe to generate inline code.
+  LocationSummary* locations =
+    new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
+  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
+  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
+
+  // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
+  locations->AddTemp(Location::RegisterLocation(ESI));
+  locations->AddTemp(Location::RegisterLocation(EDI));
+  locations->AddTemp(Location::RegisterLocation(ECX));
+}
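The "at least two constants" rule above is about x86-32 register pressure: `rep movsw` pins ESI, EDI and ECX, and the two object references need registers of their own, leaving roughly one general-purpose register for the three scalar inputs. A back-of-the-envelope in C++ (the exact reservation set here is an assumption, not lifted from the compiler):

    // Assumed x86-32 register budget for the inline copy path.
    constexpr int kGprs = 8;             // EAX..EDI.
    constexpr int kStack = 2;            // ESP (and typically EBP) unavailable.
    constexpr int kMovswTemps = 3;       // ESI, EDI, ECX pinned for REP MOVSW.
    constexpr int kReferenceInputs = 2;  // The src and dest objects.
    constexpr int kLeft = kGprs - kStack - kMovswTemps - kReferenceInputs;
    static_assert(kLeft == 1, "at most one scalar input can live in a register");
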
+
+static void CheckPosition(X86Assembler* assembler,
+                          Location pos,
+                          Register input,
+                          Register length,
+                          SlowPathCodeX86* slow_path,
+                          Register input_len,
+                          Register temp) {
+  // Where is the length in the String?
+  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+  if (pos.IsConstant()) {
+    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
+    if (pos_const == 0) {
+      // Check that length(input) >= length.
+      __ cmpl(Address(input, length_offset), length);
+      __ j(kLess, slow_path->GetEntryLabel());
+    } else {
+      // Check that length(input) >= pos.
+      __ movl(input_len, Address(input, length_offset));
+      __ cmpl(input_len, Immediate(pos_const));
+      __ j(kLess, slow_path->GetEntryLabel());
+
+      // Check that (length(input) - pos) >= length.
+      __ leal(temp, Address(input_len, -pos_const));
+      __ cmpl(temp, length);
+      __ j(kLess, slow_path->GetEntryLabel());
+    }
+  } else {
+    // Check that pos >= 0.
+    Register pos_reg = pos.AsRegister<Register>();
+    __ testl(pos_reg, pos_reg);
+    __ j(kLess, slow_path->GetEntryLabel());
+
+    // Check that pos <= length(input).
+    __ cmpl(Address(input, length_offset), pos_reg);
+    __ j(kLess, slow_path->GetEntryLabel());
+
+    // Check that (length(input) - pos) >= length.
+    __ movl(temp, Address(input, length_offset));
+    __ subl(temp, pos_reg);
+    __ cmpl(temp, length);
+    __ j(kLess, slow_path->GetEntryLabel());
+  }
+}
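CheckPosition encodes the per-array half of the arraycopy contract: the position must be non-negative, no larger than the array length, and must leave at least `length` elements after it; any violation branches to the slow path, i.e. the regular runtime call. Roughly, the inline path requires (sketch, not ART code):

    #include <cstdint>

    // Contract checked for each of src and dest before the inline copy.
    static inline bool PositionOk(int32_t pos, int32_t array_length, int32_t copy_length) {
      return pos >= 0 &&
             pos <= array_length &&
             (array_length - pos) >= copy_length;
    }

When `pos` is the constant 0 the first two conditions hold trivially, which is why the generated code reduces to a single compare in that case.
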
+
+void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
+  X86Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register src = locations->InAt(0).AsRegister<Register>();
+  Location srcPos = locations->InAt(1);
+  Register dest = locations->InAt(2).AsRegister<Register>();
+  Location destPos = locations->InAt(3);
+  Location length = locations->InAt(4);
+
+  // Temporaries that we need for MOVSW.
+  Register src_base = locations->GetTemp(0).AsRegister<Register>();
+  DCHECK_EQ(src_base, ESI);
+  Register dest_base = locations->GetTemp(1).AsRegister<Register>();
+  DCHECK_EQ(dest_base, EDI);
+  Register count = locations->GetTemp(2).AsRegister<Register>();
+  DCHECK_EQ(count, ECX);
+
+  SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Bail out if the source and destination are the same (to handle overlap).
+  __ cmpl(src, dest);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  // Bail out if the source is null.
+  __ testl(src, src);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  // Bail out if the destination is null.
+  __ testl(dest, dest);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant()) {
+    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
+    __ j(kLess, slow_path->GetEntryLabel());
+  }
+
+  // We need the count in ECX.
+  if (length.IsConstant()) {
+    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+  } else {
+    __ movl(count, length.AsRegister<Register>());
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base);
+
+  // Validity checks: dest.
+  CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base);
+
+  // Okay, everything checks out.  Finally time to do the copy.
+  // Check assumption that sizeof(Char) is 2 (used in scaling below).
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
+
+  if (srcPos.IsConstant()) {
+    int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
+  } else {
+    __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
+                              ScaleFactor::TIMES_2, data_offset));
+  }
+  if (destPos.IsConstant()) {
+    int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
+
+    __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
+  } else {
+    __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
+                               ScaleFactor::TIMES_2, data_offset));
+  }
+
+  // Do the move.
+  __ rep_movsw();
+
+  __ Bind(slow_path->GetExitLabel());
+}
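With all checks done, the copy itself is a single `rep movsw`: the CPU copies ECX 16-bit words from [ESI] to [EDI], advancing both pointers, which is why the LocationsBuilder pinned the three temporaries to exactly those registers. A C++ analogue of the final move (illustrative; distinct Java arrays never overlap, and the src == dest case was already routed to the slow path):

    #include <cstddef>
    #include <cstdint>

    // What `rep movsw` amounts to once src_base, dest_base and count are set.
    static inline void RepMovsw(const uint16_t* src, uint16_t* dest, size_t count) {
      for (size_t i = 0; i < count; ++i) {
        dest[i] = src[i];
      }
    }
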
+
 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -970,9 +1147,7 @@
   Register edi = locations->GetTemp(1).AsRegister<Register>();
   Register esi = locations->Out().AsRegister<Register>();
 
-  Label end;
-  Label return_true;
-  Label return_false;
+  NearLabel end, return_true, return_false;
 
   // Get offsets of count, value, and class fields within a string object.
   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
@@ -1004,8 +1179,7 @@
   __ cmpl(ecx, Address(arg, count_offset));
   __ j(kNotEqual, &return_false);
   // Return true if both strings are empty.
-  __ testl(ecx, ecx);
-  __ j(kEqual, &return_true);
+  __ jecxz(&return_true);
 
   // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
   __ leal(esi, Address(str, value_offset));
@@ -1115,7 +1289,7 @@
 
   // Do a zero-length check.
   // TODO: Support jecxz.
-  Label not_found_label;
+  NearLabel not_found_label;
   __ testl(string_length, string_length);
   __ j(kEqual, &not_found_label);
 
@@ -1158,7 +1332,7 @@
   __ subl(string_length, counter);
   __ leal(out, Address(string_length, -1));
 
-  Label done;
+  NearLabel done;
   __ jmp(&done);
 
   // Failed to match; return -1.
@@ -1878,7 +2052,7 @@
     }
 
     // BSR sets ZF if the input was zero, and the output is undefined.
-    Label all_zeroes, done;
+    NearLabel all_zeroes, done;
     __ j(kEqual, &all_zeroes);
 
     // Correct the result from BSR to get the final CLZ result.
@@ -1897,7 +2071,7 @@
   DCHECK(src.IsRegisterPair());
   Register src_lo = src.AsRegisterPairLow<Register>();
   Register src_hi = src.AsRegisterPairHigh<Register>();
-  Label handle_low, done, all_zeroes;
+  NearLabel handle_low, done, all_zeroes;
 
   // Is the high word zero?
   __ testl(src_hi, src_hi);
@@ -1954,8 +2128,13 @@
 
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
-UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros)
+UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 
 #undef UNIMPLEMENTED_INTRINSIC
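The Label-to-NearLabel conversions sprinkled through this file are a code-size tweak: a NearLabel promises its target lies within an 8-bit displacement, letting the assembler emit 2-byte short jumps instead of the 5- or 6-byte rel32 forms. The `jecxz` substitution is similar, folding the former `testl` + `j(kEqual)` pair into one instruction that branches when ECX is zero without touching the flags (the x86-64 file below uses `jrcxz` for RCX). A rough size model, with the encodings stated as assumptions:

    #include <cstdint>

    // Assumed encodings: rel8 jcc = 2 bytes, rel32 jcc = 6 bytes.
    static inline int JccSizeBytes(int64_t displacement) {
      return (displacement >= -128 && displacement <= 127) ? 2 : 6;
    }
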
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index f78a726..51980af 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -405,7 +405,7 @@
 
   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
 
-  Label nan, done, op2_label;
+  NearLabel nan, done, op2_label;
   if (is_double) {
     __ ucomisd(out, op2);
   } else {
@@ -702,7 +702,7 @@
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  Label done, nan;
+  NearLabel done, nan;
   X86_64Assembler* assembler = GetAssembler();
 
   // Load 0.5 into inPlusPointFive.
@@ -750,7 +750,7 @@
   XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-  Label done, nan;
+  NearLabel done, nan;
   X86_64Assembler* assembler = GetAssembler();
 
   // Load 0.5 into inPlusPointFive.
@@ -797,9 +797,9 @@
 void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
 
-  // Location of reference to data array
+  // Location of reference to data array.
   const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count
+  // Location of count.
   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
 
   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
@@ -826,6 +826,171 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
+  // Check for known failures that will force us to bail out to the runtime;
+  // in those cases, just generate the runtime call directly.
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+
+  // The positions must be non-negative.
+  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
+      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
+    // We would have to fail at runtime anyway.
+    return;
+  }
+
+  // The length must be >= 0.
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+  if (length != nullptr) {
+    int32_t len = length->GetValue();
+    if (len < 0) {
+      // Just call as normal.
+      return;
+    }
+  }
+
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
+  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
+
+  // And we need some temporaries.  We will use REP MOVSW, so we need fixed registers.
+  locations->AddTemp(Location::RegisterLocation(RSI));
+  locations->AddTemp(Location::RegisterLocation(RDI));
+  locations->AddTemp(Location::RegisterLocation(RCX));
+}
+
+static void CheckPosition(X86_64Assembler* assembler,
+                          Location pos,
+                          CpuRegister input,
+                          CpuRegister length,
+                          SlowPathCodeX86_64* slow_path,
+                          CpuRegister input_len,
+                          CpuRegister temp) {
+  // Offset of the length field within the array object.
+  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+  if (pos.IsConstant()) {
+    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
+    if (pos_const == 0) {
+      // Check that length(input) >= length.
+      __ cmpl(Address(input, length_offset), length);
+      __ j(kLess, slow_path->GetEntryLabel());
+    } else {
+      // Check that length(input) >= pos.
+      __ movl(input_len, Address(input, length_offset));
+      __ cmpl(input_len, Immediate(pos_const));
+      __ j(kLess, slow_path->GetEntryLabel());
+
+      // Check that (length(input) - pos) >= length.
+      __ leal(temp, Address(input_len, -pos_const));
+      __ cmpl(temp, length);
+      __ j(kLess, slow_path->GetEntryLabel());
+    }
+  } else {
+    // Check that pos >= 0.
+    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
+    __ testl(pos_reg, pos_reg);
+    __ j(kLess, slow_path->GetEntryLabel());
+
+    // Check that pos <= length(input).
+    __ cmpl(Address(input, length_offset), pos_reg);
+    __ j(kLess, slow_path->GetEntryLabel());
+
+    // Check that (length(input) - pos) >= length.
+    __ movl(temp, Address(input, length_offset));
+    __ subl(temp, pos_reg);
+    __ cmpl(temp, length);
+    __ j(kLess, slow_path->GetEntryLabel());
+  }
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
+  X86_64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
+  Location srcPos = locations->InAt(1);
+  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
+  Location destPos = locations->InAt(3);
+  Location length = locations->InAt(4);
+
+  // Temporaries that we need for MOVSW.
+  CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
+  DCHECK_EQ(src_base.AsRegister(), RSI);
+  CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
+  DCHECK_EQ(dest_base.AsRegister(), RDI);
+  CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
+  DCHECK_EQ(count.AsRegister(), RCX);
+
+  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Bail out if the source and destination are the same (to handle overlap).
+  __ cmpl(src, dest);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  // Bail out if the source is null.
+  __ testl(src, src);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  // Bail out if the destination is null.
+  __ testl(dest, dest);
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant()) {
+    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
+    __ j(kLess, slow_path->GetEntryLabel());
+  }
+
+  // We need the count in RCX.
+  if (length.IsConstant()) {
+    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+  } else {
+    __ movl(count, length.AsRegister<CpuRegister>());
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base);
+
+  // Validity checks: dest.
+  CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base);
+
+  // Okay, everything checks out.  Finally time to do the copy.
+  // Check assumption that sizeof(Char) is 2 (used in scaling below).
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
+
+  if (srcPos.IsConstant()) {
+    int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
+  } else {
+    __ leal(src_base, Address(src, srcPos.AsRegister<CpuRegister>(),
+                              ScaleFactor::TIMES_2, data_offset));
+  }
+  if (destPos.IsConstant()) {
+    int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
+  } else {
+    __ leal(dest_base, Address(dest, destPos.AsRegister<CpuRegister>(),
+                               ScaleFactor::TIMES_2, data_offset));
+  }
+
+  // Do the move.
+  __ rep_movsw();
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kCall,
@@ -879,9 +1044,7 @@
   CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
   CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
 
-  Label end;
-  Label return_true;
-  Label return_false;
+  NearLabel end, return_true, return_false;
 
   // Get offsets of count, value, and class fields within a string object.
   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
@@ -913,8 +1076,7 @@
   __ cmpl(rcx, Address(arg, count_offset));
   __ j(kNotEqual, &return_false);
   // Return true if both strings are empty.
-  __ testl(rcx, rcx);
-  __ j(kEqual, &return_true);
+  __ jrcxz(&return_true);
 
   // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
   __ leal(rsi, Address(str, value_offset));
@@ -1024,7 +1186,7 @@
 
   // Do a length check.
   // TODO: Support jecxz.
-  Label not_found_label;
+  NearLabel not_found_label;
   __ testl(string_length, string_length);
   __ j(kEqual, &not_found_label);
 
@@ -1066,7 +1228,7 @@
   __ subl(string_length, counter);
   __ leal(out, Address(string_length, -1));
 
-  Label done;
+  NearLabel done;
   __ jmp(&done);
 
   // Failed to match; return -1.
@@ -1731,7 +1893,7 @@
   }
 
   // BSR sets ZF if the input was zero, and the output is undefined.
-  Label is_zero, done;
+  NearLabel is_zero, done;
   __ j(kEqual, &is_zero);
 
   // Correct the result from BSR to get the CLZ result.
@@ -1772,8 +1934,13 @@
 }
 
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
-UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros)
+UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index bc4a663..ec4a9ec 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -63,8 +63,8 @@
     // Provide boiler-plate instructions.
     parameter_ = new (&allocator_) HParameterValue(0, Primitive::kPrimNot);
     entry_->AddInstruction(parameter_);
-    constant_ = new (&allocator_) HConstant(Primitive::kPrimInt);
-    loop_preheader_->AddInstruction(constant_);
+    constant_ = graph_->GetIntConstant(42);
+    loop_preheader_->AddInstruction(new (&allocator_) HGoto());
     loop_header_->AddInstruction(new (&allocator_) HIf(parameter_));
     loop_body_->AddInstruction(new (&allocator_) HGoto());
     exit_->AddInstruction(new (&allocator_) HExit());
@@ -99,23 +99,6 @@
 // The actual LICM tests.
 //
 
-TEST_F(LICMTest, ConstantHoisting) {
-  BuildLoop();
-
-  // Populate the loop with instructions: set array to constant.
-  HInstruction* constant = new (&allocator_) HConstant(Primitive::kPrimDouble);
-  loop_body_->InsertInstructionBefore(constant, loop_body_->GetLastInstruction());
-  HInstruction* set_array = new (&allocator_) HArraySet(
-      parameter_, constant_, constant, Primitive::kPrimDouble, 0);
-  loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction());
-
-  EXPECT_EQ(constant->GetBlock(), loop_body_);
-  EXPECT_EQ(set_array->GetBlock(), loop_body_);
-  PerformLICM();
-  EXPECT_EQ(constant->GetBlock(), loop_preheader_);
-  EXPECT_EQ(set_array->GetBlock(), loop_body_);
-}
-
 TEST_F(LICMTest, FieldHoisting) {
   BuildLoop();
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 650c8e5..cc12a10 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -20,6 +20,7 @@
 #include "ssa_builder.h"
 #include "base/bit_vector-inl.h"
 #include "base/bit_utils.h"
+#include "mirror/class-inl.h"
 #include "utils/growable_array.h"
 #include "scoped_thread_state_change.h"
 
@@ -68,8 +69,8 @@
     if (!visited.IsBitSet(i)) {
       HBasicBlock* block = blocks_.Get(i);
       // We only need to update the successor, which might be live.
-      for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
-        block->GetSuccessors().Get(j)->RemovePredecessor(block);
+      for (HBasicBlock* successor : block->GetSuccessors()) {
+        successor->RemovePredecessor(block);
       }
       // Remove the block from the list of blocks, so that further analyses
       // never see it.
@@ -86,8 +87,7 @@
 
   visited->SetBit(id);
   visiting->SetBit(id);
-  for (size_t i = 0; i < block->GetSuccessors().Size(); i++) {
-    HBasicBlock* successor = block->GetSuccessors().Get(i);
+  for (HBasicBlock* successor : block->GetSuccessors()) {
     if (visiting->IsBitSet(successor->GetBlockId())) {
       successor->AddBackEdge(block);
     } else {
@@ -134,7 +134,7 @@
 }
 
 void HBasicBlock::ClearDominanceInformation() {
-  dominated_blocks_.Reset();
+  dominated_blocks_.clear();
   dominator_ = nullptr;
 }
 
@@ -143,8 +143,8 @@
   GrowableArray<size_t> visits(arena_, blocks_.Size());
   visits.SetSize(blocks_.Size());
   reverse_post_order_.Add(entry_block_);
-  for (size_t i = 0; i < entry_block_->GetSuccessors().Size(); i++) {
-    VisitBlockForDominatorTree(entry_block_->GetSuccessors().Get(i), entry_block_, &visits);
+  for (HBasicBlock* successor : entry_block_->GetSuccessors()) {
+    VisitBlockForDominatorTree(successor, entry_block_, &visits);
   }
 }
 
@@ -179,11 +179,11 @@
   // Once all the forward edges have been visited, we know the immediate
   // dominator of the block. We can then start visiting its successors.
   if (visits->Get(block->GetBlockId()) ==
-      block->GetPredecessors().Size() - block->NumberOfBackEdges()) {
+      block->GetPredecessors().size() - block->NumberOfBackEdges()) {
     block->GetDominator()->AddDominatedBlock(block);
     reverse_post_order_.Add(block);
-    for (size_t i = 0; i < block->GetSuccessors().Size(); i++) {
-      VisitBlockForDominatorTree(block->GetSuccessors().Get(i), block, visits);
+    for (HBasicBlock* successor : block->GetSuccessors()) {
+      VisitBlockForDominatorTree(successor, block, visits);
     }
   }
 }
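The nodes.cc hunks here are dominated by one mechanical migration: the basic block's predecessor/successor/dominated lists move from GrowableArray's Size()/Get()/Put() interface to arena-backed vectors with the std::vector vocabulary, so index loops become range-based for loops and Reset() becomes clear(). The recurring pattern, as a self-contained sketch (Block stands in for HBasicBlock):

    #include <vector>

    struct Block {
      // Formerly GrowableArray<Block*>; now a std::vector-style container.
      std::vector<Block*> successors_;

      void DisconnectSuccessors() {
        for (Block* successor : successors_) {  // Replaces Size()/Get(i) loops.
          (void)successor;  // Per-successor bookkeeping would go here.
        }
        successors_.clear();  // Replaces GrowableArray::Reset().
      }
    };
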
@@ -224,14 +224,14 @@
   // Make sure the loop has only one pre header. This simplifies SSA building by having
   // to just look at the pre header to know which locals are initialized at entry of the
   // loop.
-  size_t number_of_incomings = header->GetPredecessors().Size() - info->NumberOfBackEdges();
+  size_t number_of_incomings = header->GetPredecessors().size() - info->NumberOfBackEdges();
   if (number_of_incomings != 1) {
     HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
     AddBlock(pre_header);
     pre_header->AddInstruction(new (arena_) HGoto(header->GetDexPc()));
 
-    for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) {
-      HBasicBlock* predecessor = header->GetPredecessors().Get(pred);
+    for (size_t pred = 0; pred < header->GetPredecessors().size(); ++pred) {
+      HBasicBlock* predecessor = header->GetPredecessor(pred);
       if (!info->IsBackEdge(*predecessor)) {
         predecessor->ReplaceSuccessor(header, pre_header);
         pred--;
@@ -241,13 +241,13 @@
   }
 
   // Make sure the first predecessor of a loop header is the incoming block.
-  if (info->IsBackEdge(*header->GetPredecessors().Get(0))) {
-    HBasicBlock* to_swap = header->GetPredecessors().Get(0);
-    for (size_t pred = 1, e = header->GetPredecessors().Size(); pred < e; ++pred) {
-      HBasicBlock* predecessor = header->GetPredecessors().Get(pred);
+  if (info->IsBackEdge(*header->GetPredecessor(0))) {
+    HBasicBlock* to_swap = header->GetPredecessor(0);
+    for (size_t pred = 1, e = header->GetPredecessors().size(); pred < e; ++pred) {
+      HBasicBlock* predecessor = header->GetPredecessor(pred);
       if (!info->IsBackEdge(*predecessor)) {
-        header->predecessors_.Put(pred, to_swap);
-        header->predecessors_.Put(0, predecessor);
+        header->predecessors_[pred] = to_swap;
+        header->predecessors_[0] = predecessor;
         break;
       }
     }
@@ -267,7 +267,7 @@
 }
 
 static bool CheckIfPredecessorAtIsExceptional(const HBasicBlock& block, size_t pred_idx) {
-  HBasicBlock* predecessor = block.GetPredecessors().Get(pred_idx);
+  HBasicBlock* predecessor = block.GetPredecessor(pred_idx);
   if (!predecessor->EndsWithTryBoundary()) {
     // Only edges from HTryBoundary can be exceptional.
     return false;
@@ -296,7 +296,7 @@
     }
 
     bool exceptional_predecessors_only = true;
-    for (size_t j = 0; j < catch_block->GetPredecessors().Size(); ++j) {
+    for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
       if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
         exceptional_predecessors_only = false;
         break;
@@ -313,9 +313,9 @@
       // a MOVE_EXCEPTION instruction, as guaranteed by the verifier.
       DCHECK(!catch_block->GetFirstInstruction()->IsLoadException());
       HBasicBlock* normal_block = catch_block->SplitBefore(catch_block->GetFirstInstruction());
-      for (size_t j = 0; j < catch_block->GetPredecessors().Size(); ++j) {
+      for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
         if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
-          catch_block->GetPredecessors().Get(j)->ReplaceSuccessor(catch_block, normal_block);
+          catch_block->GetPredecessor(j)->ReplaceSuccessor(catch_block, normal_block);
           --j;
         }
       }
@@ -337,7 +337,7 @@
     // Infer try membership from the first predecessor. Having simplified loops,
     // the first predecessor can never be a back edge and therefore it must have
     // been visited already and had its try membership set.
-    HBasicBlock* first_predecessor = block->GetPredecessors().Get(0);
+    HBasicBlock* first_predecessor = block->GetPredecessor(0);
     DCHECK(!block->IsLoopHeader() || !block->GetLoopInformation()->IsBackEdge(*first_predecessor));
     const HTryBoundary* try_entry = first_predecessor->ComputeTryEntryOfSuccessors();
     if (try_entry != nullptr) {
@@ -346,16 +346,6 @@
   }
 }
 
-bool HGraph::HasTryCatch() const {
-  for (size_t i = 0, e = blocks_.Size(); i < e; ++i) {
-    HBasicBlock* block = blocks_.Get(i);
-    if (block != nullptr && (block->IsTryBlock() || block->IsCatchBlock())) {
-      return true;
-    }
-  }
-  return false;
-}
-
 void HGraph::SimplifyCFG() {
   // Simplify the CFG for future analysis, and code generation:
   // (1): Split critical edges.
@@ -364,10 +354,10 @@
     HBasicBlock* block = blocks_.Get(i);
     if (block == nullptr) continue;
     if (block->NumberOfNormalSuccessors() > 1) {
-      for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
-        HBasicBlock* successor = block->GetSuccessors().Get(j);
+      for (size_t j = 0; j < block->GetSuccessors().size(); ++j) {
+        HBasicBlock* successor = block->GetSuccessor(j);
         DCHECK(!successor->IsCatchBlock());
-        if (successor->GetPredecessors().Size() > 1) {
+        if (successor->GetPredecessors().size() > 1) {
           SplitCriticalEdge(block, successor);
           --j;
         }
@@ -486,8 +476,8 @@
 
   blocks_.SetBit(block->GetBlockId());
   block->SetInLoop(this);
-  for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
-    PopulateRecursive(block->GetPredecessors().Get(i));
+  for (HBasicBlock* predecessor : block->GetPredecessors()) {
+    PopulateRecursive(predecessor);
   }
 }
 
@@ -1138,12 +1128,11 @@
   new_block->instructions_.SetBlockOfInstructions(new_block);
   AddInstruction(new (GetGraph()->GetArena()) HGoto(new_block->GetDexPc()));
 
-  for (size_t i = 0, e = GetSuccessors().Size(); i < e; ++i) {
-    HBasicBlock* successor = GetSuccessors().Get(i);
-    new_block->successors_.Add(successor);
-    successor->predecessors_.Put(successor->GetPredecessorIndexOf(this), new_block);
+  for (HBasicBlock* successor : GetSuccessors()) {
+    new_block->successors_.push_back(successor);
+    successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block;
   }
-  successors_.Reset();
+  successors_.clear();
   AddSuccessor(new_block);
 
   GetGraph()->AddBlock(new_block);
@@ -1163,19 +1152,17 @@
   instructions_.last_instruction_ = cursor;
 
   new_block->instructions_.SetBlockOfInstructions(new_block);
-  for (size_t i = 0, e = GetSuccessors().Size(); i < e; ++i) {
-    HBasicBlock* successor = GetSuccessors().Get(i);
-    new_block->successors_.Add(successor);
-    successor->predecessors_.Put(successor->GetPredecessorIndexOf(this), new_block);
+  for (HBasicBlock* successor : GetSuccessors()) {
+    new_block->successors_.push_back(successor);
+    successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block;
   }
-  successors_.Reset();
+  successors_.clear();
 
-  for (size_t i = 0, e = GetDominatedBlocks().Size(); i < e; ++i) {
-    HBasicBlock* dominated = GetDominatedBlocks().Get(i);
+  for (HBasicBlock* dominated : GetDominatedBlocks()) {
     dominated->dominator_ = new_block;
-    new_block->dominated_blocks_.Add(dominated);
+    new_block->dominated_blocks_.push_back(dominated);
   }
-  dominated_blocks_.Reset();
+  dominated_blocks_.clear();
   return new_block;
 }
 
@@ -1228,7 +1215,7 @@
 }
 
 bool HTryBoundary::HasSameExceptionHandlersAs(const HTryBoundary& other) const {
-  if (GetBlock()->GetSuccessors().Size() != other.GetBlock()->GetSuccessors().Size()) {
+  if (GetBlock()->GetSuccessors().size() != other.GetBlock()->GetSuccessors().size()) {
     return false;
   }
 
@@ -1288,7 +1275,7 @@
   // Dominators must be removed after all the blocks they dominate. This way
   // a loop header is removed last, a requirement for correct loop information
   // iteration.
-  DCHECK(dominated_blocks_.IsEmpty());
+  DCHECK(dominated_blocks_.empty());
 
   // Remove the block from all loops it is included in.
   for (HLoopInformationOutwardIterator it(*this); !it.Done(); it.Advance()) {
@@ -1304,36 +1291,34 @@
 
   // Disconnect the block from its predecessors and update their control-flow
   // instructions.
-  for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
-    HBasicBlock* predecessor = predecessors_.Get(i);
+  for (HBasicBlock* predecessor : predecessors_) {
     HInstruction* last_instruction = predecessor->GetLastInstruction();
     predecessor->RemoveInstruction(last_instruction);
     predecessor->RemoveSuccessor(this);
-    if (predecessor->GetSuccessors().Size() == 1u) {
+    if (predecessor->GetSuccessors().size() == 1u) {
       DCHECK(last_instruction->IsIf());
       predecessor->AddInstruction(new (graph_->GetArena()) HGoto(last_instruction->GetDexPc()));
     } else {
       // The predecessor has no remaining successors and therefore must be dead.
       // We deliberately leave it without a control-flow instruction so that the
      // SSAChecker fails unless it is also removed during the pass.
-      DCHECK_EQ(predecessor->GetSuccessors().Size(), 0u);
+      DCHECK_EQ(predecessor->GetSuccessors().size(), 0u);
     }
   }
-  predecessors_.Reset();
+  predecessors_.clear();
 
   // Disconnect the block from its successors and update their phis.
-  for (size_t i = 0, e = successors_.Size(); i < e; ++i) {
-    HBasicBlock* successor = successors_.Get(i);
+  for (HBasicBlock* successor : successors_) {
     // Delete this block from the list of predecessors.
     size_t this_index = successor->GetPredecessorIndexOf(this);
-    successor->predecessors_.DeleteAt(this_index);
+    successor->predecessors_.erase(successor->predecessors_.begin() + this_index);
 
     // Check that `successor` has other predecessors, otherwise `this` is the
     // dominator of `successor` which violates the order DCHECKed at the top.
-    DCHECK(!successor->predecessors_.IsEmpty());
+    DCHECK(!successor->predecessors_.empty());
 
     // Remove this block's entries in the successor's phis.
-    if (successor->predecessors_.Size() == 1u) {
+    if (successor->predecessors_.size() == 1u) {
       // The successor has just one predecessor left. Replace phis with the only
       // remaining input.
       for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
@@ -1347,7 +1332,7 @@
       }
     }
   }
-  successors_.Reset();
+  successors_.clear();
 
   // Disconnect from the dominator.
   dominator_->RemoveDominatedBlock(this);
@@ -1361,11 +1346,9 @@
 
 void HBasicBlock::MergeWith(HBasicBlock* other) {
   DCHECK_EQ(GetGraph(), other->GetGraph());
-  DCHECK(GetDominatedBlocks().Contains(other));
-  DCHECK_EQ(GetSuccessors().Size(), 1u);
-  DCHECK_EQ(GetSuccessors().Get(0), other);
-  DCHECK_EQ(other->GetPredecessors().Size(), 1u);
-  DCHECK_EQ(other->GetPredecessors().Get(0), this);
+  DCHECK(ContainsElement(dominated_blocks_, other));
+  DCHECK_EQ(GetSingleSuccessor(), other);
+  DCHECK_EQ(other->GetSinglePredecessor(), this);
   DCHECK(other->GetPhis().IsEmpty());
 
   // Move instructions from `other` to `this`.
@@ -1385,24 +1368,23 @@
   }
 
   // Update links to the successors of `other`.
-  successors_.Reset();
-  while (!other->successors_.IsEmpty()) {
-    HBasicBlock* successor = other->successors_.Get(0);
+  successors_.clear();
+  while (!other->successors_.empty()) {
+    HBasicBlock* successor = other->GetSuccessor(0);
     successor->ReplacePredecessor(other, this);
   }
 
   // Update the dominator tree.
-  dominated_blocks_.Delete(other);
-  for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) {
-    HBasicBlock* dominated = other->GetDominatedBlocks().Get(i);
-    dominated_blocks_.Add(dominated);
+  RemoveDominatedBlock(other);
+  for (HBasicBlock* dominated : other->GetDominatedBlocks()) {
+    dominated_blocks_.push_back(dominated);
     dominated->SetDominator(this);
   }
-  other->dominated_blocks_.Reset();
+  other->dominated_blocks_.clear();
   other->dominator_ = nullptr;
 
   // Clear the list of predecessors of `other` in preparation of deleting it.
-  other->predecessors_.Reset();
+  other->predecessors_.clear();
 
   // Delete `other` from the graph. The function updates reverse post order.
   graph_->DeleteDeadBlock(other);
@@ -1411,11 +1393,10 @@
 
 void HBasicBlock::MergeWithInlined(HBasicBlock* other) {
   DCHECK_NE(GetGraph(), other->GetGraph());
-  DCHECK(GetDominatedBlocks().IsEmpty());
-  DCHECK(GetSuccessors().IsEmpty());
+  DCHECK(GetDominatedBlocks().empty());
+  DCHECK(GetSuccessors().empty());
   DCHECK(!EndsWithControlFlowInstruction());
-  DCHECK_EQ(other->GetPredecessors().Size(), 1u);
-  DCHECK(other->GetPredecessors().Get(0)->IsEntryBlock());
+  DCHECK(other->GetSinglePredecessor()->IsEntryBlock());
   DCHECK(other->GetPhis().IsEmpty());
   DCHECK(!other->IsInLoop());
 
@@ -1424,34 +1405,33 @@
   other->instructions_.SetBlockOfInstructions(this);
 
   // Update links to the successors of `other`.
-  successors_.Reset();
-  while (!other->successors_.IsEmpty()) {
-    HBasicBlock* successor = other->successors_.Get(0);
+  successors_.clear();
+  while (!other->successors_.empty()) {
+    HBasicBlock* successor = other->GetSuccessor(0);
     successor->ReplacePredecessor(other, this);
   }
 
   // Update the dominator tree.
-  for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) {
-    HBasicBlock* dominated = other->GetDominatedBlocks().Get(i);
-    dominated_blocks_.Add(dominated);
+  for (HBasicBlock* dominated : other->GetDominatedBlocks()) {
+    dominated_blocks_.push_back(dominated);
     dominated->SetDominator(this);
   }
-  other->dominated_blocks_.Reset();
+  other->dominated_blocks_.clear();
   other->dominator_ = nullptr;
   other->graph_ = nullptr;
 }
 
 void HBasicBlock::ReplaceWith(HBasicBlock* other) {
-  while (!GetPredecessors().IsEmpty()) {
-    HBasicBlock* predecessor = GetPredecessors().Get(0);
+  while (!GetPredecessors().empty()) {
+    HBasicBlock* predecessor = GetPredecessor(0);
     predecessor->ReplaceSuccessor(this, other);
   }
-  while (!GetSuccessors().IsEmpty()) {
-    HBasicBlock* successor = GetSuccessors().Get(0);
+  while (!GetSuccessors().empty()) {
+    HBasicBlock* successor = GetSuccessor(0);
     successor->ReplacePredecessor(this, other);
   }
-  for (size_t i = 0; i < dominated_blocks_.Size(); ++i) {
-    other->AddDominatedBlock(dominated_blocks_.Get(i));
+  for (HBasicBlock* dominated : GetDominatedBlocks()) {
+    other->AddDominatedBlock(dominated);
   }
   GetDominator()->ReplaceDominatedBlock(this, other);
   other->SetDominator(GetDominator());
@@ -1474,9 +1454,9 @@
 
 void HGraph::DeleteDeadBlock(HBasicBlock* block) {
   DCHECK_EQ(block->GetGraph(), this);
-  DCHECK(block->GetSuccessors().IsEmpty());
-  DCHECK(block->GetPredecessors().IsEmpty());
-  DCHECK(block->GetDominatedBlocks().IsEmpty());
+  DCHECK(block->GetSuccessors().empty());
+  DCHECK(block->GetPredecessors().empty());
+  DCHECK(block->GetDominatedBlocks().empty());
   DCHECK(block->GetDominator() == nullptr);
 
   for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
@@ -1550,16 +1530,16 @@
     HBasicBlock* at = invoke->GetBlock();
     HBasicBlock* to = at->SplitAfter(invoke);
 
-    HBasicBlock* first = entry_block_->GetSuccessors().Get(0);
+    HBasicBlock* first = entry_block_->GetSuccessor(0);
     DCHECK(!first->IsInLoop());
     at->MergeWithInlined(first);
     exit_block_->ReplaceWith(to);
 
     // Update all predecessors of the exit block (now the `to` block)
     // to not `HReturn` but `HGoto` instead.
-    bool returns_void = to->GetPredecessors().Get(0)->GetLastInstruction()->IsReturnVoid();
-    if (to->GetPredecessors().Size() == 1) {
-      HBasicBlock* predecessor = to->GetPredecessors().Get(0);
+    bool returns_void = to->GetPredecessor(0)->GetLastInstruction()->IsReturnVoid();
+    if (to->GetPredecessors().size() == 1) {
+      HBasicBlock* predecessor = to->GetPredecessor(0);
       HInstruction* last = predecessor->GetLastInstruction();
       if (!returns_void) {
         return_value = last->InputAt(0);
@@ -1573,8 +1553,7 @@
             allocator, kNoRegNumber, 0, HPhi::ToPhiType(invoke->GetType()), to->GetDexPc());
         to->AddPhi(return_value->AsPhi());
       }
-      for (size_t i = 0, e = to->GetPredecessors().Size(); i < e; ++i) {
-        HBasicBlock* predecessor = to->GetPredecessors().Get(i);
+      for (HBasicBlock* predecessor : to->GetPredecessors()) {
         HInstruction* last = predecessor->GetLastInstruction();
         if (!returns_void) {
           return_value->AsPhi()->AddInput(last->InputAt(0));
@@ -1726,8 +1705,8 @@
   AddBlock(new_pre_header);
 
   header->ReplacePredecessor(pre_header, new_pre_header);
-  pre_header->successors_.Reset();
-  pre_header->dominated_blocks_.Reset();
+  pre_header->successors_.clear();
+  pre_header->dominated_blocks_.clear();
 
   pre_header->AddSuccessor(if_block);
   if_block->AddSuccessor(dummy_block);  // True successor
@@ -1735,15 +1714,15 @@
   dummy_block->AddSuccessor(new_pre_header);
   deopt_block->AddSuccessor(new_pre_header);
 
-  pre_header->dominated_blocks_.Add(if_block);
+  pre_header->dominated_blocks_.push_back(if_block);
   if_block->SetDominator(pre_header);
-  if_block->dominated_blocks_.Add(dummy_block);
+  if_block->dominated_blocks_.push_back(dummy_block);
   dummy_block->SetDominator(if_block);
-  if_block->dominated_blocks_.Add(deopt_block);
+  if_block->dominated_blocks_.push_back(deopt_block);
   deopt_block->SetDominator(if_block);
-  if_block->dominated_blocks_.Add(new_pre_header);
+  if_block->dominated_blocks_.push_back(new_pre_header);
   new_pre_header->SetDominator(if_block);
-  new_pre_header->dominated_blocks_.Add(header);
+  new_pre_header->dominated_blocks_.push_back(header);
   header->SetDominator(new_pre_header);
 
   size_t index_of_header = 0;
@@ -1785,7 +1764,7 @@
       DCHECK(upper_bound_rti.IsSupertypeOf(rti))
           << " upper_bound_rti: " << upper_bound_rti
           << " rti: " << rti;
-      DCHECK(!upper_bound_rti.GetTypeHandle()->IsFinal() || rti.IsExact());
+      DCHECK(!upper_bound_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes() || rti.IsExact());
     }
   }
   reference_type_info_ = rti;
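
Why IsFinal() is too weak here (and in the reference_type_propagation.cc hunk further below): array classes carry the final flag, yet by covariance an Object[] reference may hold a String[] instance at runtime, so finality of the class alone does not make a reference type exact. A minimal sketch of the stronger predicate, assuming mirror::Class accessors with these names; this is an illustration, not the runtime's implementation:

bool CannotBeAssignedFromOtherTypesSketch(mirror::Class* klass) {
  // Sketch only; accessor names are assumptions, not quoted runtime code.
  if (!klass->IsArrayClass()) {
    return klass->IsFinal();  // A final class admits no subtypes.
  }
  mirror::Class* component = klass->GetComponentType();
  if (component->IsPrimitive()) {
    return true;  // e.g. int[] can only be assigned from int[].
  }
  // String[] is assignable only from String[]; Object[] also from String[].
  return CannotBeAssignedFromOtherTypesSketch(component);
}
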
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 23d605b..d52a4f7 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -17,11 +17,13 @@
 #ifndef ART_COMPILER_OPTIMIZING_NODES_H_
 #define ART_COMPILER_OPTIMIZING_NODES_H_
 
+#include <algorithm>
 #include <array>
 #include <type_traits>
 
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
+#include "base/stl_util.h"
 #include "dex/compiler_enums.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "handle.h"
@@ -155,6 +157,7 @@
         number_of_in_vregs_(0),
         temporaries_vreg_slots_(0),
         has_bounds_checks_(false),
+        has_try_catch_(false),
         debuggable_(debuggable),
         current_instruction_id_(start_instruction_id),
         dex_file_(dex_file),
@@ -280,7 +283,6 @@
   }
 
   uint16_t GetNumberOfVRegs() const {
-    DCHECK(!in_ssa_form_);
     return number_of_vregs_;
   }
 
@@ -358,8 +360,8 @@
     return instruction_set_;
   }
 
-  // TODO: Remove once the full compilation pipeline is enabled for try/catch.
-  bool HasTryCatch() const;
+  bool HasTryCatch() const { return has_try_catch_; }
+  void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
  private:
   void VisitBlockForDominatorTree(HBasicBlock* block,
@@ -431,6 +433,10 @@
   // Has bounds checks. We can totally skip BCE if it's false.
   bool has_bounds_checks_;
 
+  // Flag whether there are any try/catch blocks in the graph. We will skip
+  // try/catch-related passes if false.
+  bool has_try_catch_;
+
   // Indicates whether the graph should be compiled in a way that
   // ensures full debuggability. If false, we can apply more
   // aggressive optimizations that may limit the level of debugging.
@@ -630,26 +636,44 @@
  public:
   HBasicBlock(HGraph* graph, uint32_t dex_pc = kNoDexPc)
       : graph_(graph),
-        predecessors_(graph->GetArena(), kDefaultNumberOfPredecessors),
-        successors_(graph->GetArena(), kDefaultNumberOfSuccessors),
+        predecessors_(graph->GetArena()->Adapter(kArenaAllocPredecessors)),
+        successors_(graph->GetArena()->Adapter(kArenaAllocSuccessors)),
         loop_information_(nullptr),
         dominator_(nullptr),
-        dominated_blocks_(graph->GetArena(), kDefaultNumberOfDominatedBlocks),
+        dominated_blocks_(graph->GetArena()->Adapter(kArenaAllocDominated)),
         block_id_(-1),
         dex_pc_(dex_pc),
         lifetime_start_(kNoLifetime),
         lifetime_end_(kNoLifetime),
-        try_catch_information_(nullptr) {}
+        try_catch_information_(nullptr) {
+    predecessors_.reserve(kDefaultNumberOfPredecessors);
+    successors_.reserve(kDefaultNumberOfSuccessors);
+    dominated_blocks_.reserve(kDefaultNumberOfDominatedBlocks);
+  }
 
-  const GrowableArray<HBasicBlock*>& GetPredecessors() const {
+  const ArenaVector<HBasicBlock*>& GetPredecessors() const {
     return predecessors_;
   }
 
-  const GrowableArray<HBasicBlock*>& GetSuccessors() const {
+  HBasicBlock* GetPredecessor(size_t pred_idx) const {
+    DCHECK_LT(pred_idx, predecessors_.size());
+    return predecessors_[pred_idx];
+  }
+
+  const ArenaVector<HBasicBlock*>& GetSuccessors() const {
     return successors_;
   }
 
-  const GrowableArray<HBasicBlock*>& GetDominatedBlocks() const {
+  HBasicBlock* GetSuccessor(size_t succ_idx) const {
+    DCHECK_LT(succ_idx, successors_.size());
+    return successors_[succ_idx];
+  }
+
+  bool HasSuccessor(const HBasicBlock* block, size_t start_from = 0u) const {
+    return ContainsElement(successors_, block, start_from);
+  }
+
+  const ArenaVector<HBasicBlock*>& GetDominatedBlocks() const {
     return dominated_blocks_;
   }
 
@@ -689,18 +713,16 @@
 
   HBasicBlock* GetDominator() const { return dominator_; }
   void SetDominator(HBasicBlock* dominator) { dominator_ = dominator; }
-  void AddDominatedBlock(HBasicBlock* block) { dominated_blocks_.Add(block); }
-  void RemoveDominatedBlock(HBasicBlock* block) { dominated_blocks_.Delete(block); }
-  void ReplaceDominatedBlock(HBasicBlock* existing, HBasicBlock* new_block) {
-    for (size_t i = 0, e = dominated_blocks_.Size(); i < e; ++i) {
-      if (dominated_blocks_.Get(i) == existing) {
-        dominated_blocks_.Put(i, new_block);
-        return;
-      }
-    }
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
+  void AddDominatedBlock(HBasicBlock* block) { dominated_blocks_.push_back(block); }
+
+  void RemoveDominatedBlock(HBasicBlock* block) {
+    RemoveElement(dominated_blocks_, block);
   }
+
+  void ReplaceDominatedBlock(HBasicBlock* existing, HBasicBlock* new_block) {
+    ReplaceElement(dominated_blocks_, existing, new_block);
+  }
+
   void ClearDominanceInformation();
 
   int NumberOfBackEdges() const {
@@ -715,24 +737,22 @@
   const HInstructionList& GetPhis() const { return phis_; }
 
   void AddSuccessor(HBasicBlock* block) {
-    successors_.Add(block);
-    block->predecessors_.Add(this);
+    successors_.push_back(block);
+    block->predecessors_.push_back(this);
   }
 
   void ReplaceSuccessor(HBasicBlock* existing, HBasicBlock* new_block) {
     size_t successor_index = GetSuccessorIndexOf(existing);
-    DCHECK_NE(successor_index, static_cast<size_t>(-1));
     existing->RemovePredecessor(this);
-    new_block->predecessors_.Add(this);
-    successors_.Put(successor_index, new_block);
+    new_block->predecessors_.push_back(this);
+    successors_[successor_index] = new_block;
   }
 
   void ReplacePredecessor(HBasicBlock* existing, HBasicBlock* new_block) {
     size_t predecessor_index = GetPredecessorIndexOf(existing);
-    DCHECK_NE(predecessor_index, static_cast<size_t>(-1));
     existing->RemoveSuccessor(this);
-    new_block->successors_.Add(this);
-    predecessors_.Put(predecessor_index, new_block);
+    new_block->successors_.push_back(this);
+    predecessors_[predecessor_index] = new_block;
   }
 
   // Insert `this` between `predecessor` and `successor`. This method
@@ -740,85 +760,69 @@
   // `predecessor` and `successor`.
   void InsertBetween(HBasicBlock* predecessor, HBasicBlock* successor) {
     size_t predecessor_index = successor->GetPredecessorIndexOf(predecessor);
-    DCHECK_NE(predecessor_index, static_cast<size_t>(-1));
     size_t successor_index = predecessor->GetSuccessorIndexOf(successor);
-    DCHECK_NE(successor_index, static_cast<size_t>(-1));
-    successor->predecessors_.Put(predecessor_index, this);
-    predecessor->successors_.Put(successor_index, this);
-    successors_.Add(successor);
-    predecessors_.Add(predecessor);
+    successor->predecessors_[predecessor_index] = this;
+    predecessor->successors_[successor_index] = this;
+    successors_.push_back(successor);
+    predecessors_.push_back(predecessor);
   }
 
   void RemovePredecessor(HBasicBlock* block) {
-    predecessors_.Delete(block);
+    predecessors_.erase(predecessors_.begin() + GetPredecessorIndexOf(block));
   }
 
   void RemoveSuccessor(HBasicBlock* block) {
-    successors_.Delete(block);
+    successors_.erase(successors_.begin() + GetSuccessorIndexOf(block));
   }
 
   void ClearAllPredecessors() {
-    predecessors_.Reset();
+    predecessors_.clear();
   }
 
   void AddPredecessor(HBasicBlock* block) {
-    predecessors_.Add(block);
-    block->successors_.Add(this);
+    predecessors_.push_back(block);
+    block->successors_.push_back(this);
   }
 
   void SwapPredecessors() {
-    DCHECK_EQ(predecessors_.Size(), 2u);
-    HBasicBlock* temp = predecessors_.Get(0);
-    predecessors_.Put(0, predecessors_.Get(1));
-    predecessors_.Put(1, temp);
+    DCHECK_EQ(predecessors_.size(), 2u);
+    std::swap(predecessors_[0], predecessors_[1]);
   }
 
   void SwapSuccessors() {
-    DCHECK_EQ(successors_.Size(), 2u);
-    HBasicBlock* temp = successors_.Get(0);
-    successors_.Put(0, successors_.Get(1));
-    successors_.Put(1, temp);
+    DCHECK_EQ(successors_.size(), 2u);
+    std::swap(successors_[0], successors_[1]);
   }
 
   size_t GetPredecessorIndexOf(HBasicBlock* predecessor) const {
-    for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
-      if (predecessors_.Get(i) == predecessor) {
-        return i;
-      }
-    }
-    return -1;
+    return IndexOfElement(predecessors_, predecessor);
   }
 
   size_t GetSuccessorIndexOf(HBasicBlock* successor) const {
-    for (size_t i = 0, e = successors_.Size(); i < e; ++i) {
-      if (successors_.Get(i) == successor) {
-        return i;
-      }
-    }
-    return -1;
+    return IndexOfElement(successors_, successor);
   }
 
   HBasicBlock* GetSinglePredecessor() const {
-    DCHECK_EQ(GetPredecessors().Size(), 1u);
-    return GetPredecessors().Get(0);
+    DCHECK_EQ(GetPredecessors().size(), 1u);
+    return GetPredecessor(0);
   }
 
   HBasicBlock* GetSingleSuccessor() const {
-    DCHECK_EQ(GetSuccessors().Size(), 1u);
-    return GetSuccessors().Get(0);
+    DCHECK_EQ(GetSuccessors().size(), 1u);
+    return GetSuccessor(0);
   }
 
   // Returns whether the first occurrence of `predecessor` in the list of
   // predecessors is at index `idx`.
   bool IsFirstIndexOfPredecessor(HBasicBlock* predecessor, size_t idx) const {
-    DCHECK_EQ(GetPredecessors().Get(idx), predecessor);
+    DCHECK_EQ(GetPredecessor(idx), predecessor);
     return GetPredecessorIndexOf(predecessor) == idx;
   }
 
   // Returns the number of non-exceptional successors. SsaChecker ensures that
   // these are stored at the beginning of the successor list.
   size_t NumberOfNormalSuccessors() const {
-    return EndsWithTryBoundary() ? 1 : GetSuccessors().Size();
+    return EndsWithTryBoundary() ? 1 : GetSuccessors().size();
   }
 
   // Split the block into two blocks just before `cursor`. Returns the newly
@@ -883,8 +887,7 @@
 
   bool IsLoopPreHeaderFirstPredecessor() const {
     DCHECK(IsLoopHeader());
-    DCHECK(!GetPredecessors().IsEmpty());
-    return GetPredecessors().Get(0) == GetLoopInformation()->GetPreHeader();
+    return GetPredecessor(0) == GetLoopInformation()->GetPreHeader();
   }
 
   HLoopInformation* GetLoopInformation() const {
@@ -954,13 +957,13 @@
 
  private:
   HGraph* graph_;
-  GrowableArray<HBasicBlock*> predecessors_;
-  GrowableArray<HBasicBlock*> successors_;
+  ArenaVector<HBasicBlock*> predecessors_;
+  ArenaVector<HBasicBlock*> successors_;
   HInstructionList instructions_;
   HInstructionList phis_;
   HLoopInformation* loop_information_;
   HBasicBlock* dominator_;
-  GrowableArray<HBasicBlock*> dominated_blocks_;
+  ArenaVector<HBasicBlock*> dominated_blocks_;
   int block_id_;
   // The dex program counter of the first instruction of this block.
   const uint32_t dex_pc_;
@@ -2188,6 +2191,8 @@
   virtual bool IsZero() const { return false; }
   virtual bool IsOne() const { return false; }
 
+  virtual uint64_t GetValueAsUint64() const = 0;
+
   DECLARE_INSTRUCTION(Constant);
 
  private:
@@ -2200,6 +2205,8 @@
     return true;
   }
 
+  uint64_t GetValueAsUint64() const OVERRIDE { return 0; }
+
   size_t ComputeHashCode() const OVERRIDE { return 0; }
 
   DECLARE_INSTRUCTION(NullConstant);
@@ -2217,6 +2224,8 @@
  public:
   int32_t GetValue() const { return value_; }
 
+  uint64_t GetValueAsUint64() const OVERRIDE { return static_cast<uint64_t>(value_); }
+
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
     DCHECK(other->IsIntConstant());
     return other->AsIntConstant()->value_ == value_;
@@ -2248,6 +2257,8 @@
  public:
   int64_t GetValue() const { return value_; }
 
+  uint64_t GetValueAsUint64() const OVERRIDE { return static_cast<uint64_t>(value_); }
+
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
     DCHECK(other->IsLongConstant());
     return other->AsLongConstant()->value_ == value_;
@@ -2283,11 +2294,11 @@
   bool IsControlFlow() const OVERRIDE { return true; }
 
   HBasicBlock* IfTrueSuccessor() const {
-    return GetBlock()->GetSuccessors().Get(0);
+    return GetBlock()->GetSuccessor(0);
   }
 
   HBasicBlock* IfFalseSuccessor() const {
-    return GetBlock()->GetSuccessors().Get(1);
+    return GetBlock()->GetSuccessor(1);
   }
 
   DECLARE_INSTRUCTION(If);
@@ -2315,14 +2326,13 @@
   bool IsControlFlow() const OVERRIDE { return true; }
 
   // Returns the block's non-exceptional successor (index zero).
-  HBasicBlock* GetNormalFlowSuccessor() const { return GetBlock()->GetSuccessors().Get(0); }
+  HBasicBlock* GetNormalFlowSuccessor() const { return GetBlock()->GetSuccessor(0); }
 
   // Returns whether `handler` is among its exception handlers (non-zero index
   // successors).
   bool HasExceptionHandler(const HBasicBlock& handler) const {
     DCHECK(handler.IsCatchBlock());
-    return GetBlock()->GetSuccessors().Contains(
-        const_cast<HBasicBlock*>(&handler), /* start_from */ 1);
+    return GetBlock()->HasSuccessor(&handler, 1u /* Skip first successor. */);
   }
 
   // If not present already, adds `handler` to its block's list of exception
@@ -2352,8 +2362,8 @@
   explicit HExceptionHandlerIterator(const HTryBoundary& try_boundary)
     : block_(*try_boundary.GetBlock()), index_(block_.NumberOfNormalSuccessors()) {}
 
-  bool Done() const { return index_ == block_.GetSuccessors().Size(); }
-  HBasicBlock* Current() const { return block_.GetSuccessors().Get(index_); }
+  bool Done() const { return index_ == block_.GetSuccessors().size(); }
+  HBasicBlock* Current() const { return block_.GetSuccessor(index_); }
   size_t CurrentSuccessorIndex() const { return index_; }
   void Advance() { ++index_; }
 
@@ -2868,10 +2878,13 @@
  public:
   float GetValue() const { return value_; }
 
+  uint64_t GetValueAsUint64() const OVERRIDE {
+    return static_cast<uint64_t>(bit_cast<uint32_t, float>(value_));
+  }
+
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
     DCHECK(other->IsFloatConstant());
-    return bit_cast<uint32_t, float>(other->AsFloatConstant()->value_) ==
-        bit_cast<uint32_t, float>(value_);
+    return other->AsFloatConstant()->GetValueAsUint64() == GetValueAsUint64();
   }
 
   size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
@@ -2909,10 +2922,11 @@
  public:
   double GetValue() const { return value_; }
 
+  uint64_t GetValueAsUint64() const OVERRIDE { return bit_cast<uint64_t, double>(value_); }
+
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
     DCHECK(other->IsDoubleConstant());
-    return bit_cast<uint64_t, double>(other->AsDoubleConstant()->value_) ==
-        bit_cast<uint64_t, double>(value_);
+    return other->AsDoubleConstant()->GetValueAsUint64() == GetValueAsUint64();
   }
 
   size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
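
GetValueAsUint64() exists so that floating-point constants compare by bit pattern rather than by value: with operator== a NaN constant would never equal itself, and +0.0 would wrongly equal -0.0, breaking constant deduplication. A self-contained illustration; BitsOf is a stand-in for art's bit_cast and nothing here is quoted from the runtime:

#include <cmath>
#include <cstdint>
#include <cstring>

// Portable reinterpretation of a double's object representation.
static uint64_t BitsOf(double v) {
  uint64_t bits;
  std::memcpy(&bits, &v, sizeof(bits));
  return bits;
}

int main() {
  const double nan = std::nan("");
  const bool value_eq = (nan == nan);                       // false: NaN != NaN
  const bool bits_eq = (BitsOf(nan) == BitsOf(nan));        // true
  const bool zero_eq = (0.0 == -0.0);                       // true numerically
  const bool zero_bits_eq = (BitsOf(0.0) == BitsOf(-0.0));  // false
  return (!value_eq && bits_eq && zero_eq && !zero_bits_eq) ? 0 : 1;
}
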
@@ -4005,6 +4019,13 @@
   bool IsDead() const { return !is_live_; }
   bool IsLive() const { return is_live_; }
 
+  bool IsVRegEquivalentOf(HInstruction* other) const {
+    return other != nullptr
+        && other->IsPhi()
+        && other->AsPhi()->GetBlock() == GetBlock()
+        && other->AsPhi()->GetRegNumber() == GetRegNumber();
+  }
+
   // Returns the next equivalent phi (starting from the current one) or null if there is none.
   // An equivalent phi is a phi having the same dex register and type.
   // It assumes that phis with the same dex register are adjacent.
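
The ad-hoc GrowableArray loops removed throughout this file give way to small templates from base/stl_util.h (ContainsElement, IndexOfElement, RemoveElement, ReplaceElement). Hedged sketches of what these helpers plausibly look like, inferred from the call sites rather than quoted; assert() stands in for DCHECK:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iterator>

template <typename Container, typename T>
size_t IndexOfElement(const Container& c, const T& value) {
  auto it = std::find(c.begin(), c.end(), value);
  assert(it != c.end());  // Call sites rely on the element being present.
  return static_cast<size_t>(std::distance(c.begin(), it));
}

template <typename Container, typename T>
void RemoveElement(Container& c, const T& value) {
  c.erase(c.begin() + IndexOfElement(c, value));
}

template <typename Container, typename T>
void ReplaceElement(Container& c, const T& old_value, const T& new_value) {
  *(c.begin() + IndexOfElement(c, old_value)) = new_value;
}

template <typename Container, typename T>
bool ContainsElement(const Container& c, const T& value,
                     size_t start_from = 0u) {
  return std::find(c.begin() + start_from, c.end(), value) != c.end();
}
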
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index f549ba8..8fc1e4e 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -51,6 +51,7 @@
 #include "graph_checker.h"
 #include "graph_visualizer.h"
 #include "gvn.h"
+#include "induction_var_analysis.h"
 #include "inliner.h"
 #include "instruction_simplifier.h"
 #include "intrinsics.h"
@@ -462,7 +463,8 @@
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
   GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
   LICM* licm = new (arena) LICM(graph, *side_effects);
-  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph);
+  HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
+  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, induction);
   ReferenceTypePropagation* type_propagation =
       new (arena) ReferenceTypePropagation(graph, handles);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
@@ -485,34 +487,44 @@
 
   RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
 
+  // TODO: Update passes incompatible with try/catch so we have the same
+  //       pipeline for all methods.
   if (graph->HasTryCatch()) {
-    // TODO: Update the optimizations below to work correctly under try/catch
-    //       semantics. The optimizations above suffice for running codegen
-    //       in the meanwhile.
-    return;
+    HOptimization* optimizations2[] = {
+      side_effects,
+      gvn,
+      dce2,
+      // The codegen has a few assumptions that only the instruction simplifier
+      // can satisfy. For example, the code generator does not expect to see a
+      // HTypeConversion from a type to the same type.
+      simplify4,
+    };
+
+    RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
+  } else {
+    MaybeRunInliner(graph, driver, stats, dex_compilation_unit, pass_observer, handles);
+
+    HOptimization* optimizations2[] = {
+      // BooleanSimplifier depends on the InstructionSimplifier removing
+      // redundant suspend checks to recognize empty blocks.
+      boolean_simplify,
+      fold2,  // TODO: if we don't inline we can also skip fold2.
+      side_effects,
+      gvn,
+      licm,
+      induction,
+      bce,
+      simplify3,
+      dce2,
+      // The codegen has a few assumptions that only the instruction simplifier
+      // can satisfy. For example, the code generator does not expect to see a
+      // HTypeConversion from a type to the same type.
+      simplify4,
+    };
+
+    RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
   }
 
-  MaybeRunInliner(graph, driver, stats, dex_compilation_unit, pass_observer, handles);
-
-  HOptimization* optimizations2[] = {
-    // BooleanSimplifier depends on the InstructionSimplifier removing redundant
-    // suspend checks to recognize empty blocks.
-    boolean_simplify,
-    fold2,  // TODO: if we don't inline we can also skip fold2.
-    side_effects,
-    gvn,
-    licm,
-    bce,
-    simplify3,
-    dce2,
-    // The codegen has a few assumptions that only the instruction simplifier can
-    // satisfy. For example, the code generator does not expect to see a
-    // HTypeConversion from a type to the same type.
-    simplify4,
-  };
-
-  RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
-
   RunArchOptimizations(driver->GetInstructionSet(), graph, stats, pass_observer);
 }
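
Both branches above hand their pass arrays to the same driver. For orientation, a hedged sketch of that driver loop; the real RunOptimizations is defined earlier in this file and the PassScope details are assumptions:

static void RunOptimizationsSketch(HOptimization* optimizations[],
                                   size_t length,
                                   PassObserver* pass_observer) {
  for (size_t i = 0; i < length; ++i) {
    // Each pass runs under an observer scope so it can be timed and its
    // graph dumped; the scope type and constructor are assumed here.
    PassScope scope(optimizations[i]->GetPassName(), pass_observer);
    optimizations[i]->Run();
  }
}
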
 
@@ -560,17 +572,18 @@
                                                      CompilerDriver* compiler_driver,
                                                      const DexCompilationUnit& dex_compilation_unit,
                                                      PassObserver* pass_observer) const {
+  if (graph->HasTryCatch() && graph->IsDebuggable()) {
+    // TODO: b/24054676, stop creating catch phis eagerly to avoid special cases like phis without
+    // inputs.
+    return nullptr;
+  }
+
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScopeCollection handles(soa.Self());
   soa.Self()->TransitionFromRunnableToSuspended(kNative);
   RunOptimizations(graph, compiler_driver, compilation_stats_.get(),
                    dex_compilation_unit, pass_observer, &handles);
 
-  if (graph->HasTryCatch()) {
-    soa.Self()->TransitionFromSuspendedToRunnable();
-    return nullptr;
-  }
-
   AllocateRegisters(graph, codegen, pass_observer);
 
   ArenaAllocator* arena = graph->GetArena();
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index 934514e..34850a5 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -71,23 +71,23 @@
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
     PrintString("BasicBlock ");
     PrintInt(block->GetBlockId());
-    const GrowableArray<HBasicBlock*>& predecessors = block->GetPredecessors();
-    if (!predecessors.IsEmpty()) {
+    const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
+    if (!predecessors.empty()) {
       PrintString(", pred: ");
-      for (size_t i = 0; i < predecessors.Size() -1; i++) {
-        PrintInt(predecessors.Get(i)->GetBlockId());
+      for (size_t i = 0; i < predecessors.size() - 1; i++) {
+        PrintInt(predecessors[i]->GetBlockId());
         PrintString(", ");
       }
-      PrintInt(predecessors.Peek()->GetBlockId());
+      PrintInt(predecessors.back()->GetBlockId());
     }
-    const GrowableArray<HBasicBlock*>& successors = block->GetSuccessors();
-    if (!successors.IsEmpty()) {
+    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
+    if (!successors.empty()) {
       PrintString(", succ: ");
-      for (size_t i = 0; i < successors.Size() - 1; i++) {
-        PrintInt(successors.Get(i)->GetBlockId());
+      for (size_t i = 0; i < successors.size() - 1; i++) {
+        PrintInt(successors[i]->GetBlockId());
         PrintString(", ");
       }
-      PrintInt(successors.Peek()->GetBlockId());
+      PrintInt(successors.back()->GetBlockId());
     }
     PrintNewLine();
     HGraphVisitor::VisitBasicBlock(block);
@@ -131,7 +131,7 @@
     PrintString("  ");
     PrintInt(gota->GetId());
     PrintString(": Goto ");
-    PrintInt(current_block_->GetSuccessors().Get(0)->GetBlockId());
+    PrintInt(current_block_->GetSuccessor(0)->GetBlockId());
     PrintNewLine();
   }
 
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 0384e46..a88c543 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -167,7 +167,7 @@
   ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
   HBoundType* bound_type = new (arena) HBoundType(obj, class_rti, upper_can_be_null);
   // Narrow the type as much as possible.
-  if (class_rti.GetTypeHandle()->IsFinal()) {
+  if (class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes()) {
     bound_type->SetReferenceTypeInfo(
         ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ true));
   } else if (obj_rti.IsValid() && class_rti.IsSupertypeOf(obj_rti)) {
@@ -380,7 +380,7 @@
   } else if (klass != nullptr) {
     ScopedObjectAccess soa(Thread::Current());
     ReferenceTypeInfo::TypeHandle handle = handles_->NewHandle(klass);
-    is_exact = is_exact || klass->IsFinal();
+    is_exact = is_exact || klass->CannotBeAssignedFromOtherTypes();
     instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact));
   } else {
     instr->SetReferenceTypeInfo(
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 37c8bc5..a4f1f45 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -56,6 +56,7 @@
         long_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
         float_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
         double_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
+        catch_phi_spill_slots_(0),
         safepoints_(allocator, 0),
         processing_core_registers_(false),
         number_of_registers_(-1),
@@ -124,9 +125,7 @@
   }
 }
 
-void RegisterAllocator::BlockRegister(Location location,
-                                      size_t start,
-                                      size_t end) {
+void RegisterAllocator::BlockRegister(Location location, size_t start, size_t end) {
   int reg = location.reg();
   DCHECK(location.IsRegister() || location.IsFpuRegister());
   LiveInterval* interval = location.IsRegister()
@@ -147,6 +146,19 @@
   interval->AddRange(start, end);
 }
 
+void RegisterAllocator::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
+  for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
+    if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
+      BlockRegister(Location::RegisterLocation(i), start, end);
+    }
+  }
+  for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
+    if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
+      BlockRegister(Location::FpuRegisterLocation(i), start, end);
+    }
+  }
+}
+
 void RegisterAllocator::AllocateRegistersInternal() {
   // Iterate post-order, to ensure the list is sorted, and the last added interval
   // is the one with the lowest start position.
@@ -159,6 +171,13 @@
     for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
       ProcessInstruction(inst_it.Current());
     }
+
+    if (block->IsCatchBlock()) {
+      // By blocking all registers at the top of each catch block, we force
+      // intervals that are live into the handler to spill.
+      size_t position = block->GetLifetimeStart();
+      BlockRegisters(position, position + 1);
+    }
   }
 
   number_of_registers_ = codegen_->GetNumberOfCoreRegisters();
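
The one-position block works because linear scan only hands out a register whose fixed (blocked) interval does not intersect the candidate interval, and any interval live across the catch entry intersects [entry, entry + 1) on every register. A tiny illustration of the half-open intersection test, with invented positions:

#include <cstddef>

// Half-open lifetime ranges [start, end), as the register allocator uses.
struct Range {
  size_t start;
  size_t end;
};

// True when a candidate interval may share a register with a fixed block.
bool Compatible(Range candidate, Range blocked) {
  return candidate.end <= blocked.start || blocked.end <= candidate.start;
}

// Compatible({30, 50}, {40, 41}) is false: an interval spanning a catch
// entry at position 40 cannot keep any register and must spill.
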
@@ -275,21 +294,7 @@
   }
 
   if (locations->WillCall()) {
-    // Block all registers.
-    for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
-      if (!codegen_->IsCoreCalleeSaveRegister(i)) {
-        BlockRegister(Location::RegisterLocation(i),
-                      position,
-                      position + 1);
-      }
-    }
-    for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
-      if (!codegen_->IsFloatingPointCalleeSaveRegister(i)) {
-        BlockRegister(Location::FpuRegisterLocation(i),
-                      position,
-                      position + 1);
-      }
-    }
+    BlockRegisters(position, position + 1, /* caller_save_only */ true);
   }
 
   for (size_t i = 0; i < instruction->InputCount(); ++i) {
@@ -378,6 +383,10 @@
     DCHECK(output.IsUnallocated() || output.IsConstant());
   }
 
+  if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+    AllocateSpillSlotForCatchPhi(instruction->AsPhi());
+  }
+
   // If needed, add interval to the list of unhandled intervals.
   if (current->HasSpillSlot() || instruction->IsConstant()) {
     // Split just before first register use.
@@ -1212,14 +1221,13 @@
    * moves in B3.
    */
   if (block_from->GetDominator() != nullptr) {
-    const GrowableArray<HBasicBlock*>& dominated = block_from->GetDominator()->GetDominatedBlocks();
-    for (size_t i = 0; i < dominated.Size(); ++i) {
-      size_t position = dominated.Get(i)->GetLifetimeStart();
+    for (HBasicBlock* dominated : block_from->GetDominator()->GetDominatedBlocks()) {
+      size_t position = dominated->GetLifetimeStart();
       if ((position > from) && (block_to->GetLifetimeStart() > position)) {
         // Even if we found a better block, we continue iterating in case
         // a dominated block is closer.
         // Note that dominated blocks are not sorted in liveness order.
-        block_to = dominated.Get(i);
+        block_to = dominated;
         DCHECK_NE(block_to, block_from);
       }
     }
@@ -1283,6 +1291,8 @@
   }
 
   HInstruction* defined_by = parent->GetDefinedBy();
+  DCHECK(!defined_by->IsPhi() || !defined_by->AsPhi()->IsCatchPhi());
+
   if (defined_by->IsParameterValue()) {
     // Parameters have their own stack slot.
     parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
@@ -1299,12 +1309,6 @@
     return;
   }
 
-  LiveInterval* last_sibling = interval;
-  while (last_sibling->GetNextSibling() != nullptr) {
-    last_sibling = last_sibling->GetNextSibling();
-  }
-  size_t end = last_sibling->GetEnd();
-
   GrowableArray<size_t>* spill_slots = nullptr;
   switch (interval->GetType()) {
     case Primitive::kPrimDouble:
@@ -1337,6 +1341,7 @@
     }
   }
 
+  size_t end = interval->GetLastSibling()->GetEnd();
   if (parent->NeedsTwoSpillSlots()) {
     if (slot == spill_slots->Size()) {
       // We need a new spill slot.
@@ -1372,6 +1377,28 @@
       || destination.IsDoubleStackSlot();
 }
 
+void RegisterAllocator::AllocateSpillSlotForCatchPhi(HPhi* phi) {
+  LiveInterval* interval = phi->GetLiveInterval();
+
+  HInstruction* previous_phi = phi->GetPrevious();
+  DCHECK(previous_phi == nullptr ||
+         previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
+      << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent.";
+
+  if (phi->IsVRegEquivalentOf(previous_phi)) {
+    // This is an equivalent of the previous phi. We need to assign the same
+    // catch phi slot.
+    DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
+    interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
+  } else {
+    // Allocate a new spill slot for this catch phi.
+    // TODO: Reuse spill slots when intervals of phis from different catch
+    //       blocks do not overlap.
+    interval->SetSpillSlot(catch_phi_spill_slots_);
+    catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+  }
+}
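
A compact simulation of the slot sharing above, under the precondition the DCHECK enforces (phis sorted by vreg number, equivalents adjacent). FakePhi and the driver are invented for illustration; only the slot arithmetic mirrors the code:

#include <cstddef>
#include <vector>

struct FakePhi {
  int vreg;
  bool needs_two_slots;  // Long/double phis occupy two vreg-sized slots.
  size_t slot;
};

size_t AssignCatchPhiSlots(std::vector<FakePhi>& phis) {
  size_t next_slot = 0u;  // Plays the role of catch_phi_spill_slots_.
  for (size_t i = 0; i < phis.size(); ++i) {
    if (i != 0u && phis[i].vreg == phis[i - 1].vreg) {
      phis[i].slot = phis[i - 1].slot;  // Equivalent phi shares the slot.
    } else {
      phis[i].slot = next_slot;
      next_slot += phis[i].needs_two_slots ? 2 : 1;
    }
  }
  return next_slot;
}

// Example: phis for {v1 int, v1 float, v3 long} get slots {0, 0, 1} and the
// method ends up with three catch phi spill slots.
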
+
 void RegisterAllocator::AddMove(HParallelMove* move,
                                 Location source,
                                 Location destination,
@@ -1498,7 +1525,7 @@
   DCHECK(IsValidDestination(destination)) << destination;
   if (source.Equals(destination)) return;
 
-  DCHECK_EQ(block->GetSuccessors().Size(), 1u);
+  DCHECK_EQ(block->NumberOfNormalSuccessors(), 1u);
   HInstruction* last = block->GetLastInstruction();
   // We insert moves at exit for phi predecessors and connecting blocks.
   // A block ending with an if cannot branch to a block with phis because
@@ -1725,13 +1752,13 @@
 
   // If `from` has only one successor, we can put the moves at the exit of it. Otherwise
   // we need to put the moves at the entry of `to`.
-  if (from->GetSuccessors().Size() == 1) {
+  if (from->NumberOfNormalSuccessors() == 1) {
     InsertParallelMoveAtExitOf(from,
                                interval->GetParent()->GetDefinedBy(),
                                source->ToLocation(),
                                destination->ToLocation());
   } else {
-    DCHECK_EQ(to->GetPredecessors().Size(), 1u);
+    DCHECK_EQ(to->GetPredecessors().size(), 1u);
     InsertParallelMoveAtEntryOf(to,
                                 interval->GetParent()->GetDefinedBy(),
                                 source->ToLocation(),
@@ -1769,17 +1796,25 @@
     } else if (instruction->IsCurrentMethod()) {
       // The current method is always at offset 0.
       DCHECK(!current->HasSpillSlot() || (current->GetSpillSlot() == 0));
+    } else if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
+      DCHECK(current->HasSpillSlot());
+      size_t slot = current->GetSpillSlot()
+                    + GetNumberOfSpillSlots()
+                    + reserved_out_slots_
+                    - catch_phi_spill_slots_;
+      current->SetSpillSlot(slot * kVRegSize);
     } else if (current->HasSpillSlot()) {
       // Adjust the stack slot, now that we know the number of them for each type.
       // The way this implementation lays out the stack is the following:
-      // [parameter slots     ]
-      // [double spill slots  ]
-      // [long spill slots    ]
-      // [float spill slots   ]
-      // [int/ref values      ]
-      // [maximum out values  ] (number of arguments for calls)
-      // [art method          ].
-      uint32_t slot = current->GetSpillSlot();
+      // [parameter slots       ]
+      // [catch phi spill slots ]
+      // [double spill slots    ]
+      // [long spill slots      ]
+      // [float spill slots     ]
+      // [int/ref values        ]
+      // [maximum out values    ] (number of arguments for calls)
+      // [art method            ].
+      size_t slot = current->GetSpillSlot();
       switch (current->GetType()) {
         case Primitive::kPrimDouble:
           slot += long_spill_slots_.Size();
@@ -1829,12 +1864,22 @@
   // Resolve non-linear control flow across branches. Order does not matter.
   for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
-    BitVector* live = liveness_.GetLiveInSet(*block);
-    for (uint32_t idx : live->Indexes()) {
-      HInstruction* current = liveness_.GetInstructionFromSsaIndex(idx);
-      LiveInterval* interval = current->GetLiveInterval();
-      for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
-        ConnectSplitSiblings(interval, block->GetPredecessors().Get(i), block);
+    if (block->IsCatchBlock()) {
+      // Instructions live at the top of catch blocks were forced to spill.
+      if (kIsDebugBuild) {
+        BitVector* live = liveness_.GetLiveInSet(*block);
+        for (uint32_t idx : live->Indexes()) {
+          LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
+          DCHECK(!interval->GetSiblingAt(block->GetLifetimeStart())->HasRegister());
+        }
+      }
+    } else {
+      BitVector* live = liveness_.GetLiveInSet(*block);
+      for (uint32_t idx : live->Indexes()) {
+        LiveInterval* interval = liveness_.GetInstructionFromSsaIndex(idx)->GetLiveInterval();
+        for (HBasicBlock* predecessor : block->GetPredecessors()) {
+          ConnectSplitSiblings(interval, predecessor, block);
+        }
       }
     }
   }
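
The catch-phi branch a hunk above maps a raw slot into the frame by adding GetNumberOfSpillSlots() and reserved_out_slots_ and subtracting catch_phi_spill_slots_, which places the catch phi area directly above the typed spill slots in the layout diagram. A hedged arithmetic check with invented counts:

#include <cstddef>

constexpr size_t kCatchPhiSpillSlots = 2;
constexpr size_t kNumberOfSpillSlots = 4 + kCatchPhiSpillSlots;  // 4 typed.
constexpr size_t kReservedOutSlots = 3;  // Out args plus the ArtMethod.
constexpr size_t kRawSlot = 0;  // First catch phi slot before adjustment.
// 0 + 6 + 3 - 2 == 7: the first vreg-sized slot above the out area and the
// four typed spill slots, matching the "[catch phi spill slots]" row.
static_assert(kRawSlot + kNumberOfSpillSlots + kReservedOutSlots
                  - kCatchPhiSpillSlots == 7,
              "catch phi slots sit above the typed spill slots");
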
@@ -1842,16 +1887,20 @@
   // Resolve phi inputs. Order does not matter.
   for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
     HBasicBlock* current = it.Current();
-    for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
-      HInstruction* phi = inst_it.Current();
-      for (size_t i = 0, e = current->GetPredecessors().Size(); i < e; ++i) {
-        HBasicBlock* predecessor = current->GetPredecessors().Get(i);
-        DCHECK_EQ(predecessor->GetSuccessors().Size(), 1u);
-        HInstruction* input = phi->InputAt(i);
-        Location source = input->GetLiveInterval()->GetLocationAt(
-            predecessor->GetLifetimeEnd() - 1);
-        Location destination = phi->GetLiveInterval()->ToLocation();
-        InsertParallelMoveAtExitOf(predecessor, phi, source, destination);
+    if (current->IsCatchBlock()) {
+      // Catch phi values are set at runtime by the exception delivery mechanism.
+    } else {
+      for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+        HInstruction* phi = inst_it.Current();
+        for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
+          HBasicBlock* predecessor = current->GetPredecessor(i);
+          DCHECK_EQ(predecessor->NumberOfNormalSuccessors(), 1u);
+          HInstruction* input = phi->InputAt(i);
+          Location source = input->GetLiveInterval()->GetLocationAt(
+              predecessor->GetLifetimeEnd() - 1);
+          Location destination = phi->GetLiveInterval()->ToLocation();
+          InsertParallelMoveAtExitOf(predecessor, phi, source, destination);
+        }
       }
     }
   }
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index c29fe75..e030464 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -29,6 +29,7 @@
 class HGraph;
 class HInstruction;
 class HParallelMove;
+class HPhi;
 class LiveInterval;
 class Location;
 class SsaLivenessAnalysis;
@@ -72,7 +73,8 @@
     return int_spill_slots_.Size()
         + long_spill_slots_.Size()
         + float_spill_slots_.Size()
-        + double_spill_slots_.Size();
+        + double_spill_slots_.Size()
+        + catch_phi_spill_slots_;
   }
 
   static constexpr const char* kRegisterAllocatorPassName = "register";
@@ -99,10 +101,17 @@
 
   // Update the interval for the register in `location` to cover [start, end).
   void BlockRegister(Location location, size_t start, size_t end);
+  void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
 
-  // Allocate a spill slot for the given interval.
+  // Allocate a spill slot for the given interval. Should be called in linear
+  // order of interval starting positions.
   void AllocateSpillSlotFor(LiveInterval* interval);
 
+  // Allocate a spill slot for the given catch phi. Will allocate the same slot
+  // for phis which share the same vreg. Must be called in reverse linear order
+  // of lifetime positions and ascending vreg numbers for correctness.
+  void AllocateSpillSlotForCatchPhi(HPhi* phi);
+
   // Connect adjacent siblings within blocks.
   void ConnectSiblings(LiveInterval* interval);
 
@@ -202,6 +211,11 @@
   GrowableArray<size_t> float_spill_slots_;
   GrowableArray<size_t> double_spill_slots_;
 
+  // Spill slots allocated to catch phis. This category is special-cased because
+  // (1) slots are allocated prior to linear scan and in reverse linear order,
+  // (2) equivalent phis need to share slots despite having different types.
+  size_t catch_phi_spill_slots_;
+
   // Instructions that need a safepoint.
   GrowableArray<HInstruction*> safepoints_;
 
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 561c3b4..e6209b9 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -241,8 +241,8 @@
     HBasicBlock* block = loop_headers_.Get(i);
     for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
       HPhi* phi = it.Current()->AsPhi();
-      for (size_t pred = 0; pred < block->GetPredecessors().Size(); pred++) {
-        HInstruction* input = ValueOfLocal(block->GetPredecessors().Get(pred), phi->GetRegNumber());
+      for (HBasicBlock* predecessor : block->GetPredecessors()) {
+        HInstruction* input = ValueOfLocal(predecessor, phi->GetRegNumber());
         phi->AddInput(input);
       }
     }
@@ -369,16 +369,16 @@
     // Save the loop header so that the last phase of the analysis knows which
     // blocks need to be updated.
     loop_headers_.Add(block);
-  } else if (block->GetPredecessors().Size() > 0) {
+  } else if (!block->GetPredecessors().empty()) {
     // All predecessors have already been visited because we are visiting in reverse post order.
     // We merge the values of all locals, creating phis if those values differ.
     for (size_t local = 0; local < current_locals_->Size(); local++) {
       bool one_predecessor_has_no_value = false;
       bool is_different = false;
-      HInstruction* value = ValueOfLocal(block->GetPredecessors().Get(0), local);
+      HInstruction* value = ValueOfLocal(block->GetPredecessor(0), local);
 
-      for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
-        HInstruction* current = ValueOfLocal(block->GetPredecessors().Get(i), local);
+      for (HBasicBlock* predecessor : block->GetPredecessors()) {
+        HInstruction* current = ValueOfLocal(predecessor, local);
         if (current == nullptr) {
           one_predecessor_has_no_value = true;
           break;
@@ -395,9 +395,9 @@
 
       if (is_different) {
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(), local, block->GetPredecessors().Size(), Primitive::kPrimVoid);
-        for (size_t i = 0; i < block->GetPredecessors().Size(); i++) {
-          HInstruction* pred_value = ValueOfLocal(block->GetPredecessors().Get(i), local);
+            GetGraph()->GetArena(), local, block->GetPredecessors().size(), Primitive::kPrimVoid);
+        for (size_t i = 0; i < block->GetPredecessors().size(); i++) {
+          HInstruction* pred_value = ValueOfLocal(block->GetPredecessor(i), local);
           phi->SetRawInputAt(i, pred_value);
         }
         block->AddPhi(phi);
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 40502c1..63635f3 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -73,7 +73,7 @@
   forward_predecessors.SetSize(graph_->GetBlocks().Size());
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
-    size_t number_of_forward_predecessors = block->GetPredecessors().Size();
+    size_t number_of_forward_predecessors = block->GetPredecessors().size();
     if (block->IsLoopHeader()) {
       number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges();
     }
@@ -89,8 +89,7 @@
   do {
     HBasicBlock* current = worklist.Pop();
     graph_->linear_order_.Add(current);
-    for (size_t i = 0, e = current->GetSuccessors().Size(); i < e; ++i) {
-      HBasicBlock* successor = current->GetSuccessors().Get(i);
+    for (HBasicBlock* successor : current->GetSuccessors()) {
       int block_id = successor->GetBlockId();
       size_t number_of_remaining_predecessors = forward_predecessors.Get(block_id);
       if (number_of_remaining_predecessors == 1) {
@@ -185,17 +184,27 @@
 
     // Set phi inputs of successors of this block corresponding to this block
     // as live_in.
-    for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) {
-      HBasicBlock* successor = block->GetSuccessors().Get(i);
+    for (HBasicBlock* successor : block->GetSuccessors()) {
       live_in->Union(GetLiveInSet(*successor));
-      size_t phi_input_index = successor->GetPredecessorIndexOf(block);
-      for (HInstructionIterator inst_it(successor->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
-        HInstruction* phi = inst_it.Current();
-        HInstruction* input = phi->InputAt(phi_input_index);
-        input->GetLiveInterval()->AddPhiUse(phi, phi_input_index, block);
-        // A phi input whose last user is the phi dies at the end of the predecessor block,
-        // and not at the phi's lifetime position.
-        live_in->SetBit(input->GetSsaIndex());
+      if (successor->IsCatchBlock()) {
+        // Inputs of catch phis will be kept alive through their environment
+        // uses, allowing the runtime to copy their values to the corresponding
+        // catch phi spill slots when an exception is thrown.
+        // The only instructions which may not be recorded in the environments
+        // are constants created by the SSA builder as typed equivalents of
+        // untyped constants from the bytecode, or phis with only such constants
+        // as inputs (verified by SSAChecker). Their raw binary value must
+        // therefore be the same, so we only need to keep one of them alive.
+      } else {
+        size_t phi_input_index = successor->GetPredecessorIndexOf(block);
+        for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+          HInstruction* phi = phi_it.Current();
+          HInstruction* input = phi->InputAt(phi_input_index);
+          input->GetLiveInterval()->AddPhiUse(phi, phi_input_index, block);
+          // A phi input whose last user is the phi dies at the end of the predecessor block,
+          // and not at the phi's lifetime position.
+          live_in->SetBit(input->GetSsaIndex());
+        }
       }
     }
 
@@ -296,8 +305,7 @@
   BitVector* live_out = GetLiveOutSet(block);
   bool changed = false;
   // The live_out set of a block is the union of live_in sets of its successors.
-  for (size_t i = 0, e = block.GetSuccessors().Size(); i < e; ++i) {
-    HBasicBlock* successor = block.GetSuccessors().Get(i);
+  for (HBasicBlock* successor : block.GetSuccessors()) {
     if (live_out->Union(GetLiveInSet(*successor))) {
       changed = true;
     }
@@ -342,8 +350,8 @@
     // will avoid a move between the two blocks.
     HBasicBlock* block = liveness.GetBlockFromPosition(GetStart() / 2);
     size_t next_register_use = FirstRegisterUse();
-    for (size_t i = 0; i < block->GetPredecessors().Size(); ++i) {
-      size_t position = block->GetPredecessors().Get(i)->GetLifetimeEnd() - 1;
+    for (HBasicBlock* predecessor : block->GetPredecessors()) {
+      size_t position = predecessor->GetLifetimeEnd() - 1;
       // We know positions above GetStart() do not have a location yet.
       if (position < GetStart()) {
         LiveInterval* existing = GetParent()->GetSiblingAt(position);
@@ -376,17 +384,16 @@
             return reg;
           }
         }
-        const GrowableArray<HBasicBlock*>& predecessors = user->GetBlock()->GetPredecessors();
         // If the instruction dies at the phi assignment, we can try having the
         // same register.
-        if (end == predecessors.Get(input_index)->GetLifetimeEnd()) {
+        if (end == user->GetBlock()->GetPredecessor(input_index)->GetLifetimeEnd()) {
           for (size_t i = 0, e = user->InputCount(); i < e; ++i) {
             if (i == input_index) {
               continue;
             }
             HInstruction* input = user->InputAt(i);
             Location location = input->GetLiveInterval()->GetLocationAt(
-                predecessors.Get(i)->GetLifetimeEnd() - 1);
+                user->GetBlock()->GetPredecessor(i)->GetLifetimeEnd() - 1);
             if (location.IsRegisterKind()) {
               int reg = RegisterOrLowRegister(location);
               if (free_until[reg] >= use_position) {
@@ -420,10 +427,11 @@
 int LiveInterval::FindHintAtDefinition() const {
   if (defined_by_->IsPhi()) {
     // Try to use the same register as one of the inputs.
-    const GrowableArray<HBasicBlock*>& predecessors = defined_by_->GetBlock()->GetPredecessors();
+    const ArenaVector<HBasicBlock*>& predecessors = defined_by_->GetBlock()->GetPredecessors();
     for (size_t i = 0, e = defined_by_->InputCount(); i < e; ++i) {
       HInstruction* input = defined_by_->InputAt(i);
-      size_t end = predecessors.Get(i)->GetLifetimeEnd();
+      DCHECK_LT(i, predecessors.size());
+      size_t end = predecessors[i]->GetLifetimeEnd();
       LiveInterval* input_interval = input->GetLiveInterval()->GetSiblingAt(end - 1);
       if (input_interval->GetEnd() == end) {
         // If the input dies at the end of the predecessor, we know its register can
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index a7044de..ef396cb 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -1209,6 +1209,9 @@
     // A value that's not live in compiled code may still be needed in interpreter,
     // due to code motion, etc.
     if (env_holder->IsDeoptimize()) return true;
+    // A value live at a throwing instruction in a try block may be copied by
+    // the exception handler to its location at the top of the catch block.
+    if (env_holder->CanThrowIntoCatchBlock()) return true;
     if (instruction->GetBlock()->GetGraph()->IsDebuggable()) return true;
     return instruction->GetType() == Primitive::kPrimNot;
   }
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 1f1530f..1f0bac5 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -286,7 +286,7 @@
         stack_map.SetDexRegisterMapOffset(
             stack_map_encoding_,
             code_info.GetStackMapAt(entry.same_dex_register_map_as_, stack_map_encoding_)
-                     .GetDexRegisterMapOffset(stack_map_encoding_));
+                .GetDexRegisterMapOffset(stack_map_encoding_));
       } else {
         // New dex registers maps should be added to the stack map.
         MemoryRegion register_region = dex_register_locations_region.Subregion(
diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc
index 5ca66a1..e745d94 100644
--- a/compiler/optimizing/suspend_check_test.cc
+++ b/compiler/optimizing/suspend_check_test.cc
@@ -36,7 +36,7 @@
   bool graph_built = builder.BuildGraph(*item);
   ASSERT_TRUE(graph_built);
 
-  HBasicBlock* first_block = graph->GetEntryBlock()->GetSuccessors().Get(0);
+  HBasicBlock* first_block = graph->GetEntryBlock()->GetSuccessor(0);
   HInstruction* first_instruction = first_block->GetFirstInstruction();
   // Account for some tests having a store local as first instruction.
   ASSERT_TRUE(first_instruction->IsSuspendCheck()
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index dbcaab9..a4d1837 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -495,6 +495,7 @@
   virtual void clz(Register rd, Register rm, Condition cond = AL) = 0;
   virtual void movw(Register rd, uint16_t imm16, Condition cond = AL) = 0;
   virtual void movt(Register rd, uint16_t imm16, Condition cond = AL) = 0;
+  virtual void rbit(Register rd, Register rm, Condition cond = AL) = 0;
 
   // Multiply instructions.
   virtual void mul(Register rd, Register rn, Register rm, Condition cond = AL) = 0;
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 184d8a5..f7772ae 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -735,6 +735,20 @@
 }
 
 
+void Arm32Assembler::rbit(Register rd, Register rm, Condition cond) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  CHECK_NE(rd, PC);
+  CHECK_NE(rm, PC);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B26 | B25 | B23 | B22 | B21 | B20 | (0xf << 16) |
+                     (static_cast<int32_t>(rd) << kRdShift) |
+                     (0xf << 8) | B5 | B4 | static_cast<int32_t>(rm);
+  Emit(encoding);
+}
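
Worked check of the A1 encoding assembled above for rbit r1, r0 with the default AL condition (my arithmetic from the masks in the code, worth cross-checking against the ARM ARM):

  cond << 28 (AL = 0b1110)        = 0xE0000000
  B26|B25|B23|B22|B21|B20         = 0x06F00000
  0xf << 16                       = 0x000F0000
  rd << kRdShift (r1, shift 12)   = 0x00001000
  0xf << 8                        = 0x00000F00
  B5|B4                           = 0x00000030
  rm (r0)                         = 0x00000000
  encoding                        = 0xE6FF1F30
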
+
+
 void Arm32Assembler::EmitMulOp(Condition cond, int32_t opcode,
                                Register rd, Register rn,
                                Register rm, Register rs) {
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 17c6747..3407369 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -87,6 +87,7 @@
   void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
   void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
   void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
+  void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE;
 
   // Multiply instructions.
   void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc
index e6412ac..2a0912e 100644
--- a/compiler/utils/arm/assembler_arm32_test.cc
+++ b/compiler/utils/arm/assembler_arm32_test.cc
@@ -883,4 +883,8 @@
   DriverStr(expected, "strexd");
 }
 
+TEST_F(AssemblerArm32Test, rbit) {
+  T3Helper(&arm::Arm32Assembler::rbit, true, "rbit{cond} {reg1}, {reg2}", "rbit");
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index b677789..0f6c4f5 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2426,6 +2426,25 @@
 }
 
 
+void Thumb2Assembler::rbit(Register rd, Register rm, Condition cond) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CheckCondition(cond);
+  CHECK_NE(rd, PC);
+  CHECK_NE(rm, PC);
+  CHECK_NE(rd, SP);
+  CHECK_NE(rm, SP);
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 |
+      B25 | B23 | B20 |
+      static_cast<uint32_t>(rm) << 16 |
+      0xf << 12 |
+      static_cast<uint32_t>(rd) << 8 |
+      B7 | B5 |
+      static_cast<uint32_t>(rm);
+  Emit32(encoding);
+}
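
Worked check of the T1 encoding above, again my arithmetic: the fixed bits sum to 0xFA90F0A0, rd << 8 contributes 0x100 for r1, and rm is or-ed in twice, at bits 19:16 and 3:0, so rbit r1, r0 assembles to 0xFA90F1A0 (halfwords fa90 f1a0). The duplicated rm is not a typo in the code: the architecture requires both Rm fields of this encoding to name the same register.
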
+
+
 void Thumb2Assembler::ldrex(Register rt, Register rn, uint16_t imm, Condition cond) {
   CHECK_NE(rn, kNoRegister);
   CHECK_NE(rt, kNoRegister);
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 6dee68e..a1a8927 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -111,6 +111,7 @@
   void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
   void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
   void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
+  void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE;
 
   // Multiply instructions.
   void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 84f5cb1..9c08ce0 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -1019,4 +1019,12 @@
   DriverStr(expected, "clz");
 }
 
+TEST_F(AssemblerThumb2Test, rbit) {
+  __ rbit(arm::R1, arm::R0);
+
+  const char* expected = "rbit r1, r0\n";
+
+  DriverStr(expected, "rbit");
+}
+
 }  // namespace art
diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h
index b1d7b4c..b6a228c 100644
--- a/compiler/utils/test_dex_file_builder.h
+++ b/compiler/utils/test_dex_file_builder.h
@@ -216,7 +216,7 @@
     std::unique_ptr<const DexFile> dex_file(DexFile::Open(
         &dex_file_data_[0], dex_file_data_.size(), dex_location, 0u, nullptr, &error_msg));
     CHECK(dex_file != nullptr) << error_msg;
-    return std::move(dex_file);
+    return dex_file;
   }
 
   uint32_t GetStringIdx(const std::string& type) {
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index e3962b4..04e815a 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -158,6 +158,20 @@
   EmitUint8(0xC8 + dst);
 }
 
+void X86Assembler::bsfl(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xBC);
+  EmitRegisterOperand(dst, src);
+}
+
+void X86Assembler::bsfl(Register dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xBC);
+  EmitOperand(dst, src);
+}
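
Worked bytes for the register form above, using the standard 0F BC /r encoding with the destination in the ModRM reg field: bsfl %ebx, %edi emits 0F BC FB, since EmitRegisterOperand(dst, src) forms ModRM = 0xC0 | (EDI << 3) | EBX = 0xC0 | 0x38 | 0x03 = 0xFB.
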
+
 void X86Assembler::bsrl(Register dst, Register src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
@@ -1423,6 +1437,26 @@
 }
 
 
+void X86Assembler::roll(Register reg, const Immediate& imm) {
+  EmitGenericShift(0, Operand(reg), imm);
+}
+
+
+void X86Assembler::roll(Register operand, Register shifter) {
+  EmitGenericShift(0, Operand(operand), shifter);
+}
+
+
+void X86Assembler::rorl(Register reg, const Immediate& imm) {
+  EmitGenericShift(1, Operand(reg), imm);
+}
+
+
+void X86Assembler::rorl(Register operand, Register shifter) {
+  EmitGenericShift(1, Operand(operand), shifter);
+}
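
The first argument to EmitGenericShift is the /digit placed in the ModRM reg field: 0 selects ROL and 1 selects ROR within the C1 /digit ib and D3 /digit encodings (the shared helper presumably also emits the short D1 form for a count of one; that detail is an assumption, since the helper is defined elsewhere in this file). Worked bytes: rorl $4, %ecx encodes as C1 C9 04 (ModRM 0xC9 = 11 001 001b) and roll %cl, %edx as D3 C2 (ModRM 0xC2 = 11 000 010b).
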
+
+
 void X86Assembler::negl(Register reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF7);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 7d7b3d3..af78663 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -319,9 +319,16 @@
   void movntl(const Address& dst, Register src);
 
   void bswapl(Register dst);
+  void bsfl(Register dst, Register src);
+  void bsfl(Register dst, const Address& src);
   void bsrl(Register dst, Register src);
   void bsrl(Register dst, const Address& src);
 
+  void rorl(Register reg, const Immediate& imm);
+  void rorl(Register operand, Register shifter);
+  void roll(Register reg, const Immediate& imm);
+  void roll(Register operand, Register shifter);
+
   void movzxb(Register dst, ByteRegister src);
   void movzxb(Register dst, const Address& src);
   void movsxb(Register dst, ByteRegister src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 9ac54af..16f9db4 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -32,6 +32,10 @@
 
 class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, x86::Register,
                                               x86::XmmRegister, x86::Immediate> {
+ public:
+  typedef AssemblerTest<x86::X86Assembler, x86::Register,
+                        x86::XmmRegister, x86::Immediate> Base;
+
  protected:
   std::string GetArchitectureString() OVERRIDE {
     return "x86";
@@ -230,6 +234,19 @@
   DriverStr(expected, "rep_movsw");
 }
 
+TEST_F(AssemblerX86Test, Bsfl) {
+  DriverStr(RepeatRR(&x86::X86Assembler::bsfl, "bsfl %{reg2}, %{reg1}"), "bsfl");
+}
+
+TEST_F(AssemblerX86Test, BsflAddress) {
+  GetAssembler()->bsfl(x86::Register(x86::EDI), x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+  const char* expected =
+    "bsfl 0xc(%EDI,%EBX,4), %EDI\n";
+
+  DriverStr(expected, "bsfl_address");
+}
+
 TEST_F(AssemblerX86Test, Bsrl) {
   DriverStr(RepeatRR(&x86::X86Assembler::bsrl, "bsrl %{reg2}, %{reg1}"), "bsrl");
 }
@@ -243,6 +260,52 @@
   DriverStr(expected, "bsrl_address");
 }
 
+// Rorl only allows CL as the shift count.
+std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86::Register*> registers = assembler_test->GetRegisters();
+
+  x86::Register shifter(x86::ECX);
+  for (auto reg : registers) {
+    assembler->rorl(*reg, shifter);
+    str << "rorl %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86Test, RorlReg) {
+  DriverFn(&rorl_fn, "rorl");
+}
+
+TEST_F(AssemblerX86Test, RorlImm) {
+  DriverStr(RepeatRI(&x86::X86Assembler::rorl, 1U, "rorl ${imm}, %{reg}"), "rorli");
+}
+
+// Roll only allows CL as the shift count.
+std::string roll_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86::Register*> registers = assembler_test->GetRegisters();
+
+  x86::Register shifter(x86::ECX);
+  for (auto reg : registers) {
+    assembler->roll(*reg, shifter);
+    str << "roll %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86Test, RollReg) {
+  DriverFn(&roll_fn, "roll");
+}
+
+TEST_F(AssemblerX86Test, RollImm) {
+  DriverStr(RepeatRI(&x86::X86Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli");
+}
+
 /////////////////
 // Near labels //
 /////////////////
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 88ea990..89d7915 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1866,6 +1866,46 @@
 }
 
 
+void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) {
+  EmitGenericShift(false, 0, reg, imm);
+}
+
+
+void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) {
+  EmitGenericShift(false, 0, operand, shifter);
+}
+
+
+void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) {
+  EmitGenericShift(false, 1, reg, imm);
+}
+
+
+void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) {
+  EmitGenericShift(false, 1, operand, shifter);
+}
+
+
+void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) {
+  EmitGenericShift(true, 0, reg, imm);
+}
+
+
+void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) {
+  EmitGenericShift(true, 0, operand, shifter);
+}
+
+
+void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) {
+  EmitGenericShift(true, 1, reg, imm);
+}
+
+
+void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) {
+  EmitGenericShift(true, 1, operand, shifter);
+}
+
+
 void X86_64Assembler::negl(CpuRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg);
@@ -2140,6 +2180,38 @@
   EmitUint8(0xC8 + dst.LowBits());
 }
 
+void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xBC);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xBC);
+  EmitOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xBC);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xBC);
+  EmitOperand(dst.LowBits(), src);
+}
+
 void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
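
The 64-bit variants above differ from the 32-bit ones only in the REX prefix:
EmitRex64 sets REX.W for 64-bit operand size, while REX.R and REX.B extend the
ModRM reg/rm fields so R8-R15 become encodable, which is why the emitters pass
LowBits() (the low three register bits) into the ModRM byte. A sketch of the
computation (illustration only, not ART's API):

    #include <cstdint>
    #include <vector>

    // Emit BSF r64, r64 (REX.W 0F BC /r). dst/src are 0-15 (RAX..R15).
    void EmitBsfq(std::vector<uint8_t>* out, int dst, int src) {
      uint8_t rex = 0x48;         // 0100WRXB with W=1
      if (dst >= 8) rex |= 0x04;  // REX.R extends ModRM.reg
      if (src >= 8) rex |= 0x01;  // REX.B extends ModRM.rm
      out->push_back(rex);
      out->push_back(0x0F);
      out->push_back(0xBC);
      out->push_back(static_cast<uint8_t>(0xC0 | ((dst & 7) << 3) | (src & 7)));
    }

    // EmitBsfq(&buf, 10 /*R10*/, 3 /*RBX*/) produces 4C 0F BC D3,
    // i.e. "bsf %rbx, %r10".
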
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index c38aba5..c8875e8 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -633,11 +633,26 @@
   void bswapl(CpuRegister dst);
   void bswapq(CpuRegister dst);
 
+  void bsfl(CpuRegister dst, CpuRegister src);
+  void bsfl(CpuRegister dst, const Address& src);
+  void bsfq(CpuRegister dst, CpuRegister src);
+  void bsfq(CpuRegister dst, const Address& src);
+
   void bsrl(CpuRegister dst, CpuRegister src);
   void bsrl(CpuRegister dst, const Address& src);
   void bsrq(CpuRegister dst, CpuRegister src);
   void bsrq(CpuRegister dst, const Address& src);
 
+  void rorl(CpuRegister reg, const Immediate& imm);
+  void rorl(CpuRegister operand, CpuRegister shifter);
+  void roll(CpuRegister reg, const Immediate& imm);
+  void roll(CpuRegister operand, CpuRegister shifter);
+
+  void rorq(CpuRegister reg, const Immediate& imm);
+  void rorq(CpuRegister operand, CpuRegister shifter);
+  void rolq(CpuRegister reg, const Immediate& imm);
+  void rolq(CpuRegister operand, CpuRegister shifter);
+
   void repne_scasw();
   void repe_cmpsw();
   void repe_cmpsl();
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 9e64b47..82378f7 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -495,6 +495,98 @@
   DriverStr(RepeatRI(&x86_64::X86_64Assembler::sarq, 1U, "sarq ${imm}, %{reg}"), "sarqi");
 }
 
+// Rorl only allows CL as the shift count.
+std::string rorl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->rorl(*reg, shifter);
+    str << "rorl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, RorlReg) {
+  DriverFn(&rorl_fn, "rorl");
+}
+
+TEST_F(AssemblerX86_64Test, RorlImm) {
+  DriverStr(Repeatri(&x86_64::X86_64Assembler::rorl, 1U, "rorl ${imm}, %{reg}"), "rorli");
+}
+
+// Roll only allows CL as the shift count.
+std::string roll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->roll(*reg, shifter);
+    str << "roll %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, RollReg) {
+  DriverFn(&roll_fn, "roll");
+}
+
+TEST_F(AssemblerX86_64Test, RollImm) {
+  DriverStr(Repeatri(&x86_64::X86_64Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli");
+}
+
+// Rorq only allows CL as the shift count.
+std::string rorq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->rorq(*reg, shifter);
+    str << "rorq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, RorqReg) {
+  DriverFn(&rorq_fn, "rorq");
+}
+
+TEST_F(AssemblerX86_64Test, RorqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::rorq, 1U, "rorq ${imm}, %{reg}"), "rorqi");
+}
+
+// Rolq only allows CL as the shift count.
+std::string rolq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->rolq(*reg, shifter);
+    str << "rolq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, RolqReg) {
+  DriverFn(&rolq_fn, "rolq");
+}
+
+TEST_F(AssemblerX86_64Test, RolqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::rolq, 1U, "rolq ${imm}, %{reg}"), "rolqi");
+}
+
 TEST_F(AssemblerX86_64Test, CmpqRegs) {
   DriverStr(RepeatRR(&x86_64::X86_64Assembler::cmpq, "cmpq %{reg2}, %{reg1}"), "cmpq");
 }
@@ -1141,6 +1233,44 @@
   DriverStr(RepeatR(&x86_64::X86_64Assembler::bswapq, "bswap %{reg}"), "bswapq");
 }
 
+TEST_F(AssemblerX86_64Test, Bsfl) {
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::bsfl, "bsfl %{reg2}, %{reg1}"), "bsfl");
+}
+
+TEST_F(AssemblerX86_64Test, BsflAddress) {
+  GetAssembler()->bsfl(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->bsfl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->bsfl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+  const char* expected =
+    "bsfl 0xc(%RDI,%RBX,4), %R10d\n"
+    "bsfl 0xc(%R10,%RBX,4), %edi\n"
+    "bsfl 0xc(%RDI,%R9,4), %edi\n";
+
+  DriverStr(expected, "bsfl_address");
+}
+
+TEST_F(AssemblerX86_64Test, Bsfq) {
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::bsfq, "bsfq %{reg2}, %{reg1}"), "bsfq");
+}
+
+TEST_F(AssemblerX86_64Test, BsfqAddress) {
+  GetAssembler()->bsfq(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->bsfq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+  GetAssembler()->bsfq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+  const char* expected =
+    "bsfq 0xc(%RDI,%RBX,4), %R10\n"
+    "bsfq 0xc(%R10,%RBX,4), %RDI\n"
+    "bsfq 0xc(%RDI,%R9,4), %RDI\n";
+
+  DriverStr(expected, "bsfq_address");
+}
+
 TEST_F(AssemblerX86_64Test, Bsrl) {
   DriverStr(Repeatrr(&x86_64::X86_64Assembler::bsrl, "bsrl %{reg2}, %{reg1}"), "bsrl");
 }
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index d4574f4..d4bef0f 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -928,6 +928,11 @@
         has_modrm = true;
         load = true;
         break;
+      case 0xBC:
+        opcode1 = "bsf";
+        has_modrm = true;
+        load = true;
+        break;
       case 0xBD:
         opcode1 = "bsr";
         has_modrm = true;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index aa4cf55..e248604 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1606,10 +1606,8 @@
       // stack. Need to revoke the thread-local allocation stacks that
       // point into it.
       ScopedThreadSuspension sts(self, kNative);
-      ThreadList* thread_list = Runtime::Current()->GetThreadList();
-      thread_list->SuspendAll(__FUNCTION__);
+      ScopedSuspendAll ssa(__FUNCTION__);
       heap->RevokeAllThreadLocalAllocationStacks(self);
-      thread_list->ResumeAll();
     }
     {
       // Mark dex caches.
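
This hunk, like many below in runtime/debugger.cc, replaces a manual
SuspendAll/ResumeAll pair with a ScopedSuspendAll guard, so the resume can no
longer be skipped by an early return. A minimal sketch of the RAII shape,
assuming the guard simply wraps the existing ThreadList calls (the real ART
class may carry extra bookkeeping):

    class ScopedSuspendAll {
     public:
      explicit ScopedSuspendAll(const char* cause) {
        Runtime::Current()->GetThreadList()->SuspendAll(cause);
      }
      ~ScopedSuspendAll() {
        // Runs on every exit from the enclosing scope, so suspension is
        // always balanced by a resume.
        Runtime::Current()->GetThreadList()->ResumeAll();
      }
     private:
      DISALLOW_COPY_AND_ASSIGN(ScopedSuspendAll);
    };
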
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 963eecb..995a1d5 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -99,6 +99,7 @@
   jit/jit.cc \
   jit/jit_code_cache.cc \
   jit/jit_instrumentation.cc \
+  jit/profiling_info.cc \
   lambda/art_lambda_method.cc \
   lambda/box_table.cc \
   lambda/closure.cc \
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index e6710ed..f10799c 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -326,9 +326,9 @@
           [referrer] "r"(referrer)
         : "at", "v0", "v1", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1",
           "fp", "ra",
-          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
-          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
-          "f27", "f28", "f29", "f30", "f31",
+          "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
+          "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22",
+          "$f23", "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31",
           "memory");  // clobber.
 #elif defined(__mips__) && defined(__LP64__)
     __asm__ __volatile__ (
@@ -680,9 +680,9 @@
           [referrer] "r"(referrer), [hidden] "r"(hidden)
         : "at", "v0", "v1", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1",
           "fp", "ra",
-          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
-          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
-          "f27", "f28", "f29", "f30", "f31",
+          "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
+          "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22",
+          "$f23", "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31",
           "memory");  // clobber.
 #elif defined(__mips__) && defined(__LP64__)
     __asm__ __volatile__ (
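
The clobber-list fix above matters because the compiler matches clobber strings
against its register names; on MIPS the floating-point registers are spelled
$f0..$f31, so the bare "f0" spellings did not reliably pin those registers. A
hypothetical minimal use of the corrected spelling:

    // Illustration only: zero $f0 via inline asm, declaring the clobber
    // with the $-prefixed name the MIPS backend recognizes.
    void ZeroF0() {
    #if defined(__mips__)
      __asm__ __volatile__(
          "mtc1 $zero, $f0"
          :            // no outputs
          :            // no inputs
          : "$f0");    // clobber list
    #endif
    }
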
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index cfd7fcd..d6b2b7e 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -26,6 +26,7 @@
 #include "dex_file.h"
 #include "dex_file-inl.h"
 #include "gc_root-inl.h"
+#include "jit/profiling_info.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
@@ -527,13 +528,12 @@
 }
 
 template<typename RootVisitorType>
-void ArtMethod::VisitRoots(RootVisitorType& visitor) {
+void ArtMethod::VisitRoots(RootVisitorType& visitor, size_t pointer_size) {
   ArtMethod* interface_method = nullptr;
   mirror::Class* klass = declaring_class_.Read();
   if (UNLIKELY(klass != nullptr && klass->IsProxyClass())) {
     // For normal methods, dex cache shortcuts will be visited through the declaring class.
     // However, for proxies we need to keep the interface method alive, so we visit its roots.
-    size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
     interface_method = mirror::DexCache::GetElementPtrSize(
         GetDexCacheResolvedMethods(pointer_size),
         GetDexMethodIndex(),
@@ -541,10 +541,14 @@
     DCHECK(interface_method != nullptr);
     DCHECK_EQ(interface_method,
               Runtime::Current()->GetClassLinker()->FindMethodForProxy(klass, this));
-    interface_method->VisitRoots(visitor);
+    interface_method->VisitRoots(visitor, pointer_size);
   }
 
   visitor.VisitRootIfNonNull(declaring_class_.AddressWithoutBarrier());
+  ProfilingInfo* profiling_info = GetProfilingInfo(pointer_size);
+  if (hotness_count_ != 0 && !IsNative() && profiling_info != nullptr) {
+    profiling_info->VisitRoots(visitor);
+  }
 }
 
 inline void ArtMethod::CopyFrom(const ArtMethod* src, size_t image_pointer_size) {
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 64416d2..92648b9 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -30,6 +30,7 @@
 #include "interpreter/interpreter.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
+#include "jit/profiling_info.h"
 #include "jni_internal.h"
 #include "mapping_table.h"
 #include "mirror/abstract_method.h"
@@ -222,28 +223,48 @@
   return DexFile::kDexNoIndex;
 }
 
-uintptr_t ArtMethod::ToNativeQuickPc(const uint32_t dex_pc, bool abort_on_failure) {
+uintptr_t ArtMethod::ToNativeQuickPc(const uint32_t dex_pc,
+                                     bool is_for_catch_handler,
+                                     bool abort_on_failure) {
   const void* entry_point = GetQuickOatEntryPoint(sizeof(void*));
-  MappingTable table(entry_point != nullptr ?
-      GetMappingTable(EntryPointToCodePointer(entry_point), sizeof(void*)) : nullptr);
-  if (table.TotalSize() == 0) {
-    DCHECK_EQ(dex_pc, 0U);
-    return 0;   // Special no mapping/pc == 0 case
-  }
-  // Assume the caller wants a dex-to-pc mapping so check here first.
-  typedef MappingTable::DexToPcIterator It;
-  for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
-    if (cur.DexPc() == dex_pc) {
-      return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
+  if (IsOptimized(sizeof(void*))) {
+    // Optimized code does not have a mapping table. Search for the dex-to-pc
+    // mapping in stack maps.
+    CodeInfo code_info = GetOptimizedCodeInfo();
+    StackMapEncoding encoding = code_info.ExtractEncoding();
+
+    // All stack maps are stored in the same CodeItem section, safepoint stack
+    // maps first, then catch stack maps. We use `is_for_catch_handler` to select
+    // the order of iteration.
+    StackMap stack_map =
+        LIKELY(is_for_catch_handler) ? code_info.GetCatchStackMapForDexPc(dex_pc, encoding)
+                                     : code_info.GetStackMapForDexPc(dex_pc, encoding);
+    if (stack_map.IsValid()) {
+      return reinterpret_cast<uintptr_t>(entry_point) + stack_map.GetNativePcOffset(encoding);
+    }
+  } else {
+    MappingTable table(entry_point != nullptr ?
+        GetMappingTable(EntryPointToCodePointer(entry_point), sizeof(void*)) : nullptr);
+    if (table.TotalSize() == 0) {
+      DCHECK_EQ(dex_pc, 0U);
+      return 0;   // Special no mapping/pc == 0 case
+    }
+    // Assume the caller wants a dex-to-pc mapping so check here first.
+    typedef MappingTable::DexToPcIterator It;
+    for (It cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
+      if (cur.DexPc() == dex_pc) {
+        return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
+      }
+    }
+    // Now check pc-to-dex mappings.
+    typedef MappingTable::PcToDexIterator It2;
+    for (It2 cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
+      if (cur.DexPc() == dex_pc) {
+        return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
+      }
     }
   }
-  // Now check pc-to-dex mappings.
-  typedef MappingTable::PcToDexIterator It2;
-  for (It2 cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
-    if (cur.DexPc() == dex_pc) {
-      return reinterpret_cast<uintptr_t>(entry_point) + cur.NativePcOffset();
-    }
-  }
+
   if (abort_on_failure) {
     LOG(FATAL) << "Failed to find native offset for dex pc 0x" << std::hex << dex_pc
                << " in " << PrettyMethod(this);
@@ -579,4 +600,17 @@
   return oat_method.GetVmapTable();
 }
 
+ProfilingInfo* ArtMethod::CreateProfilingInfo() {
+  DCHECK(!Runtime::Current()->IsAotCompiler());
+  ProfilingInfo* info = ProfilingInfo::Create(this);
+  MemberOffset offset = ArtMethod::EntryPointFromJniOffset(sizeof(void*));
+  uintptr_t pointer = reinterpret_cast<uintptr_t>(this) + offset.Uint32Value();
+  if (!reinterpret_cast<Atomic<ProfilingInfo*>*>(pointer)->
+          CompareExchangeStrongSequentiallyConsistent(nullptr, info)) {
+    return GetProfilingInfo(sizeof(void*));
+  } else {
+    return info;
+  }
+}
+
 }  // namespace art
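
CreateProfilingInfo above installs the new ProfilingInfo into the reused
JNI-entrypoint slot with a strong compare-exchange, so when two threads race,
exactly one pointer wins and the loser simply returns the winner's object via
GetProfilingInfo. The publish-once idiom, reduced to standard C++ with explicit
cleanup for the loser (a sketch, not ART's code, which relies on its own
allocator):

    #include <atomic>

    struct ProfilingInfo { /* counters, inline caches, ... */ };

    std::atomic<ProfilingInfo*> g_info{nullptr};

    ProfilingInfo* GetOrCreateInfo() {
      ProfilingInfo* info = new ProfilingInfo();
      ProfilingInfo* expected = nullptr;
      if (!g_info.compare_exchange_strong(expected, info)) {
        delete info;      // lost the race; `expected` now holds the winner
        return expected;
      }
      return info;        // won the race; everyone else sees this pointer
    }
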
diff --git a/runtime/art_method.h b/runtime/art_method.h
index e0b11d0..f78c827 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -33,6 +33,7 @@
 namespace art {
 
 union JValue;
+class ProfilingInfo;
 class ScopedObjectAccessAlreadyRunnable;
 class StringPiece;
 class ShadowFrame;
@@ -389,16 +390,25 @@
         PtrSizedFields, entry_point_from_quick_compiled_code_) / sizeof(void*) * pointer_size);
   }
 
+  ProfilingInfo* CreateProfilingInfo() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ProfilingInfo* GetProfilingInfo(size_t pointer_size) {
+    return reinterpret_cast<ProfilingInfo*>(GetEntryPointFromJniPtrSize(pointer_size));
+  }
+
   void* GetEntryPointFromJni() {
     return GetEntryPointFromJniPtrSize(sizeof(void*));
   }
+
   ALWAYS_INLINE void* GetEntryPointFromJniPtrSize(size_t pointer_size) {
     return GetNativePointer<void*>(EntryPointFromJniOffset(pointer_size), pointer_size);
   }
 
   void SetEntryPointFromJni(const void* entrypoint) SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsNative());
     SetEntryPointFromJniPtrSize(entrypoint, sizeof(void*));
   }
+
   ALWAYS_INLINE void SetEntryPointFromJniPtrSize(const void* entrypoint, size_t pointer_size) {
     SetNativePointer(EntryPointFromJniOffset(pointer_size), entrypoint, pointer_size);
   }
@@ -432,7 +442,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Converts a dex PC to a native PC.
-  uintptr_t ToNativeQuickPc(const uint32_t dex_pc, bool abort_on_failure = true)
+  uintptr_t ToNativeQuickPc(const uint32_t dex_pc,
+                            bool is_for_catch_handler,
+                            bool abort_on_failure = true)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   MethodReference ToMethodReference() SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -448,7 +460,7 @@
 
   // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
   template<typename RootVisitorType>
-  void VisitRoots(RootVisitorType& visitor) NO_THREAD_SAFETY_ANALYSIS;
+  void VisitRoots(RootVisitorType& visitor, size_t pointer_size) NO_THREAD_SAFETY_ANALYSIS;
 
   const DexFile* GetDexFile() SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -523,6 +535,10 @@
   ALWAYS_INLINE GcRoot<mirror::Class>* GetDexCacheResolvedTypes(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  uint16_t IncrementCounter() {
+    return ++hotness_count_;
+  }
+
  protected:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   // The class we are a part of.
@@ -544,7 +560,11 @@
   // Entry within a dispatch table for this method. For static/direct methods the index is into
   // the declaringClass.directMethods, for virtual methods the vtable and for interface methods the
   // ifTable.
-  uint32_t method_index_;
+  uint16_t method_index_;
+
+  // The hotness we measure for this method. Incremented by the interpreter. Not atomic, as we allow
+  // missing increments: if the method is hot, we will see it eventually.
+  uint16_t hotness_count_;
 
   // Fake padding field gets inserted here.
 
@@ -558,7 +578,8 @@
     // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
     GcRoot<mirror::Class>* dex_cache_resolved_types_;
 
-    // Pointer to JNI function registered to this method, or a function to resolve the JNI function.
+    // Pointer to JNI function registered to this method, or a function to resolve the JNI function,
+    // or the profiling data for non-native methods.
     void* entry_point_from_jni_;
 
     // Method dispatch from quick compiled code invokes this pointer which may cause bridging into
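
Narrowing method_index_ from uint32_t to uint16_t above frees exactly two bytes
for hotness_count_, so ArtMethod keeps its size and no later field offset
shifts. A quick way to convince yourself (hypothetical mirror structs, not
ART's):

    #include <cstdint>

    struct Before { uint32_t method_index_; };
    struct After  { uint16_t method_index_; uint16_t hotness_count_; };

    // Both occupy four bytes with the same alignment, so swapping one
    // layout for the other cannot move any subsequent field.
    static_assert(sizeof(Before) == sizeof(After), "layouts must match");
    static_assert(alignof(Before) == alignof(After), "alignment must match");
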
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 3a4bccd..a36b0fb 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -52,13 +52,14 @@
   "SSA2Dalvik   ",
   "Dalvik2SSA   ",
   "DebugInfo    ",
-  "Successor    ",
   "RegAlloc     ",
   "Data         ",
-  "Preds        ",
   "STL          ",
   "Graph        ",
   "BasicBlock   ",
+  "Predecessors ",
+  "Successors   ",
+  "Dominated    ",
   "Instruction  ",
   "LoopInfo     ",
   "TryCatchInf  ",
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index af2bfbc..47defb4 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -62,13 +62,14 @@
   kArenaAllocSSAToDalvikMap,
   kArenaAllocDalvikToSSAMap,
   kArenaAllocDebugInfo,
-  kArenaAllocSuccessor,
   kArenaAllocRegAlloc,
   kArenaAllocData,
-  kArenaAllocPredecessors,
   kArenaAllocSTL,
   kArenaAllocGraph,
   kArenaAllocBasicBlock,
+  kArenaAllocPredecessors,
+  kArenaAllocSuccessors,
+  kArenaAllocDominated,
   kArenaAllocInstruction,
   kArenaAllocLoopInfo,
   kArenaAllocTryCatchInfo,
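
The allocation-kind enum here and the name table in arena_allocator.cc above
are parallel lists that must be edited together. A compile-time guard of the
following shape catches a missed update (a sketch; the real enum's terminal
count enumerator may be named differently):

    #include <cstddef>

    enum ArenaAllocKind { kArenaAllocMisc, kArenaAllocGraph, kNumArenaAllocKinds };

    static const char* const kAllocNames[] = { "Misc  ", "Graph " };

    // One name per enumerator, checked at compile time; adding a kind
    // without its name (or vice versa) fails the build instead of
    // misattributing allocation statistics at runtime.
    static_assert(sizeof(kAllocNames) / sizeof(kAllocNames[0]) ==
                      static_cast<size_t>(kNumArenaAllocKinds),
                  "names out of sync with enum");
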
diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h
index 901f25f..0949619 100644
--- a/runtime/base/stl_util.h
+++ b/runtime/base/stl_util.h
@@ -20,6 +20,8 @@
 #include <algorithm>
 #include <sstream>
 
+#include "base/logging.h"
+
 namespace art {
 
 // Sort and remove duplicates of an STL vector or deque.
@@ -94,6 +96,59 @@
   return os.str();
 }
 
+// Deleter using free() for use with std::unique_ptr<>. See also UniqueCPtr<> below.
+struct FreeDelete {
+  // NOTE: Deleting a const object is valid but free() takes a non-const pointer.
+  void operator()(const void* ptr) const {
+    free(const_cast<void*>(ptr));
+  }
+};
+
+// Alias for std::unique_ptr<> that uses the C function free() to delete objects.
+template <typename T>
+using UniqueCPtr = std::unique_ptr<T, FreeDelete>;
+
+// C++14 from-the-future import (std::make_unique)
+// Invoke the constructor of 'T' with the provided args, and wrap the result in a unique ptr.
+template <typename T, typename ... Args>
+std::unique_ptr<T> MakeUnique(Args&& ... args) {
+  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+}
+
+// Find index of the first element with the specified value known to be in the container.
+template <typename Container, typename T>
+size_t IndexOfElement(const Container& container, const T& value) {
+  auto it = std::find(container.begin(), container.end(), value);
+  DCHECK(it != container.end());  // Must exist.
+  return std::distance(container.begin(), it);
+}
+
+// Remove the first element with the specified value known to be in the container.
+template <typename Container, typename T>
+void RemoveElement(Container& container, const T& value) {
+  auto it = std::find(container.begin(), container.end(), value);
+  DCHECK(it != container.end());  // Must exist.
+  container.erase(it);
+}
+
+// Replace the first element with the specified old_value known to be in the container.
+template <typename Container, typename T>
+void ReplaceElement(Container& container, const T& old_value, const T& new_value) {
+  auto it = std::find(container.begin(), container.end(), old_value);
+  DCHECK(it != container.end());  // Must exist.
+  *it = new_value;
+}
+
+// Search for an element with the specified value and return true if it was found, false otherwise.
+template <typename Container, typename T>
+bool ContainsElement(const Container& container, const T& value, size_t start_pos = 0u) {
+  DCHECK_LE(start_pos, container.size());
+  auto start = container.begin();
+  std::advance(start, start_pos);
+  auto it = std::find(start, container.end(), value);
+  return it != container.end();
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_STL_UTIL_H_
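
The helpers added above replace find-then-erase boilerplate, presumably for the
predecessor/successor bookkeeping hinted at by the new arena kinds earlier in
this patch. A usage sketch, assuming base/stl_util.h is included:

    #include <vector>

    void Example() {
      std::vector<int> preds = {10, 20, 30};
      auto i = IndexOfElement(preds, 20);     // 1; DCHECKs that 20 exists
      ReplaceElement(preds, 30, 40);          // {10, 20, 40}
      RemoveElement(preds, 10);               // {20, 40}
      bool has = ContainsElement(preds, 40);  // true
      auto p = MakeUnique<std::vector<int>>(preds);  // copy into unique_ptr
      (void)i; (void)has; (void)p;
    }
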
diff --git a/runtime/base/variant_map.h b/runtime/base/variant_map.h
index 1d7596a..82e5d2e 100644
--- a/runtime/base/variant_map.h
+++ b/runtime/base/variant_map.h
@@ -257,8 +257,7 @@
     if (ptr != nullptr) {
       return std::move(*ptr);
     } else {
-      TValue default_value = key.CreateDefaultValue();
-      return std::move(default_value);
+      return key.CreateDefaultValue();
     }
   }
 
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index e1aca2f..ad69676 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -218,6 +218,17 @@
                << " " << dex_pc_offset;
   }
 
+  // We only care about invokes in the Jit.
+  void InvokeVirtualOrInterface(Thread* thread ATTRIBUTE_UNUSED,
+                                mirror::Object*,
+                                ArtMethod* method,
+                                uint32_t dex_pc,
+                                ArtMethod*)
+      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    LOG(ERROR) << "Unexpected invoke event in debugger " << PrettyMethod(method)
+               << " " << dex_pc;
+  }
+
  private:
   static bool IsReturn(ArtMethod* method, uint32_t dex_pc)
       SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -567,7 +578,7 @@
   }
 
   Runtime* runtime = Runtime::Current();
-  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
+  ScopedSuspendAll ssa(__FUNCTION__);
   Thread* self = Thread::Current();
   ThreadState old_state = self->SetStateUnsafe(kRunnable);
   CHECK_NE(old_state, kRunnable);
@@ -577,8 +588,6 @@
   instrumentation_events_ = 0;
   gDebuggerActive = true;
   CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
-  runtime->GetThreadList()->ResumeAll();
-
   LOG(INFO) << "Debugger is active";
 }
 
@@ -591,32 +600,32 @@
   // to kRunnable to avoid scoped object access transitions. Remove the debugger as a listener
   // and clear the object registry.
   Runtime* runtime = Runtime::Current();
-  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
   Thread* self = Thread::Current();
-  ThreadState old_state = self->SetStateUnsafe(kRunnable);
-
-  // Debugger may not be active at this point.
-  if (IsDebuggerActive()) {
-    {
-      // Since we're going to disable deoptimization, we clear the deoptimization requests queue.
-      // This prevents us from having any pending deoptimization request when the debugger attaches
-      // to us again while no event has been requested yet.
-      MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
-      deoptimization_requests_.clear();
-      full_deoptimization_event_count_ = 0U;
+  {
+    ScopedSuspendAll ssa(__FUNCTION__);
+    ThreadState old_state = self->SetStateUnsafe(kRunnable);
+    // Debugger may not be active at this point.
+    if (IsDebuggerActive()) {
+      {
+        // Since we're going to disable deoptimization, we clear the deoptimization requests queue.
+        // This prevents us from having any pending deoptimization request when the debugger attaches
+        // to us again while no event has been requested yet.
+        MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
+        deoptimization_requests_.clear();
+        full_deoptimization_event_count_ = 0U;
+      }
+      if (instrumentation_events_ != 0) {
+        runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener,
+                                                      instrumentation_events_);
+        instrumentation_events_ = 0;
+      }
+      if (RequiresDeoptimization()) {
+        runtime->GetInstrumentation()->DisableDeoptimization(kDbgInstrumentationKey);
+      }
+      gDebuggerActive = false;
     }
-    if (instrumentation_events_ != 0) {
-      runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener,
-                                                    instrumentation_events_);
-      instrumentation_events_ = 0;
-    }
-    if (RequiresDeoptimization()) {
-      runtime->GetInstrumentation()->DisableDeoptimization(kDbgInstrumentationKey);
-    }
-    gDebuggerActive = false;
+    CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
   }
-  CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
-  runtime->GetThreadList()->ResumeAll();
 
   {
     ScopedObjectAccess soa(self);
@@ -740,9 +749,8 @@
   MonitorInfo monitor_info;
   {
     ScopedThreadSuspension sts(self, kSuspended);
-    Runtime::Current()->GetThreadList()->SuspendAll(__FUNCTION__);
+    ScopedSuspendAll ssa(__FUNCTION__);
     monitor_info = MonitorInfo(o);
-    Runtime::Current()->GetThreadList()->ResumeAll();
   }
   if (monitor_info.owner_ != nullptr) {
     expandBufAddObjectId(reply, gRegistry->Add(monitor_info.owner_->GetPeer()));
@@ -2435,6 +2443,8 @@
  private:
   const JDWP::FrameId frame_id_;
   JDWP::JdwpError error_;
+
+  DISALLOW_COPY_AND_ASSIGN(FindFrameVisitor);
 };
 
 JDWP::JdwpError Dbg::GetLocalValues(JDWP::Request* request, JDWP::ExpandBuf* pReply) {
@@ -2814,7 +2824,6 @@
   CatchLocationFinder(Thread* self, const Handle<mirror::Throwable>& exception, Context* context)
       SHARED_REQUIRES(Locks::mutator_lock_)
     : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
-      self_(self),
       exception_(exception),
       handle_scope_(self),
       this_at_throw_(handle_scope_.NewHandle<mirror::Object>(nullptr)),
@@ -2844,7 +2853,7 @@
     }
 
     if (dex_pc != DexFile::kDexNoIndex) {
-      StackHandleScope<1> hs(self_);
+      StackHandleScope<1> hs(GetThread());
       uint32_t found_dex_pc;
       Handle<mirror::Class> exception_class(hs.NewHandle(exception_->GetClass()));
       bool unused_clear_exception;
@@ -2879,7 +2888,6 @@
   }
 
  private:
-  Thread* const self_;
   const Handle<mirror::Throwable>& exception_;
   StackHandleScope<1> handle_scope_;
   MutableHandle<mirror::Object> this_at_throw_;
@@ -3150,8 +3158,7 @@
   CHECK_EQ(self->GetState(), kRunnable);
   ScopedThreadSuspension sts(self, kWaitingForDeoptimization);
   // We need to suspend mutator threads first.
-  Runtime* const runtime = Runtime::Current();
-  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
+  ScopedSuspendAll ssa(__FUNCTION__);
   const ThreadState old_state = self->SetStateUnsafe(kRunnable);
   {
     MutexLock mu(self, *Locks::deoptimization_lock_);
@@ -3163,7 +3170,6 @@
     deoptimization_requests_.clear();
   }
   CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
-  runtime->GetThreadList()->ResumeAll();
 }
 
 static bool IsMethodPossiblyInlined(Thread* self, ArtMethod* m)
@@ -3490,6 +3496,62 @@
   return instrumentation->IsDeoptimized(m);
 }
 
+struct NeedsDeoptimizationVisitor : public StackVisitor {
+ public:
+  explicit NeedsDeoptimizationVisitor(Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+    : StackVisitor(self, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+      needs_deoptimization_(false) {}
+
+  bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    // The visitor is meant to be used when handling exception from compiled code only.
+    CHECK(!IsShadowFrame()) << "We only expect to visit compiled frames: " << PrettyMethod(GetMethod());
+    ArtMethod* method = GetMethod();
+    if (method == nullptr) {
+      // We reach an upcall and don't need to deoptimize this part of the stack (ManagedFragment)
+      // so we can stop the visit.
+      DCHECK(!needs_deoptimization_);
+      return false;
+    }
+    if (Runtime::Current()->GetInstrumentation()->InterpretOnly()) {
+      // We found a compiled frame in the stack but instrumentation is set to interpret
+      // everything: we need to deoptimize.
+      needs_deoptimization_ = true;
+      return false;
+    }
+    if (Runtime::Current()->GetInstrumentation()->IsDeoptimized(method)) {
+      // We found a deoptimized method in the stack.
+      needs_deoptimization_ = true;
+      return false;
+    }
+    return true;
+  }
+
+  bool NeedsDeoptimization() const {
+    return needs_deoptimization_;
+  }
+
+ private:
+  // Do we need to deoptimize the stack?
+  bool needs_deoptimization_;
+
+  DISALLOW_COPY_AND_ASSIGN(NeedsDeoptimizationVisitor);
+};
+
+// Do we need to deoptimize the stack to handle an exception?
+bool Dbg::IsForcedInterpreterNeededForExceptionImpl(Thread* thread) {
+  const SingleStepControl* const ssc = thread->GetSingleStepControl();
+  if (ssc != nullptr) {
+    // We deopt to step into the catch handler.
+    return true;
+  }
+  // Deoptimization is required if at least one method in the stack needs it. However, we
+  // skip frames that will be unwound (thus not executed).
+  NeedsDeoptimizationVisitor visitor(thread);
+  visitor.WalkStack(true);  // includes upcall.
+  return visitor.NeedsDeoptimization();
+}
+
 // Scoped utility class to suspend a thread so that we may do tasks such as walk its stack. Doesn't
 // cause suspension if the thread is the current thread.
 class ScopedDebuggerThreadSuspension {
@@ -4657,13 +4719,9 @@
         // Need to acquire the mutator lock before the heap bitmap lock with exclusive access since
         // RosAlloc's internal logic doesn't know to release and reacquire the heap bitmap lock.
         ScopedThreadSuspension sts(self, kSuspended);
-        ThreadList* tl = Runtime::Current()->GetThreadList();
-        tl->SuspendAll(__FUNCTION__);
-        {
-          ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-          space->AsRosAllocSpace()->Walk(HeapChunkContext::HeapChunkJavaCallback, &context);
-        }
-        tl->ResumeAll();
+        ScopedSuspendAll ssa(__FUNCTION__);
+        ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+        space->AsRosAllocSpace()->Walk(HeapChunkContext::HeapChunkJavaCallback, &context);
       } else if (space->IsBumpPointerSpace()) {
         ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
         context.SetChunkOverhead(0);
@@ -4673,13 +4731,11 @@
         heap->IncrementDisableMovingGC(self);
         {
           ScopedThreadSuspension sts(self, kSuspended);
-          ThreadList* tl = Runtime::Current()->GetThreadList();
-          tl->SuspendAll(__FUNCTION__);
+          ScopedSuspendAll ssa(__FUNCTION__);
           ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
           context.SetChunkOverhead(0);
           space->AsRegionSpace()->Walk(BumpPointerSpaceCallback, &context);
           HeapChunkContext::HeapChunkJavaCallback(nullptr, nullptr, 0, &context);
-          tl->ResumeAll();
         }
         heap->DecrementDisableMovingGC(self);
       } else {
diff --git a/runtime/debugger.h b/runtime/debugger.h
index a9fa6ce..8278fc6 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -576,6 +576,19 @@
     return IsForcedInterpreterNeededForUpcallImpl(thread, m);
   }
 
+  // Indicates whether we need to force the use of interpreter when handling an
+  // exception. This allows us to deoptimize the stack and continue execution with
+  // the interpreter.
+  // Note: the interpreter will start by handling the exception when executing
+  // the deoptimized frames.
+  static bool IsForcedInterpreterNeededForException(Thread* thread)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!IsDebuggerActive()) {
+      return false;
+    }
+    return IsForcedInterpreterNeededForExceptionImpl(thread);
+  }
+
   // Single-stepping.
   static JDWP::JdwpError ConfigureStep(JDWP::ObjectId thread_id, JDWP::JdwpStepSize size,
                                        JDWP::JdwpStepDepth depth)
@@ -734,6 +747,9 @@
   static bool IsForcedInterpreterNeededForUpcallImpl(Thread* thread, ArtMethod* m)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  static bool IsForcedInterpreterNeededForExceptionImpl(Thread* thread)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Indicates whether the debugger is making requests.
   static bool gDebuggerActive;
 
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 4cb9a3b..ae62e2b 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -31,6 +31,7 @@
 #include "art_method-inl.h"
 #include "base/hash_map.h"
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "base/stringprintf.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 98d4e59..47e5c12 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -1275,6 +1275,8 @@
   // pointer to the OatDexFile it was loaded from. Otherwise oat_dex_file_ is
   // null.
   const OatDexFile* oat_dex_file_;
+
+  friend class DexFileVerifierTest;
 };
 
 struct DexFileReference {
@@ -1459,6 +1461,9 @@
   uint32_t GetMethodCodeItemOffset() const {
     return method_.code_off_;
   }
+  const uint8_t* DataPointer() const {
+    return ptr_pos_;
+  }
   const uint8_t* EndDataPointer() const {
     CHECK(!HasNext());
     return ptr_pos_;
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index eec4983..09416cc 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -16,7 +16,9 @@
 
 #include "dex_file_verifier.h"
 
+#include <inttypes.h>
 #include <zlib.h>
+
 #include <memory>
 
 #include "base/stringprintf.h"
@@ -444,66 +446,86 @@
   return true;
 }
 
-bool DexFileVerifier::CheckClassDataItemField(uint32_t idx, uint32_t access_flags,
+bool DexFileVerifier::CheckClassDataItemField(uint32_t idx,
+                                              uint32_t access_flags,
+                                              uint32_t class_access_flags,
+                                              uint16_t class_type_index,
                                               bool expect_static) {
+  // Check for overflow.
   if (!CheckIndex(idx, header_->field_ids_size_, "class_data_item field_idx")) {
     return false;
   }
 
+  // Check that it's the right class.
+  uint16_t my_class_index =
+      (reinterpret_cast<const DexFile::FieldId*>(begin_ + header_->field_ids_off_) + idx)->
+          class_idx_;
+  if (class_type_index != my_class_index) {
+    ErrorStringPrintf("Field's class index unexpected, %" PRIu16 "vs %" PRIu16,
+                      my_class_index,
+                      class_type_index);
+    return false;
+  }
+
+  // Check that it falls into the right class-data list.
   bool is_static = (access_flags & kAccStatic) != 0;
   if (UNLIKELY(is_static != expect_static)) {
     ErrorStringPrintf("Static/instance field not in expected list");
     return false;
   }
 
-  if (UNLIKELY((access_flags & ~kAccJavaFlagsMask) != 0)) {
-    ErrorStringPrintf("Bad class_data_item field access_flags %x", access_flags);
+  // Check field access flags.
+  std::string error_msg;
+  if (!CheckFieldAccessFlags(access_flags, class_access_flags, &error_msg)) {
+    ErrorStringPrintf("%s", error_msg.c_str());
     return false;
   }
 
   return true;
 }
 
-bool DexFileVerifier::CheckClassDataItemMethod(uint32_t idx, uint32_t access_flags,
+bool DexFileVerifier::CheckClassDataItemMethod(uint32_t idx,
+                                               uint32_t access_flags,
+                                               uint32_t class_access_flags,
+                                               uint16_t class_type_index,
                                                uint32_t code_offset,
-                                               std::unordered_set<uint32_t>& direct_method_indexes,
+                                               std::unordered_set<uint32_t>* direct_method_indexes,
                                                bool expect_direct) {
+  DCHECK(direct_method_indexes != nullptr);
+  // Check for overflow.
   if (!CheckIndex(idx, header_->method_ids_size_, "class_data_item method_idx")) {
     return false;
   }
 
-  bool is_direct = (access_flags & (kAccStatic | kAccPrivate | kAccConstructor)) != 0;
-  bool expect_code = (access_flags & (kAccNative | kAccAbstract)) == 0;
-  bool is_synchronized = (access_flags & kAccSynchronized) != 0;
-  bool allow_synchronized = (access_flags & kAccNative) != 0;
-
-  if (UNLIKELY(is_direct != expect_direct)) {
-    ErrorStringPrintf("Direct/virtual method not in expected list");
+  // Check that it's the right class.
+  uint16_t my_class_index =
+      (reinterpret_cast<const DexFile::MethodId*>(begin_ + header_->method_ids_off_) + idx)->
+          class_idx_;
+  if (class_type_index != my_class_index) {
+    ErrorStringPrintf("Method's class index unexpected, %" PRIu16 "vs %" PRIu16,
+                      my_class_index,
+                      class_type_index);
     return false;
   }
 
+  // Check that it's not defined as both direct and virtual.
   if (expect_direct) {
-    direct_method_indexes.insert(idx);
-  } else if (direct_method_indexes.find(idx) != direct_method_indexes.end()) {
+    direct_method_indexes->insert(idx);
+  } else if (direct_method_indexes->find(idx) != direct_method_indexes->end()) {
     ErrorStringPrintf("Found virtual method with same index as direct method: %d", idx);
     return false;
   }
 
-  constexpr uint32_t access_method_mask = kAccJavaFlagsMask | kAccConstructor |
-      kAccDeclaredSynchronized;
-  if (UNLIKELY(((access_flags & ~access_method_mask) != 0) ||
-               (is_synchronized && !allow_synchronized))) {
-    ErrorStringPrintf("Bad class_data_item method access_flags %x", access_flags);
-    return false;
-  }
-
-  if (UNLIKELY(expect_code && (code_offset == 0))) {
-    ErrorStringPrintf("Unexpected zero value for class_data_item method code_off with access "
-                      "flags %x", access_flags);
-    return false;
-  } else if (UNLIKELY(!expect_code && (code_offset != 0))) {
-    ErrorStringPrintf("Unexpected non-zero value %x for class_data_item method code_off"
-                      " with access flags %x", code_offset, access_flags);
+  // Check method access flags.
+  bool has_code = (code_offset != 0);
+  std::string error_msg;
+  if (!CheckMethodAccessFlags(idx,
+                              access_flags,
+                              class_access_flags,
+                              has_code,
+                              expect_direct,
+                              &error_msg)) {
+    ErrorStringPrintf("%s", error_msg.c_str());
     return false;
   }
 
@@ -689,60 +711,185 @@
   return true;
 }
 
+bool DexFileVerifier::FindClassFlags(uint32_t index,
+                                     bool is_field,
+                                     uint16_t* class_type_index,
+                                     uint32_t* class_access_flags) {
+  DCHECK(class_type_index != nullptr);
+  DCHECK(class_access_flags != nullptr);
+
+  // First check if the index is valid.
+  if (index >= (is_field ? header_->field_ids_size_ : header_->method_ids_size_)) {
+    return false;
+  }
+
+  // Next get the type index.
+  if (is_field) {
+    *class_type_index =
+        (reinterpret_cast<const DexFile::FieldId*>(begin_ + header_->field_ids_off_) + index)->
+            class_idx_;
+  } else {
+    *class_type_index =
+        (reinterpret_cast<const DexFile::MethodId*>(begin_ + header_->method_ids_off_) + index)->
+            class_idx_;
+  }
+
+  // Check if that is valid.
+  if (*class_type_index >= header_->type_ids_size_) {
+    return false;
+  }
+
+  // Now search for the class def. This is basically a specialized version of the DexFile code, as
+  // we should not trust that this is a valid DexFile just yet.
+  const DexFile::ClassDef* class_def_begin =
+      reinterpret_cast<const DexFile::ClassDef*>(begin_ + header_->class_defs_off_);
+  for (size_t i = 0; i < header_->class_defs_size_; ++i) {
+    const DexFile::ClassDef* class_def = class_def_begin + i;
+    if (class_def->class_idx_ == *class_type_index) {
+      *class_access_flags = class_def->access_flags_;
+      return true;
+    }
+  }
+
+  // Didn't find the class-def, not defined here...
+  return false;
+}
+
+bool DexFileVerifier::CheckOrderAndGetClassFlags(bool is_field,
+                                                 const char* type_descr,
+                                                 uint32_t curr_index,
+                                                 uint32_t prev_index,
+                                                 bool* have_class,
+                                                 uint16_t* class_type_index,
+                                                 uint32_t* class_access_flags) {
+  if (curr_index < prev_index) {
+    ErrorStringPrintf("out-of-order %s indexes %" PRIu32 " and %" PRIu32,
+                      type_descr,
+                      prev_index,
+                      curr_index);
+    return false;
+  }
+
+  if (!*have_class) {
+    *have_class = FindClassFlags(curr_index, is_field, class_type_index, class_access_flags);
+    if (!*have_class) {
+      // Should have really found one.
+      ErrorStringPrintf("could not find declaring class for %s index %" PRIu32,
+                        type_descr,
+                        curr_index);
+      return false;
+    }
+  }
+  return true;
+}
+
+template <bool kStatic>
+bool DexFileVerifier::CheckIntraClassDataItemFields(ClassDataItemIterator* it,
+                                                    bool* have_class,
+                                                    uint16_t* class_type_index,
+                                                    uint32_t* class_access_flags) {
+  DCHECK(it != nullptr);
+  // These calls use the raw access flags to check whether the whole dex field is valid.
+  uint32_t prev_index = 0;
+  for (; kStatic ? it->HasNextStaticField() : it->HasNextInstanceField(); it->Next()) {
+    uint32_t curr_index = it->GetMemberIndex();
+    if (!CheckOrderAndGetClassFlags(true,
+                                    kStatic ? "static field" : "instance field",
+                                    curr_index,
+                                    prev_index,
+                                    have_class,
+                                    class_type_index,
+                                    class_access_flags)) {
+      return false;
+    }
+    prev_index = curr_index;
+
+    if (!CheckClassDataItemField(curr_index,
+                                 it->GetRawMemberAccessFlags(),
+                                 *class_access_flags,
+                                 *class_type_index,
+                                 kStatic)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+template <bool kDirect>
+bool DexFileVerifier::CheckIntraClassDataItemMethods(
+    ClassDataItemIterator* it,
+    std::unordered_set<uint32_t>* direct_method_indexes,
+    bool* have_class,
+    uint16_t* class_type_index,
+    uint32_t* class_access_flags) {
+  uint32_t prev_index = 0;
+  for (; kDirect ? it->HasNextDirectMethod() : it->HasNextVirtualMethod(); it->Next()) {
+    uint32_t curr_index = it->GetMemberIndex();
+    if (!CheckOrderAndGetClassFlags(false,
+                                    kDirect ? "direct method" : "virtual method",
+                                    curr_index,
+                                    prev_index,
+                                    have_class,
+                                    class_type_index,
+                                    class_access_flags)) {
+      return false;
+    }
+    prev_index = curr_index;
+
+    if (!CheckClassDataItemMethod(curr_index,
+                                  it->GetRawMemberAccessFlags(),
+                                  *class_access_flags,
+                                  *class_type_index,
+                                  it->GetMethodCodeItemOffset(),
+                                  direct_method_indexes,
+                                  kDirect)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
 bool DexFileVerifier::CheckIntraClassDataItem() {
   ClassDataItemIterator it(*dex_file_, ptr_);
   std::unordered_set<uint32_t> direct_method_indexes;
 
-  // These calls use the raw access flags to check whether the whole dex field is valid.
-  uint32_t prev_index = 0;
-  for (; it.HasNextStaticField(); it.Next()) {
-    uint32_t curr_index = it.GetMemberIndex();
-    if (curr_index < prev_index) {
-      ErrorStringPrintf("out-of-order static field indexes %d and %d", prev_index, curr_index);
-      return false;
-    }
-    prev_index = curr_index;
-    if (!CheckClassDataItemField(curr_index, it.GetRawMemberAccessFlags(), true)) {
-      return false;
-    }
+  // This code is complicated by the fact that we don't directly know which class this belongs to.
+  // So we need to explicitly search with the first item we find (either field or method), and then,
+  // as the lookup is expensive, cache the result.
+  bool have_class = false;
+  uint16_t class_type_index;
+  uint32_t class_access_flags;
+
+  // Check fields.
+  if (!CheckIntraClassDataItemFields<true>(&it,
+                                           &have_class,
+                                           &class_type_index,
+                                           &class_access_flags)) {
+    return false;
   }
-  prev_index = 0;
-  for (; it.HasNextInstanceField(); it.Next()) {
-    uint32_t curr_index = it.GetMemberIndex();
-    if (curr_index < prev_index) {
-      ErrorStringPrintf("out-of-order instance field indexes %d and %d", prev_index, curr_index);
-      return false;
-    }
-    prev_index = curr_index;
-    if (!CheckClassDataItemField(curr_index, it.GetRawMemberAccessFlags(), false)) {
-      return false;
-    }
+  if (!CheckIntraClassDataItemFields<false>(&it,
+                                            &have_class,
+                                            &class_type_index,
+                                            &class_access_flags)) {
+    return false;
   }
-  prev_index = 0;
-  for (; it.HasNextDirectMethod(); it.Next()) {
-    uint32_t curr_index = it.GetMemberIndex();
-    if (curr_index < prev_index) {
-      ErrorStringPrintf("out-of-order direct method indexes %d and %d", prev_index, curr_index);
-      return false;
-    }
-    prev_index = curr_index;
-    if (!CheckClassDataItemMethod(curr_index, it.GetRawMemberAccessFlags(),
-        it.GetMethodCodeItemOffset(), direct_method_indexes, true)) {
-      return false;
-    }
+
+  // Check methods.
+  if (!CheckIntraClassDataItemMethods<true>(&it,
+                                            &direct_method_indexes,
+                                            &have_class,
+                                            &class_type_index,
+                                            &class_access_flags)) {
+    return false;
   }
-  prev_index = 0;
-  for (; it.HasNextVirtualMethod(); it.Next()) {
-    uint32_t curr_index = it.GetMemberIndex();
-    if (curr_index < prev_index) {
-      ErrorStringPrintf("out-of-order virtual method indexes %d and %d", prev_index, curr_index);
-      return false;
-    }
-    prev_index = curr_index;
-    if (!CheckClassDataItemMethod(curr_index, it.GetRawMemberAccessFlags(),
-        it.GetMethodCodeItemOffset(), direct_method_indexes, false)) {
-      return false;
-    }
+  if (!CheckIntraClassDataItemMethods<false>(&it,
+                                             &direct_method_indexes,
+                                             &have_class,
+                                             &class_type_index,
+                                             &class_access_flags)) {
+    return false;
   }
 
   ptr_ = it.EndDataPointer();
@@ -2149,4 +2296,259 @@
   va_end(ap);
 }
 
+// Fields and methods may have only one of public/protected/private.
+static bool CheckAtMostOneOfPublicProtectedPrivate(uint32_t flags) {
+  size_t count = (((flags & kAccPublic) == 0) ? 0 : 1) +
+                 (((flags & kAccProtected) == 0) ? 0 : 1) +
+                 (((flags & kAccPrivate) == 0) ? 0 : 1);
+  return count <= 1;
+}
+
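
CheckAtMostOneOfPublicProtectedPrivate above counts the visibility bits that
are set instead of enumerating the illegal pairs. Its behavior can be pinned
down with a self-contained compile-time check (the constants mirror the
standard dex ACC_PUBLIC/ACC_PRIVATE/ACC_PROTECTED values):

    #include <cstdint>

    constexpr uint32_t kPublic = 0x0001, kPrivate = 0x0002, kProtected = 0x0004;

    constexpr bool AtMostOneVisibility(uint32_t flags) {
      return (((flags & kPublic) != 0 ? 1 : 0) +
              ((flags & kPrivate) != 0 ? 1 : 0) +
              ((flags & kProtected) != 0 ? 1 : 0)) <= 1;
    }

    static_assert(AtMostOneVisibility(0), "package-private is accepted");
    static_assert(AtMostOneVisibility(kPublic), "one modifier is accepted");
    static_assert(!AtMostOneVisibility(kPublic | kPrivate), "two are rejected");
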
+bool DexFileVerifier::CheckFieldAccessFlags(uint32_t field_access_flags,
+                                            uint32_t class_access_flags,
+                                            std::string* error_msg) {
+  // Generally sort out >16-bit flags.
+  if ((field_access_flags & ~kAccJavaFlagsMask) != 0) {
+    *error_msg = StringPrintf("Bad class_data_item field access_flags %x", field_access_flags);
+    return false;
+  }
+
+  // Flags allowed on fields, in general. Other lower-16-bit flags are to be ignored.
+  constexpr uint32_t kFieldAccessFlags = kAccPublic |
+                                         kAccPrivate |
+                                         kAccProtected |
+                                         kAccStatic |
+                                         kAccFinal |
+                                         kAccVolatile |
+                                         kAccTransient |
+                                         kAccSynthetic |
+                                         kAccEnum;
+
+  // Fields may have only one of public/protected/private.
+  if (!CheckAtMostOneOfPublicProtectedPrivate(field_access_flags)) {
+    *error_msg = StringPrintf("Field may have only one of public/protected/private, %x",
+                              field_access_flags);
+    return false;
+  }
+
+  // Interfaces have a pretty restricted list.
+  if ((class_access_flags & kAccInterface) != 0) {
+    // Interface fields must be public final static.
+    constexpr uint32_t kPublicFinalStatic = kAccPublic | kAccFinal | kAccStatic;
+    if ((field_access_flags & kPublicFinalStatic) != kPublicFinalStatic) {
+      *error_msg = StringPrintf("Interface field is not public final static: %x",
+                                field_access_flags);
+      return false;
+    }
+    // Interface fields may be synthetic, but may not have other flags.
+    constexpr uint32_t kDisallowed = ~(kPublicFinalStatic | kAccSynthetic);
+    if ((field_access_flags & kFieldAccessFlags & kDisallowed) != 0) {
+      *error_msg = StringPrintf("Interface field has disallowed flag: %x", field_access_flags);
+      return false;
+    }
+    return true;
+  }
+
+  // Volatile fields may not be final.
+  constexpr uint32_t kVolatileFinal = kAccVolatile | kAccFinal;
+  if ((field_access_flags & kVolatileFinal) == kVolatileFinal) {
+    *error_msg = "Fields may not be volatile and final";
+    return false;
+  }
+
+  return true;
+}
+
+// Try to find the name of the method with the given index. We do not want to rely on DexFile
+// infrastructure at this point, so do it all by hand. begin and header correspond to begin_ and
+// header_ of the DexFileVerifier. str will contain the pointer to the method name on success
+// (flagged by the return value), otherwise error_msg will contain an error string.
+static bool FindMethodName(uint32_t method_index,
+                           const uint8_t* begin,
+                           const DexFile::Header* header,
+                           const char** str,
+                           std::string* error_msg) {
+  if (method_index >= header->method_ids_size_) {
+    *error_msg = "Method index not available for method flags verification";
+    return false;
+  }
+  uint32_t string_idx =
+      (reinterpret_cast<const DexFile::MethodId*>(begin + header->method_ids_off_) +
+          method_index)->name_idx_;
+  if (string_idx >= header->string_ids_size_) {
+    *error_msg = "String index not available for method flags verification";
+    return false;
+  }
+  uint32_t string_off =
+      (reinterpret_cast<const DexFile::StringId*>(begin + header->string_ids_off_) + string_idx)->
+          string_data_off_;
+  if (string_off >= header->file_size_) {
+    *error_msg = "String offset out of bounds for method flags verification";
+    return false;
+  }
+  const uint8_t* str_data_ptr = begin + string_off;
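+  // Skip the uleb128-encoded utf16 length prefix; the MUTF-8 payload follows it.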
+  DecodeUnsignedLeb128(&str_data_ptr);
+  *str = reinterpret_cast<const char*>(str_data_ptr);
+  return true;
+}
+
+bool DexFileVerifier::CheckMethodAccessFlags(uint32_t method_index,
+                                             uint32_t method_access_flags,
+                                             uint32_t class_access_flags,
+                                             bool has_code,
+                                             bool expect_direct,
+                                             std::string* error_msg) {
+  // Generally sort out >16-bit flags, except dex knows Constructor and DeclaredSynchronized.
+  constexpr uint32_t kAllMethodFlags =
+      kAccJavaFlagsMask | kAccConstructor | kAccDeclaredSynchronized;
+  if ((method_access_flags & ~kAllMethodFlags) != 0) {
+    *error_msg = StringPrintf("Bad class_data_item method access_flags %x", method_access_flags);
+    return false;
+  }
+
+  // Flags allowed on methods, in general. Other lower-16-bit flags are to be ignored.
+  constexpr uint32_t kMethodAccessFlags = kAccPublic |
+                                          kAccPrivate |
+                                          kAccProtected |
+                                          kAccStatic |
+                                          kAccFinal |
+                                          kAccSynthetic |
+                                          kAccSynchronized |
+                                          kAccBridge |
+                                          kAccVarargs |
+                                          kAccNative |
+                                          kAccAbstract |
+                                          kAccStrict;
+
+  // Methods may have only one of public/protected/private.
+  if (!CheckAtMostOneOfPublicProtectedPrivate(method_access_flags)) {
+    *error_msg = StringPrintf("Method may have only one of public/protected/private, %x",
+                              method_access_flags);
+    return false;
+  }
+
+  // Try to find the name, to check for constructor properties.
+  const char* str;
+  if (!FindMethodName(method_index, begin_, header_, &str, error_msg)) {
+    return false;
+  }
+  bool is_init_by_name = false;
+  // Use array types so that sizeof() yields the string length including the terminating NUL,
+  // not the size of a pointer, making the bounds checks below correct.
+  constexpr const char kInitName[] = "<init>";
+  size_t str_offset = (reinterpret_cast<const uint8_t*>(str) - begin_);
+  if (header_->file_size_ - str_offset >= sizeof(kInitName)) {
+    is_init_by_name = strcmp(kInitName, str) == 0;
+  }
+  bool is_clinit_by_name = false;
+  constexpr const char kClinitName[] = "<clinit>";
+  if (header_->file_size_ - str_offset >= sizeof(kClinitName)) {
+    is_clinit_by_name = strcmp(kClinitName, str) == 0;
+  }
+  bool is_constructor = is_init_by_name || is_clinit_by_name;
+
+  // Only methods named "<clinit>" or "<init>" may be marked constructor. Note: we cannot enforce
+  // the reverse for backwards compatibility reasons.
+  if (((method_access_flags & kAccConstructor) != 0) && !is_constructor) {
+    *error_msg = StringPrintf("Method %" PRIu32 " is marked constructor, but doesn't match name",
+                              method_index);
+    return false;
+  }
+  // Check that the static constructor (= static initializer) is named "<clinit>" and that the
+  // instance constructor is called "<init>".
+  if (is_constructor) {
+    bool is_static = (method_access_flags & kAccStatic) != 0;
+    if (is_static ^ is_clinit_by_name) {
+      *error_msg = StringPrintf("Constructor %" PRIu32 " is not flagged correctly wrt/ static.",
+                                method_index);
+      return false;
+    }
+  }
+  // Check that static and private methods, as well as constructors, are in the direct
+  // methods list, and other methods in the virtual methods list.
+  bool is_direct = (method_access_flags & (kAccStatic | kAccPrivate)) != 0 || is_constructor;
+  if (is_direct != expect_direct) {
+    *error_msg = StringPrintf("Direct/virtual method %" PRIu32 " not in expected list %d",
+                              method_index,
+                              expect_direct);
+    return false;
+  }
+
+  // From here on out it is easier to mask out the bits we're supposed to ignore.
+  method_access_flags &= kMethodAccessFlags;
+
+  // If there aren't any instructions, make sure that's expected.
+  if (!has_code) {
+    // Only native or abstract methods may not have code.
+    if ((method_access_flags & (kAccNative | kAccAbstract)) == 0) {
+      *error_msg = StringPrintf("Method %" PRIu32 " has no code, but is not marked native or "
+                                "abstract",
+                                method_index);
+      return false;
+    }
+    // Constructors must always have code.
+    if (is_constructor) {
+      *error_msg = StringPrintf("Constructor %u must not be abstract or native", method_index);
+      return false;
+    }
+    if ((method_access_flags & kAccAbstract) != 0) {
+      // Abstract methods are not allowed to have the following flags.
+      constexpr uint32_t kForbidden =
+          kAccPrivate | kAccStatic | kAccFinal | kAccNative | kAccStrict | kAccSynchronized;
+      if ((method_access_flags & kForbidden) != 0) {
+        *error_msg = StringPrintf("Abstract method %" PRIu32 " has disallowed access flags %x",
+                                  method_index,
+                                  method_access_flags);
+        return false;
+      }
+      // Abstract methods must be in an abstract class or interface.
+      if ((class_access_flags & (kAccInterface | kAccAbstract)) == 0) {
+        *error_msg = StringPrintf("Method %" PRIu32 " is abstract, but the declaring class "
+                                  "is neither abstract nor an interface", method_index);
+        return false;
+      }
+    }
+    // Interfaces are special.
+    if ((class_access_flags & kAccInterface) != 0) {
+      // Interface methods must be public and abstract.
+      if ((method_access_flags & (kAccPublic | kAccAbstract)) != (kAccPublic | kAccAbstract)) {
+        *error_msg = StringPrintf("Interface method %" PRIu32 " is not public and abstract",
+                                  method_index);
+        return false;
+      }
+      // At this point, we know the method is public and abstract. This means that all the checks
+      // for invalid combinations above apply. In addition, interface methods must not be
+      // protected. This is caught by the check for only-one-of-public-protected-private.
+    }
+    return true;
+  }
+
+  // When there's code, the method must not be native or abstract.
+  if ((method_access_flags & (kAccNative | kAccAbstract)) != 0) {
+    *error_msg = StringPrintf("Method %" PRIu32 " has code, but is marked native or abstract",
+                              method_index);
+    return false;
+  }
+
+  // Only the static initializer may have code in an interface.
+  if (((class_access_flags & kAccInterface) != 0) && !is_clinit_by_name) {
+    *error_msg = StringPrintf("Non-clinit interface method %" PRIu32 " should not have code",
+                              method_index);
+    return false;
+  }
+
+  // Instance constructors must not be synchronized, among other disallowed flags.
+  if (is_init_by_name) {
+    static constexpr uint32_t kInitAllowed =
+        kAccPrivate | kAccProtected | kAccPublic | kAccStrict | kAccVarargs | kAccSynthetic;
+    if ((method_access_flags & ~kInitAllowed) != 0) {
+      *error_msg = StringPrintf("Constructor %" PRIu32 " flagged inappropriately %x",
+                                method_index,
+                                method_access_flags);
+      return false;
+    }
+  }
+
+  return true;
+}
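+// Illustrative outcomes of the checks above for an ordinary (non-interface) class:
+//   "<init>" with kAccPublic and code   -> accepted
+//   "<init>" with kAccStatic            -> rejected (static does not match the name)
+//   "foo" with kAccAbstract and code    -> rejected (code, but marked abstract)
+//   "foo" with kAccNative and no code   -> accepted (native methods need no code)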
+
 }  // namespace art
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index ccc40d4..4f15357 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -57,16 +57,48 @@
   uint32_t ReadUnsignedLittleEndian(uint32_t size);
   bool CheckAndGetHandlerOffsets(const DexFile::CodeItem* code_item,
                                  uint32_t* handler_offsets, uint32_t handlers_size);
-  bool CheckClassDataItemField(uint32_t idx, uint32_t access_flags, bool expect_static);
-  bool CheckClassDataItemMethod(uint32_t idx, uint32_t access_flags, uint32_t code_offset,
-                                std::unordered_set<uint32_t>& direct_method_indexes,
+  bool CheckClassDataItemField(uint32_t idx,
+                               uint32_t access_flags,
+                               uint32_t class_access_flags,
+                               uint16_t class_type_index,
+                               bool expect_static);
+  bool CheckClassDataItemMethod(uint32_t idx,
+                                uint32_t access_flags,
+                                uint32_t class_access_flags,
+                                uint16_t class_type_index,
+                                uint32_t code_offset,
+                                std::unordered_set<uint32_t>* direct_method_indexes,
                                 bool expect_direct);
+  bool CheckOrderAndGetClassFlags(bool is_field,
+                                  const char* type_descr,
+                                  uint32_t curr_index,
+                                  uint32_t prev_index,
+                                  bool* have_class,
+                                  uint16_t* class_type_index,
+                                  uint32_t* class_access_flags);
+
   bool CheckPadding(size_t offset, uint32_t aligned_offset);
   bool CheckEncodedValue();
   bool CheckEncodedArray();
   bool CheckEncodedAnnotation();
 
   bool CheckIntraClassDataItem();
+  // Check all fields of the given type from the given iterator. Load the class data from the first
+  // field, if necessary (and return it), or use the given values.
+  template <bool kStatic>
+  bool CheckIntraClassDataItemFields(ClassDataItemIterator* it,
+                                     bool* have_class,
+                                     uint16_t* class_type_index,
+                                     uint32_t* class_access_flags);
+  // Check all methods of the given type from the given iterator. Load the class data from the first
+  // method, if necessary (and return it), or use the given values.
+  template <bool kDirect>
+  bool CheckIntraClassDataItemMethods(ClassDataItemIterator* it,
+                                      std::unordered_set<uint32_t>* direct_method_indexes,
+                                      bool* have_class,
+                                      uint16_t* class_type_index,
+                                      uint32_t* class_access_flags);
+
   bool CheckIntraCodeItem();
   bool CheckIntraStringDataItem();
   bool CheckIntraDebugInfoItem();
@@ -112,6 +144,31 @@
   void ErrorStringPrintf(const char* fmt, ...)
       __attribute__((__format__(__printf__, 2, 3))) COLD_ATTR;
 
+  // Retrieve class index and class access flag from the given member. index is the member index,
+  // which is taken as either a field or a method index (as designated by is_field). The result,
+  // if the member and declaring class could be found, is stored in class_type_index and
+  // class_access_flags.
+  // This is an expensive lookup, as we have to find the class-def by type index, which is a
+  // linear search. The output values should thus be cached by the caller.
+  bool FindClassFlags(uint32_t index,
+                      bool is_field,
+                      uint16_t* class_type_index,
+                      uint32_t* class_access_flags);
+
+  // Check the validity of the given field access flags, in the context of a class with the
+  // given class access flags.
+  static bool CheckFieldAccessFlags(uint32_t field_access_flags,
+                                    uint32_t class_access_flags,
+                                    std::string* error_msg);
+  // Check the validity of the given method access flags, in the context of a class with the
+  // given class access flags.
+  bool CheckMethodAccessFlags(uint32_t method_index,
+                              uint32_t method_access_flags,
+                              uint32_t class_access_flags,
+                              bool has_code,
+                              bool expect_direct,
+                              std::string* error_msg);
+
   const DexFile* const dex_file_;
   const uint8_t* const begin_;
   const size_t size_;
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 9f1ffec..1b529c9 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -18,18 +18,20 @@
 
 #include "sys/mman.h"
 #include "zlib.h"
+#include <functional>
 #include <memory>
 
 #include "base/unix_file/fd_file.h"
+#include "base/bit_utils.h"
 #include "base/macros.h"
 #include "common_runtime_test.h"
+#include "dex_file-inl.h"
+#include "leb128.h"
 #include "scoped_thread_state_change.h"
 #include "thread-inl.h"
 
 namespace art {
 
-class DexFileVerifierTest : public CommonRuntimeTest {};
-
 static const uint8_t kBase64Map[256] = {
   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
   255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
@@ -101,6 +103,64 @@
   return dst.release();
 }
 
+static void FixUpChecksum(uint8_t* dex_file) {
+  DexFile::Header* header = reinterpret_cast<DexFile::Header*>(dex_file);
+  uint32_t expected_size = header->file_size_;
+  uint32_t adler_checksum = adler32(0L, Z_NULL, 0);
+  const uint32_t non_sum = sizeof(DexFile::Header::magic_) + sizeof(DexFile::Header::checksum_);
+  const uint8_t* non_sum_ptr = dex_file + non_sum;
+  adler_checksum = adler32(adler_checksum, non_sum_ptr, expected_size - non_sum);
+  header->checksum_ = adler_checksum;
+}
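+// Note: per the dex format, the stored checksum is the adler32 over everything in the file
+// after the magic and checksum fields, which is what is recomputed here.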
+
+// Custom deleter. Necessary to clean up the memory we use (to be able to mutate).
+struct DexFileDeleter {
+  void operator()(DexFile* in) {
+    if (in != nullptr) {
+      delete[] in->Begin();  // The backing memory was allocated with new[] in DecodeBase64.
+      delete in;
+    }
+  }
+};
+
+using DexFileUniquePtr = std::unique_ptr<DexFile, DexFileDeleter>;
+
+class DexFileVerifierTest : public CommonRuntimeTest {
+ protected:
+  void VerifyModification(const char* dex_file_base64_content,
+                          const char* location,
+                          std::function<void(DexFile*)> f,
+                          const char* expected_error) {
+    DexFileUniquePtr dex_file(WrapAsDexFile(dex_file_base64_content));
+    f(dex_file.get());
+    FixUpChecksum(const_cast<uint8_t*>(dex_file->Begin()));
+
+    std::string error_msg;
+    bool success = DexFileVerifier::Verify(dex_file.get(),
+                                           dex_file->Begin(),
+                                           dex_file->Size(),
+                                           location,
+                                           &error_msg);
+    if (expected_error == nullptr) {
+      EXPECT_TRUE(success) << error_msg;
+    } else {
+      EXPECT_FALSE(success) << "Expected " << expected_error;
+      if (!success) {
+        EXPECT_NE(error_msg.find(expected_error), std::string::npos) << error_msg;
+      }
+    }
+  }
+
+ private:
+  static DexFile* WrapAsDexFile(const char* dex_file_content_in_base_64) {
+    // Decode base64.
+    size_t length;
+    uint8_t* dex_bytes = DecodeBase64(dex_file_content_in_base_64, &length);
+    CHECK(dex_bytes != nullptr);
+    return new DexFile(dex_bytes, length, "tmp", 0, nullptr, nullptr);
+  }
+};
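+// Typical use of the fixture (illustrative): a nullptr expected_error asserts that
+// verification succeeds after the mutation; a non-null string asserts failure and that the
+// error message contains the string. For example:
+//   VerifyModification(kGoodTestDex, "no_mod", [](DexFile* dex ATTRIBUTE_UNUSED) {}, nullptr);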
+
 static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64,
                                                         const char* location,
                                                         std::string* error_msg) {
@@ -133,7 +193,6 @@
   return dex_file;
 }
 
-
 // For reference.
 static const char kGoodTestDex[] =
     "ZGV4CjAzNQDrVbyVkxX1HljTznNf95AglkUAhQuFtmKkAgAAcAAAAHhWNBIAAAAAAAAAAAQCAAAN"
@@ -157,92 +216,1003 @@
   ASSERT_TRUE(raw.get() != nullptr) << error_msg;
 }
 
-static void FixUpChecksum(uint8_t* dex_file) {
-  DexFile::Header* header = reinterpret_cast<DexFile::Header*>(dex_file);
-  uint32_t expected_size = header->file_size_;
-  uint32_t adler_checksum = adler32(0L, Z_NULL, 0);
-  const uint32_t non_sum = sizeof(DexFile::Header::magic_) + sizeof(DexFile::Header::checksum_);
-  const uint8_t* non_sum_ptr = dex_file + non_sum;
-  adler_checksum = adler32(adler_checksum, non_sum_ptr, expected_size - non_sum);
-  header->checksum_ = adler_checksum;
-}
-
-static std::unique_ptr<const DexFile> FixChecksumAndOpen(uint8_t* bytes, size_t length,
-                                                         const char* location,
-                                                         std::string* error_msg) {
-  // Check data.
-  CHECK(bytes != nullptr);
-
-  // Fixup of checksum.
-  FixUpChecksum(bytes);
-
-  // write to provided file
-  std::unique_ptr<File> file(OS::CreateEmptyFile(location));
-  CHECK(file.get() != nullptr);
-  if (!file->WriteFully(bytes, length)) {
-    PLOG(FATAL) << "Failed to write base64 as dex file";
-  }
-  if (file->FlushCloseOrErase() != 0) {
-    PLOG(FATAL) << "Could not flush and close test file.";
-  }
-  file.reset();
-
-  // read dex file
-  ScopedObjectAccess soa(Thread::Current());
-  std::vector<std::unique_ptr<const DexFile>> tmp;
-  if (!DexFile::Open(location, location, error_msg, &tmp)) {
-    return nullptr;
-  }
-  EXPECT_EQ(1U, tmp.size());
-  std::unique_ptr<const DexFile> dex_file = std::move(tmp[0]);
-  EXPECT_EQ(PROT_READ, dex_file->GetPermissions());
-  EXPECT_TRUE(dex_file->IsReadOnly());
-  return dex_file;
-}
-
-static bool ModifyAndLoad(const char* dex_file_content, const char* location, size_t offset,
-                          uint8_t new_val, std::string* error_msg) {
-  // Decode base64.
-  size_t length;
-  std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(dex_file_content, &length));
-  CHECK(dex_bytes.get() != nullptr);
-
-  // Make modifications.
-  dex_bytes.get()[offset] = new_val;
-
-  // Fixup and load.
-  std::unique_ptr<const DexFile> file(FixChecksumAndOpen(dex_bytes.get(), length, location,
-                                                         error_msg));
-  return file.get() != nullptr;
-}
-
 TEST_F(DexFileVerifierTest, MethodId) {
-  {
-    // Class error.
-    ScratchFile tmp;
-    std::string error_msg;
-    bool success = !ModifyAndLoad(kGoodTestDex, tmp.GetFilename().c_str(), 220, 0xFFU, &error_msg);
-    ASSERT_TRUE(success);
-    ASSERT_NE(error_msg.find("inter_method_id_item class_idx"), std::string::npos) << error_msg;
+  // Class idx error.
+  VerifyModification(
+      kGoodTestDex,
+      "method_id_class_idx",
+      [](DexFile* dex_file) {
+        DexFile::MethodId* method_id = const_cast<DexFile::MethodId*>(&dex_file->GetMethodId(0));
+        method_id->class_idx_ = 0xFF;
+      },
+      "could not find declaring class for direct method index 0");
+
+  // Proto idx error.
+  VerifyModification(
+      kGoodTestDex,
+      "method_id_proto_idx",
+      [](DexFile* dex_file) {
+        DexFile::MethodId* method_id = const_cast<DexFile::MethodId*>(&dex_file->GetMethodId(0));
+        method_id->proto_idx_ = 0xFF;
+      },
+      "inter_method_id_item proto_idx");
+
+  // Name idx error.
+  VerifyModification(
+      kGoodTestDex,
+      "method_id_name_idx",
+      [](DexFile* dex_file) {
+        DexFile::MethodId* method_id = const_cast<DexFile::MethodId*>(&dex_file->GetMethodId(0));
+        method_id->name_idx_ = 0xFF;
+      },
+      "String index not available for method flags verification");
+}
+
+// Method flags test class generated from the following smali code. The declared-synchronized
+// flags are there to enforce a 3-byte uLEB128 encoding so we don't have to relayout
+// the code, but we need to remove them before doing tests.
+//
+// .class public LMethodFlags;
+// .super Ljava/lang/Object;
+//
+// .method public static constructor <clinit>()V
+// .registers 1
+//     return-void
+// .end method
+//
+// .method public constructor <init>()V
+// .registers 1
+//     return-void
+// .end method
+//
+// .method private declared-synchronized foo()V
+// .registers 1
+//     return-void
+// .end method
+//
+// .method public declared-synchronized bar()V
+// .registers 1
+//     return-void
+// .end method
+
+static const char kMethodFlagsTestDex[] =
+    "ZGV4CjAzNQCyOQrJaDBwiIWv5MIuYKXhxlLLsQcx5SwgAgAAcAAAAHhWNBIAAAAAAAAAAJgBAAAH"
+    "AAAAcAAAAAMAAACMAAAAAQAAAJgAAAAAAAAAAAAAAAQAAACkAAAAAQAAAMQAAAA8AQAA5AAAAOQA"
+    "AADuAAAA9gAAAAUBAAAZAQAAHAEAACEBAAACAAAAAwAAAAQAAAAEAAAAAgAAAAAAAAAAAAAAAAAA"
+    "AAAAAAABAAAAAAAAAAUAAAAAAAAABgAAAAAAAAABAAAAAQAAAAAAAAD/////AAAAAHoBAAAAAAAA"
+    "CDxjbGluaXQ+AAY8aW5pdD4ADUxNZXRob2RGbGFnczsAEkxqYXZhL2xhbmcvT2JqZWN0OwABVgAD"
+    "YmFyAANmb28AAAAAAAAAAQAAAAAAAAAAAAAAAQAAAA4AAAABAAEAAAAAAAAAAAABAAAADgAAAAEA"
+    "AQAAAAAAAAAAAAEAAAAOAAAAAQABAAAAAAAAAAAAAQAAAA4AAAADAQCJgASsAgGBgATAAgKCgAjU"
+    "AgKBgAjoAgAACwAAAAAAAAABAAAAAAAAAAEAAAAHAAAAcAAAAAIAAAADAAAAjAAAAAMAAAABAAAA"
+    "mAAAAAUAAAAEAAAApAAAAAYAAAABAAAAxAAAAAIgAAAHAAAA5AAAAAMQAAABAAAAKAEAAAEgAAAE"
+    "AAAALAEAAAAgAAABAAAAegEAAAAQAAABAAAAmAEAAA==";
+
+// Find the method data for the first method with the given name (from class 0). Note: the pointer
+// is to the access flags, so that the caller doesn't have to handle the leb128-encoded method-index
+// delta.
+static const uint8_t* FindMethodData(const DexFile* dex_file, const char* name) {
+  const DexFile::ClassDef& class_def = dex_file->GetClassDef(0);
+  const uint8_t* class_data = dex_file->GetClassData(class_def);
+
+  ClassDataItemIterator it(*dex_file, class_data);
+
+  const uint8_t* trailing = class_data;
+  // Need to manually decode the four entries. DataPointer() doesn't work for this, as the first
+  // element has already been loaded into the iterator.
+  DecodeUnsignedLeb128(&trailing);
+  DecodeUnsignedLeb128(&trailing);
+  DecodeUnsignedLeb128(&trailing);
+  DecodeUnsignedLeb128(&trailing);
+
+  // Skip all fields.
+  while (it.HasNextStaticField() || it.HasNextInstanceField()) {
+    trailing = it.DataPointer();
+    it.Next();
   }
 
-  {
-    // Proto error.
-    ScratchFile tmp;
-    std::string error_msg;
-    bool success = !ModifyAndLoad(kGoodTestDex, tmp.GetFilename().c_str(), 222, 0xFFU, &error_msg);
-    ASSERT_TRUE(success);
-    ASSERT_NE(error_msg.find("inter_method_id_item proto_idx"), std::string::npos) << error_msg;
+  while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) {
+    uint32_t method_index = it.GetMemberIndex();
+    uint32_t name_index = dex_file->GetMethodId(method_index).name_idx_;
+    const DexFile::StringId& string_id = dex_file->GetStringId(name_index);
+    const char* str = dex_file->GetStringData(string_id);
+    if (strcmp(name, str) == 0) {
+      DecodeUnsignedLeb128(&trailing);
+      return trailing;
+    }
+
+    trailing = it.DataPointer();
+    it.Next();
   }
 
-  {
-    // Name error.
-    ScratchFile tmp;
-    std::string error_msg;
-    bool success = !ModifyAndLoad(kGoodTestDex, tmp.GetFilename().c_str(), 224, 0xFFU, &error_msg);
-    ASSERT_TRUE(success);
-    ASSERT_NE(error_msg.find("inter_method_id_item name_idx"), std::string::npos) << error_msg;
+  return nullptr;
+}
+
+// Set the method flags to the given value.
+static void SetMethodFlags(DexFile* dex_file, const char* method, uint32_t mask) {
+  uint8_t* method_flags_ptr = const_cast<uint8_t*>(FindMethodData(dex_file, method));
+  CHECK(method_flags_ptr != nullptr) << method;
+
+  // Unroll this, as we only have three bytes, anyway.
+  uint8_t base1 = static_cast<uint8_t>(mask & 0x7F);
+  *(method_flags_ptr++) = (base1 | 0x80);
+  mask >>= 7;
+
+  uint8_t base2 = static_cast<uint8_t>(mask & 0x7F);
+  *(method_flags_ptr++) = (base2 | 0x80);
+  mask >>= 7;
+
+  uint8_t base3 = static_cast<uint8_t>(mask & 0x7F);
+  *method_flags_ptr = base3;
+}
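+// The three bytes written above form a padded (non-canonical) uLEB128 encoding, so the value
+// can be changed in place without re-laying out the file. For example, the value 1 is stored
+// as 0x81 0x80 0x00, which DecodeUnsignedLeb128 still reads back as 1.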
+
+static uint32_t GetMethodFlags(DexFile* dex_file, const char* method) {
+  const uint8_t* method_flags_ptr = const_cast<uint8_t*>(FindMethodData(dex_file, method));
+  CHECK(method_flags_ptr != nullptr) << method;
+  return DecodeUnsignedLeb128(&method_flags_ptr);
+}
+
+// Apply the given mask to method flags.
+static void ApplyMaskToMethodFlags(DexFile* dex_file, const char* method, uint32_t mask) {
+  uint32_t value = GetMethodFlags(dex_file, method);
+  value &= mask;
+  SetMethodFlags(dex_file, method, value);
+}
+
+// Or the given mask into the method flags.
+static void OrMaskToMethodFlags(DexFile* dex_file, const char* method, uint32_t mask) {
+  uint32_t value = GetMethodFlags(dex_file, method);
+  value |= mask;
+  SetMethodFlags(dex_file, method, value);
+}
+
+// Set code_off to 0 for the method.
+static void RemoveCode(DexFile* dex_file, const char* method) {
+  const uint8_t* ptr = FindMethodData(dex_file, method);
+  // The access flags come first; skip over them.
+  DecodeUnsignedLeb128(&ptr);
+
+  // Figure out how many bytes the code_off is.
+  const uint8_t* tmp = ptr;
+  DecodeUnsignedLeb128(&tmp);
+  size_t bytes = tmp - ptr;
+
+  uint8_t* mod = const_cast<uint8_t*>(ptr);
+  for (size_t i = 1; i < bytes; ++i) {
+    *(mod++) = 0x80;
   }
+  *mod = 0x00;
+}
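+// The loop above rewrites the code_off bytes in place as a padded uLEB128 zero (e.g. a
+// two-byte field becomes 0x80 0x00), which decodes to 0 without moving any other data.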
+
+TEST_F(DexFileVerifierTest, MethodAccessFlagsBase) {
+  // Check that the dex file verifies once the bogus declared-synchronized flags are removed
+  // from "foo" and "bar".
+  VerifyModification(
+      kMethodFlagsTestDex,
+      "method_flags_ok",
+      [](DexFile* dex_file) {
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+        ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+      },
+      nullptr);
+}
+
+TEST_F(DexFileVerifierTest, MethodAccessFlagsConstructors) {
+  // Make sure we still accept constructors without their flags.
+  VerifyModification(
+      kMethodFlagsTestDex,
+      "method_flags_missing_constructor_tag_ok",
+      [](DexFile* dex_file) {
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+        ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToMethodFlags(dex_file, "<init>", ~kAccConstructor);
+        ApplyMaskToMethodFlags(dex_file, "<clinit>", ~kAccConstructor);
+      },
+      nullptr);
+
+  constexpr const char* kConstructors[] = { "<clinit>", "<init>"};
+  for (size_t i = 0; i < 2; ++i) {
+    // Constructor with code marked native.
+    VerifyModification(
+        kMethodFlagsTestDex,
+        "method_flags_constructor_native",
+        [&](DexFile* dex_file) {
+          ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          OrMaskToMethodFlags(dex_file, kConstructors[i], kAccNative);
+        },
+        "has code, but is marked native or abstract");
+    // Constructor with code marked abstract.
+    VerifyModification(
+        kMethodFlagsTestDex,
+        "method_flags_constructor_abstract",
+        [&](DexFile* dex_file) {
+          ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          OrMaskToMethodFlags(dex_file, kConstructors[i], kAccAbstract);
+        },
+        "has code, but is marked native or abstract");
+    // Constructor as-is without code.
+    VerifyModification(
+        kMethodFlagsTestDex,
+        "method_flags_constructor_nocode",
+        [&](DexFile* dex_file) {
+          ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          RemoveCode(dex_file, kConstructors[i]);
+        },
+        "has no code, but is not marked native or abstract");
+    // Constructor without code marked native.
+    VerifyModification(
+        kMethodFlagsTestDex,
+        "method_flags_constructor_native_nocode",
+        [&](DexFile* dex_file) {
+          ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          OrMaskToMethodFlags(dex_file, kConstructors[i], kAccNative);
+          RemoveCode(dex_file, kConstructors[i]);
+        },
+        "must not be abstract or native");
+    // Constructor without code marked abstract.
+    VerifyModification(
+        kMethodFlagsTestDex,
+        "method_flags_constructor_abstract_nocode",
+        [&](DexFile* dex_file) {
+          ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          OrMaskToMethodFlags(dex_file, kConstructors[i], kAccAbstract);
+          RemoveCode(dex_file, kConstructors[i]);
+        },
+        "must not be abstract or native");
+  }
+  // <init> may only have (modulo ignored):
+  // kAccPrivate | kAccProtected | kAccPublic | kAccStrict | kAccVarargs | kAccSynthetic
+  static constexpr uint32_t kInitAllowed[] = {
+      0,
+      kAccPrivate,
+      kAccProtected,
+      kAccPublic,
+      kAccStrict,
+      kAccVarargs,
+      kAccSynthetic
+  };
+  for (size_t i = 0; i < arraysize(kInitAllowed); ++i) {
+    VerifyModification(
+        kMethodFlagsTestDex,
+        "init_allowed_flags",
+        [&](DexFile* dex_file) {
+          ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          ApplyMaskToMethodFlags(dex_file, "<init>", ~kAccPublic);
+          OrMaskToMethodFlags(dex_file, "<init>", kInitAllowed[i]);
+        },
+        nullptr);
+  }
+  // Only one of public-private-protected.
+  for (size_t i = 1; i < 8; ++i) {
+    if (POPCOUNT(i) < 2) {
+      continue;
+    }
+    // Technically i itself is already a valid combination of the three flags,
+    // but build the mask explicitly to be defensive.
+    uint32_t mask = ((i & 1) != 0 ? kAccPrivate : 0) |
+                    ((i & 2) != 0 ? kAccProtected : 0) |
+                    ((i & 4) != 0 ? kAccPublic : 0);
+    VerifyModification(
+        kMethodFlagsTestDex,
+        "init_one_of_ppp",
+        [&](DexFile* dex_file) {
+          ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          ApplyMaskToMethodFlags(dex_file, "<init>", ~kAccPublic);
+          OrMaskToMethodFlags(dex_file, "<init>", mask);
+        },
+        "Method may have only one of public/protected/private");
+  }
+  // <init> does not allow kAccStatic | kAccFinal | kAccSynchronized | kAccBridge.
+  // Static needs to be handled separately, as it triggers its own error message.
+  VerifyModification(
+      kMethodFlagsTestDex,
+      "init_not_allowed_flags",
+      [&](DexFile* dex_file) {
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+        ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToMethodFlags(dex_file, "<init>", ~kAccPublic);
+        OrMaskToMethodFlags(dex_file, "<init>", kAccStatic);
+      },
+      "Constructor 1 is not flagged correctly wrt/ static");
+  static constexpr uint32_t kInitNotAllowed[] = {
+      kAccFinal,
+      kAccSynchronized,
+      kAccBridge
+  };
+  for (size_t i = 0; i < arraysize(kInitNotAllowed); ++i) {
+    VerifyModification(
+        kMethodFlagsTestDex,
+        "init_not_allowed_flags",
+        [&](DexFile* dex_file) {
+          ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          ApplyMaskToMethodFlags(dex_file, "<init>", ~kAccPublic);
+          OrMaskToMethodFlags(dex_file, "<init>", kInitNotAllowed[i]);
+        },
+        "Constructor 1 flagged inappropriately");
+  }
+}
+
+TEST_F(DexFileVerifierTest, MethodAccessFlagsMethods) {
+  constexpr const char* kMethods[] = { "foo", "bar"};
+  for (size_t i = 0; i < arraysize(kMethods); ++i) {
+    // Make sure we reject non-constructors marked as constructors.
+    VerifyModification(
+        kMethodFlagsTestDex,
+        "method_flags_non_constructor",
+        [&](DexFile* dex_file) {
+          ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          OrMaskToMethodFlags(dex_file, kMethods[i], kAccConstructor);
+        },
+        "is marked constructor, but doesn't match name");
+
+    VerifyModification(
+        kMethodFlagsTestDex,
+        "method_flags_native_with_code",
+        [&](DexFile* dex_file) {
+          ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          OrMaskToMethodFlags(dex_file, kMethods[i], kAccNative);
+        },
+        "has code, but is marked native or abstract");
+
+    VerifyModification(
+        kMethodFlagsTestDex,
+        "method_flags_abstract_with_code",
+        [&](DexFile* dex_file) {
+          ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          OrMaskToMethodFlags(dex_file, kMethods[i], kAccAbstract);
+        },
+        "has code, but is marked native or abstract");
+
+    VerifyModification(
+        kMethodFlagsTestDex,
+        "method_flags_non_abstract_native_no_code",
+        [&](DexFile* dex_file) {
+          ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          RemoveCode(dex_file, kMethods[i]);
+        },
+        "has no code, but is not marked native or abstract");
+
+    // Abstract methods may not have the following flags.
+    constexpr uint32_t kAbstractDisallowed[] = {
+        kAccPrivate,
+        kAccStatic,
+        kAccFinal,
+        kAccNative,
+        kAccStrict,
+        kAccSynchronized,
+    };
+    for (size_t j = 0; j < arraysize(kAbstractDisallowed); ++j) {
+      VerifyModification(
+          kMethodFlagsTestDex,
+          "method_flags_abstract_and_disallowed_no_code",
+          [&](DexFile* dex_file) {
+            ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+            ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+            RemoveCode(dex_file, kMethods[i]);
+
+            // Can't check private and static with foo, as it's in the virtual list and gives a
+            // different error.
+            if (((GetMethodFlags(dex_file, kMethods[i]) & kAccPublic) != 0) &&
+                ((kAbstractDisallowed[j] & (kAccPrivate | kAccStatic)) != 0)) {
+              // Use another breaking flag.
+              OrMaskToMethodFlags(dex_file, kMethods[i], kAccAbstract | kAccFinal);
+            } else {
+              OrMaskToMethodFlags(dex_file, kMethods[i], kAccAbstract | kAbstractDisallowed[j]);
+            }
+          },
+          "has disallowed access flags");
+    }
+
+    // Only one of public-private-protected.
+    for (size_t j = 1; j < 8; ++j) {
+      if (POPCOUNT(j) < 2) {
+        continue;
+      }
+      // Technically j itself is already a valid combination of the three flags,
+      // but build the mask explicitly to be defensive.
+      uint32_t mask = ((j & 1) != 0 ? kAccPrivate : 0) |
+                      ((j & 2) != 0 ? kAccProtected : 0) |
+                      ((j & 4) != 0 ? kAccPublic : 0);
+      VerifyModification(
+          kMethodFlagsTestDex,
+          "method_flags_one_of_ppp",
+          [&](DexFile* dex_file) {
+            ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+            ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+            ApplyMaskToMethodFlags(dex_file, kMethods[i], ~kAccPublic);
+            OrMaskToMethodFlags(dex_file, kMethods[i], mask);
+          },
+          "Method may have only one of public/protected/private");
+    }
+  }
+}
+
+TEST_F(DexFileVerifierTest, MethodAccessFlagsIgnoredOK) {
+  constexpr const char* kMethods[] = { "<clinit>", "<init>", "foo", "bar"};
+  for (size_t i = 0; i < arraysize(kMethods); ++i) {
+    // All interesting method flags, other flags are to be ignored.
+    constexpr uint32_t kAllMethodFlags =
+        kAccPublic |
+        kAccPrivate |
+        kAccProtected |
+        kAccStatic |
+        kAccFinal |
+        kAccSynchronized |
+        kAccBridge |
+        kAccVarargs |
+        kAccNative |
+        kAccAbstract |
+        kAccStrict |
+        kAccSynthetic;
+    constexpr uint32_t kIgnoredMask = ~kAllMethodFlags & 0xFFFF;
+    VerifyModification(
+        kMethodFlagsTestDex,
+        "method_flags_ignored",
+        [&](DexFile* dex_file) {
+          ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToMethodFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          OrMaskToMethodFlags(dex_file, kMethods[i], kIgnoredMask);
+        },
+        nullptr);
+  }
+}
+
+// Set of dex files for interface method tests. Since method names are not as easy to mutate,
+// the bad cases are split into separate dex files.
+
+// Interface with an instance constructor.
+//
+// .class public interface LInterfaceMethodFlags;
+// .super Ljava/lang/Object;
+//
+// .method public static constructor <clinit>()V
+// .registers 1
+//     return-void
+// .end method
+//
+// .method public constructor <init>()V
+// .registers 1
+//     return-void
+// .end method
+static const char kMethodFlagsInterfaceWithInit[] =
+    "ZGV4CjAzNQDRNt+hZ6X3I+xe66iVlCW7h9I38HmN4SvUAQAAcAAAAHhWNBIAAAAAAAAAAEwBAAAF"
+    "AAAAcAAAAAMAAACEAAAAAQAAAJAAAAAAAAAAAAAAAAIAAACcAAAAAQAAAKwAAAAIAQAAzAAAAMwA"
+    "AADWAAAA3gAAAPYAAAAKAQAAAgAAAAMAAAAEAAAABAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAQAA"
+    "AAAAAAABAgAAAQAAAAAAAAD/////AAAAADoBAAAAAAAACDxjbGluaXQ+AAY8aW5pdD4AFkxJbnRl"
+    "cmZhY2VNZXRob2RGbGFnczsAEkxqYXZhL2xhbmcvT2JqZWN0OwABVgAAAAAAAAAAAQAAAAAAAAAA"
+    "AAAAAQAAAA4AAAABAAEAAAAAAAAAAAABAAAADgAAAAIAAImABJQCAYGABKgCAAALAAAAAAAAAAEA"
+    "AAAAAAAAAQAAAAUAAABwAAAAAgAAAAMAAACEAAAAAwAAAAEAAACQAAAABQAAAAIAAACcAAAABgAA"
+    "AAEAAACsAAAAAiAAAAUAAADMAAAAAxAAAAEAAAAQAQAAASAAAAIAAAAUAQAAACAAAAEAAAA6AQAA"
+    "ABAAAAEAAABMAQAA";
+
+// Standard interface. Use declared-synchronized again for 3B encoding.
+//
+// .class public interface LInterfaceMethodFlags;
+// .super Ljava/lang/Object;
+//
+// .method public static constructor <clinit>()V
+// .registers 1
+//     return-void
+// .end method
+//
+// .method public abstract declared-synchronized foo()V
+// .end method
+static const char kMethodFlagsInterface[] =
+    "ZGV4CjAzNQCOM0odZ5bws1d9GSmumXaK5iE/7XxFpOm8AQAAcAAAAHhWNBIAAAAAAAAAADQBAAAF"
+    "AAAAcAAAAAMAAACEAAAAAQAAAJAAAAAAAAAAAAAAAAIAAACcAAAAAQAAAKwAAADwAAAAzAAAAMwA"
+    "AADWAAAA7gAAAAIBAAAFAQAAAQAAAAIAAAADAAAAAwAAAAIAAAAAAAAAAAAAAAAAAAAAAAAABAAA"
+    "AAAAAAABAgAAAQAAAAAAAAD/////AAAAACIBAAAAAAAACDxjbGluaXQ+ABZMSW50ZXJmYWNlTWV0"
+    "aG9kRmxhZ3M7ABJMamF2YS9sYW5nL09iamVjdDsAAVYAA2ZvbwAAAAAAAAABAAAAAAAAAAAAAAAB"
+    "AAAADgAAAAEBAImABJACAYGICAAAAAALAAAAAAAAAAEAAAAAAAAAAQAAAAUAAABwAAAAAgAAAAMA"
+    "AACEAAAAAwAAAAEAAACQAAAABQAAAAIAAACcAAAABgAAAAEAAACsAAAAAiAAAAUAAADMAAAAAxAA"
+    "AAEAAAAMAQAAASAAAAEAAAAQAQAAACAAAAEAAAAiAQAAABAAAAEAAAA0AQAA";
+
+// To simplify generation of interesting "sub-states" of src_value, allow a "simple" mask to apply
+// to a src_value, such that mask bit 0 applies to the lowest set bit in src_value, and so on.
+static uint32_t ApplyMaskShifted(uint32_t src_value, uint32_t mask) {
+  uint32_t result = 0;
+  uint32_t mask_index = 0;
+  while (src_value != 0) {
+    uint32_t index = CTZ(src_value);
+    if (((src_value & (1 << index)) != 0) &&
+        ((mask & (1 << mask_index)) != 0)) {
+      result |= (1 << index);
+    }
+    src_value &= ~(1 << index);
+    mask_index++;
+  }
+  return result;
+}
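+// Worked example: for src_value = 0b10110 (bits 1, 2 and 4 set), mask bit 0 selects bit 1,
+// mask bit 1 selects bit 2, and mask bit 2 selects bit 4, so
+// ApplyMaskShifted(0b10110, 0b101) == 0b10010.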
+
+TEST_F(DexFileVerifierTest, MethodAccessFlagsInterfaces) {
+  // Reject interface with <init>.
+  VerifyModification(
+      kMethodFlagsInterfaceWithInit,
+      "method_flags_interface_with_init",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) {},
+      "Non-clinit interface method 1 should not have code");
+
+  VerifyModification(
+      kMethodFlagsInterface,
+      "method_flags_interface_ok",
+      [](DexFile* dex_file) {
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+      },
+      nullptr);
+
+  VerifyModification(
+      kMethodFlagsInterface,
+      "method_flags_interface_non_public",
+      [](DexFile* dex_file) {
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
+      },
+      "Interface method 1 is not public and abstract");
+  VerifyModification(
+      kMethodFlagsInterface,
+      "method_flags_interface_non_abstract",
+      [](DexFile* dex_file) {
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccAbstract);
+      },
+      "Method 1 has no code, but is not marked native or abstract");
+
+  VerifyModification(
+      kMethodFlagsInterface,
+      "method_flags_interface_static",
+      [](DexFile* dex_file) {
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        OrMaskToMethodFlags(dex_file, "foo", kAccStatic);
+      },
+      "Direct/virtual method 1 not in expected list 0");
+  VerifyModification(
+      kMethodFlagsInterface,
+      "method_flags_interface_private",
+      [](DexFile* dex_file) {
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
+        OrMaskToMethodFlags(dex_file, "foo", kAccPrivate);
+      },
+      "Direct/virtual method 1 not in expected list 0");
+
+  VerifyModification(
+      kMethodFlagsInterface,
+      "method_flags_interface_protected",
+      [](DexFile* dex_file) {
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
+        OrMaskToMethodFlags(dex_file, "foo", kAccProtected);
+      },
+      "Interface method 1 is not public and abstract");
+
+  constexpr uint32_t kAllMethodFlags =
+      kAccPublic |
+      kAccPrivate |
+      kAccProtected |
+      kAccStatic |
+      kAccFinal |
+      kAccSynchronized |
+      kAccBridge |
+      kAccVarargs |
+      kAccNative |
+      kAccAbstract |
+      kAccStrict |
+      kAccSynthetic;
+  constexpr uint32_t kInterfaceMethodFlags =
+      kAccPublic | kAccAbstract | kAccVarargs | kAccBridge | kAccSynthetic;
+  constexpr uint32_t kInterfaceDisallowed = kAllMethodFlags &
+                                            ~kInterfaceMethodFlags &
+                                            // Already tested above; these need separate checks.
+                                            ~kAccStatic &
+                                            ~kAccPrivate &
+                                            ~kAccProtected;
+  static_assert(kInterfaceDisallowed != 0, "There should be disallowed flags.");
+
+  uint32_t bits = POPCOUNT(kInterfaceDisallowed);
+  for (uint32_t i = 1; i < (1u << bits); ++i) {
+    VerifyModification(
+        kMethodFlagsInterface,
+        "method_flags_interface_non_abstract",
+        [&](DexFile* dex_file) {
+          ApplyMaskToMethodFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+          uint32_t mask = ApplyMaskShifted(kInterfaceDisallowed, i);
+          if ((mask & kAccProtected) != 0) {
+            mask &= ~kAccProtected;
+            ApplyMaskToMethodFlags(dex_file, "foo", ~kAccPublic);
+          }
+          OrMaskToMethodFlags(dex_file, "foo", mask);
+        },
+        "Abstract method 1 has disallowed access flags");
+  }
+}
+
+///////////////////////////////////////////////////////////////////
+
+// Field flags.
+
+// Find the field data for the first field with the given name (from class 0). Note: the pointer
+// is to the access flags, so that the caller doesn't have to handle the leb128-encoded field-index
+// delta.
+static const uint8_t* FindFieldData(const DexFile* dex_file, const char* name) {
+  const DexFile::ClassDef& class_def = dex_file->GetClassDef(0);
+  const uint8_t* class_data = dex_file->GetClassData(class_def);
+
+  ClassDataItemIterator it(*dex_file, class_data);
+
+  const uint8_t* trailing = class_data;
+  // Need to manually decode the four entries. DataPointer() doesn't work for this, as the first
+  // element has already been loaded into the iterator.
+  DecodeUnsignedLeb128(&trailing);
+  DecodeUnsignedLeb128(&trailing);
+  DecodeUnsignedLeb128(&trailing);
+  DecodeUnsignedLeb128(&trailing);
+
+  while (it.HasNextStaticField() || it.HasNextInstanceField()) {
+    uint32_t field_index = it.GetMemberIndex();
+    uint32_t name_index = dex_file->GetFieldId(field_index).name_idx_;
+    const DexFile::StringId& string_id = dex_file->GetStringId(name_index);
+    const char* str = dex_file->GetStringData(string_id);
+    if (strcmp(name, str) == 0) {
+      DecodeUnsignedLeb128(&trailing);
+      return trailing;
+    }
+
+    trailing = it.DataPointer();
+    it.Next();
+  }
+
+  return nullptr;
+}
+
+// Set the field flags to the given value.
+static void SetFieldFlags(DexFile* dex_file, const char* field, uint32_t mask) {
+  uint8_t* field_flags_ptr = const_cast<uint8_t*>(FindFieldData(dex_file, field));
+  CHECK(field_flags_ptr != nullptr) << field;
+
+  // Unroll this, as we only have three bytes, anyway.
+  uint8_t base1 = static_cast<uint8_t>(mask & 0x7F);
+  *(field_flags_ptr++) = (base1 | 0x80);
+  mask >>= 7;
+
+  uint8_t base2 = static_cast<uint8_t>(mask & 0x7F);
+  *(field_flags_ptr++) = (base2 | 0x80);
+  mask >>= 7;
+
+  uint8_t base3 = static_cast<uint8_t>(mask & 0x7F);
+  *field_flags_ptr = base3;
+}
+
+static uint32_t GetFieldFlags(DexFile* dex_file, const char* field) {
+  const uint8_t* field_flags_ptr = const_cast<uint8_t*>(FindFieldData(dex_file, field));
+  CHECK(field_flags_ptr != nullptr) << field;
+  return DecodeUnsignedLeb128(&field_flags_ptr);
+}
+
+// Apply the given mask to the field flags.
+static void ApplyMaskToFieldFlags(DexFile* dex_file, const char* field, uint32_t mask) {
+  uint32_t value = GetFieldFlags(dex_file, field);
+  value &= mask;
+  SetFieldFlags(dex_file, field, value);
+}
+
+// Or the given mask into the field flags.
+static void OrMaskToFieldFlags(DexFile* dex_file, const char* field, uint32_t mask) {
+  uint32_t value = GetFieldFlags(dex_file, field);
+  value |= mask;
+  SetFieldFlags(dex_file, field, value);
+}
+
+// Standard class. Use declared-synchronized again for 3B encoding.
+//
+// .class public LFieldFlags;
+// .super Ljava/lang/Object;
+//
+// .field declared-synchronized public foo:I
+//
+// .field declared-synchronized public static bar:I
+
+static const char kFieldFlagsTestDex[] =
+    "ZGV4CjAzNQBtLw7hydbfv4TdXidZyzAB70W7w3vnYJRwAQAAcAAAAHhWNBIAAAAAAAAAAAABAAAF"
+    "AAAAcAAAAAMAAACEAAAAAAAAAAAAAAACAAAAkAAAAAAAAAAAAAAAAQAAAKAAAACwAAAAwAAAAMAA"
+    "AADDAAAA0QAAAOUAAADqAAAAAAAAAAEAAAACAAAAAQAAAAMAAAABAAAABAAAAAEAAAABAAAAAgAA"
+    "AAAAAAD/////AAAAAPQAAAAAAAAAAUkADExGaWVsZEZsYWdzOwASTGphdmEvbGFuZy9PYmplY3Q7"
+    "AANiYXIAA2ZvbwAAAAAAAAEBAAAAiYAIAYGACAkAAAAAAAAAAQAAAAAAAAABAAAABQAAAHAAAAAC"
+    "AAAAAwAAAIQAAAAEAAAAAgAAAJAAAAAGAAAAAQAAAKAAAAACIAAABQAAAMAAAAADEAAAAQAAAPAA"
+    "AAAAIAAAAQAAAPQAAAAAEAAAAQAAAAABAAA=";
+
+TEST_F(DexFileVerifierTest, FieldAccessFlagsBase) {
+  // Check that the dex file verifies once the bogus declared-synchronized flags are removed
+  // from "foo" and "bar".
+  VerifyModification(
+      kFieldFlagsTestDex,
+      "field_flags_ok",
+      [](DexFile* dex_file) {
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+        ApplyMaskToFieldFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+      },
+      nullptr);
+}
+
+TEST_F(DexFileVerifierTest, FieldAccessFlagsWrongList) {
+  // Mark the field so that it should appear in the opposite list (instance vs static).
+  VerifyModification(
+      kFieldFlagsTestDex,
+      "field_flags_wrong_list",
+      [](DexFile* dex_file) {
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+        ApplyMaskToFieldFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+        OrMaskToFieldFlags(dex_file, "foo", kAccStatic);
+      },
+      "Static/instance field not in expected list");
+  VerifyModification(
+      kFieldFlagsTestDex,
+      "field_flags_wrong_list",
+      [](DexFile* dex_file) {
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+        ApplyMaskToFieldFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToFieldFlags(dex_file, "bar", ~kAccStatic);
+      },
+      "Static/instance field not in expected list");
+}
+
+TEST_F(DexFileVerifierTest, FieldAccessFlagsPPP) {
+  static const char* kFields[] = { "foo", "bar" };
+  for (size_t i = 0; i < arraysize(kFields); ++i) {
+    // Should be OK to remove public.
+    VerifyModification(
+        kFieldFlagsTestDex,
+        "field_flags_non_public",
+        [&](DexFile* dex_file) {
+          ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToFieldFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          ApplyMaskToFieldFlags(dex_file, kFields[i], ~kAccPublic);
+        },
+        nullptr);
+    constexpr uint32_t kAccFlags = kAccPublic | kAccPrivate | kAccProtected;
+    uint32_t bits = POPCOUNT(kAccFlags);
+    for (uint32_t j = 1; j < (1u << bits); ++j) {
+      if (POPCOUNT(j) < 2) {
+        continue;
+      }
+      VerifyModification(
+           kFieldFlagsTestDex,
+           "field_flags_ppp",
+           [&](DexFile* dex_file) {
+             ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+             ApplyMaskToFieldFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+             ApplyMaskToFieldFlags(dex_file, kFields[i], ~kAccPublic);
+             uint32_t mask = ApplyMaskShifted(kAccFlags, j);
+             OrMaskToFieldFlags(dex_file, kFields[i], mask);
+           },
+           "Field may have only one of public/protected/private");
+    }
+  }
+}
+
+TEST_F(DexFileVerifierTest, FieldAccessFlagsIgnoredOK) {
+  constexpr const char* kFields[] = { "foo", "bar"};
+  for (size_t i = 0; i < arraysize(kFields); ++i) {
+    // All interesting method flags, other flags are to be ignored.
+    constexpr uint32_t kAllFieldFlags =
+        kAccPublic |
+        kAccPrivate |
+        kAccProtected |
+        kAccStatic |
+        kAccFinal |
+        kAccVolatile |
+        kAccTransient |
+        kAccSynthetic |
+        kAccEnum;
+    constexpr uint32_t kIgnoredMask = ~kAllFieldFlags & 0xFFFF;
+    VerifyModification(
+        kFieldFlagsTestDex,
+        "field_flags_ignored",
+        [&](DexFile* dex_file) {
+          ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToFieldFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          OrMaskToFieldFlags(dex_file, kFields[i], kIgnoredMask);
+        },
+        nullptr);
+  }
+}
+
+TEST_F(DexFileVerifierTest, FieldAccessFlagsVolatileFinal) {
+  constexpr const char* kFields[] = { "foo", "bar"};
+  for (size_t i = 0; i < arraysize(kFields); ++i) {
+    VerifyModification(
+        kFieldFlagsTestDex,
+        "field_flags_final_and_volatile",
+        [&](DexFile* dex_file) {
+          ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+          ApplyMaskToFieldFlags(dex_file, "bar", ~kAccDeclaredSynchronized);
+
+          OrMaskToFieldFlags(dex_file, kFields[i], kAccVolatile | kAccFinal);
+        },
+        "Fields may not be volatile and final");
+  }
+}
+
+// Standard interface. Needs to be separate from class as interfaces do not allow instance fields.
+// Use declared-synchronized again for 3B encoding.
+//
+// .class public interface LInterfaceFieldFlags;
+// .super Ljava/lang/Object;
+//
+// .field declared-synchronized public static final foo:I
+
+static const char kFieldFlagsInterfaceTestDex[] =
+    "ZGV4CjAzNQCVMHfEimR1zZPk6hl6O9GPAYqkl3u0umFkAQAAcAAAAHhWNBIAAAAAAAAAAPQAAAAE"
+    "AAAAcAAAAAMAAACAAAAAAAAAAAAAAAABAAAAjAAAAAAAAAAAAAAAAQAAAJQAAACwAAAAtAAAALQA"
+    "AAC3AAAAzgAAAOIAAAAAAAAAAQAAAAIAAAABAAAAAwAAAAEAAAABAgAAAgAAAAAAAAD/////AAAA"
+    "AOwAAAAAAAAAAUkAFUxJbnRlcmZhY2VGaWVsZEZsYWdzOwASTGphdmEvbGFuZy9PYmplY3Q7AANm"
+    "b28AAAAAAAABAAAAAJmACAkAAAAAAAAAAQAAAAAAAAABAAAABAAAAHAAAAACAAAAAwAAAIAAAAAE"
+    "AAAAAQAAAIwAAAAGAAAAAQAAAJQAAAACIAAABAAAALQAAAADEAAAAQAAAOgAAAAAIAAAAQAAAOwA"
+    "AAAAEAAAAQAAAPQAAAA=";
+
+TEST_F(DexFileVerifierTest, FieldAccessFlagsInterface) {
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface",
+      [](DexFile* dex_file) {
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+      },
+      nullptr);
+
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface_non_public",
+      [](DexFile* dex_file) {
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
+      },
+      "Interface field is not public final static");
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface_non_final",
+      [](DexFile* dex_file) {
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccFinal);
+      },
+      "Interface field is not public final static");
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface_protected",
+      [](DexFile* dex_file) {
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
+        OrMaskToFieldFlags(dex_file, "foo", kAccProtected);
+      },
+      "Interface field is not public final static");
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface_private",
+      [](DexFile* dex_file) {
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
+        OrMaskToFieldFlags(dex_file, "foo", kAccPrivate);
+      },
+      "Interface field is not public final static");
+
+  VerifyModification(
+      kFieldFlagsInterfaceTestDex,
+      "field_flags_interface_synthetic",
+      [](DexFile* dex_file) {
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+        OrMaskToFieldFlags(dex_file, "foo", kAccSynthetic);
+      },
+      nullptr);
+
+  constexpr uint32_t kAllFieldFlags =
+      kAccPublic |
+      kAccPrivate |
+      kAccProtected |
+      kAccStatic |
+      kAccFinal |
+      kAccVolatile |
+      kAccTransient |
+      kAccSynthetic |
+      kAccEnum;
+  constexpr uint32_t kInterfaceFieldFlags = kAccPublic | kAccStatic | kAccFinal | kAccSynthetic;
+  constexpr uint32_t kInterfaceDisallowed = kAllFieldFlags &
+                                            ~kInterfaceFieldFlags &
+                                            ~kAccProtected &
+                                            ~kAccPrivate;
+  static_assert(kInterfaceDisallowed != 0, "There should be disallowed flags.");
+
+  uint32_t bits = POPCOUNT(kInterfaceDisallowed);
+  for (uint32_t i = 1; i < (1u << bits); ++i) {
+    VerifyModification(
+        kFieldFlagsInterfaceTestDex,
+        "field_flags_interface_disallowed",
+        [&](DexFile* dex_file) {
+          ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+
+          uint32_t mask = ApplyMaskShifted(kInterfaceDisallowed, i);
+          if ((mask & kAccProtected) != 0) {
+            mask &= ~kAccProtected;
+            ApplyMaskToFieldFlags(dex_file, "foo", ~kAccPublic);
+          }
+          OrMaskToFieldFlags(dex_file, "foo", mask);
+        },
+        "Interface field has disallowed flag");
+  }
+}
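
[Editor's note] Continuing the arithmetic from the sketch above (reusing the assumed flag constants), the disallowed set that the combination loop enumerates is small:

    constexpr uint32_t kInterfaceFieldFlags = kAccPublic | kAccStatic | kAccFinal | kAccSynthetic;
    constexpr uint32_t kInterfaceDisallowed =
        kAllFieldFlags & ~kInterfaceFieldFlags & ~kAccProtected & ~kAccPrivate;
    static_assert(kInterfaceDisallowed == (kAccVolatile | kAccTransient | kAccEnum),
                  "POPCOUNT == 3, so the loop tries i = 1..7: every non-empty "
                  "combination of volatile, transient and enum");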
+
+// Standard bad interface. Needs to be separate from class as interfaces do not allow instance
+// fields. Use declared-synchronized again for 3B encoding.
+//
+// .class public interface LInterfaceFieldFlags;
+// .super Ljava/lang/Object;
+//
+// .field declared-synchronized public final foo:I
+
+static const char kFieldFlagsInterfaceBadTestDex[] =
+    "ZGV4CjAzNQByMUnqYKHBkUpvvNp+9CnZ2VyDkKnRN6VkAQAAcAAAAHhWNBIAAAAAAAAAAPQAAAAE"
+    "AAAAcAAAAAMAAACAAAAAAAAAAAAAAAABAAAAjAAAAAAAAAAAAAAAAQAAAJQAAACwAAAAtAAAALQA"
+    "AAC3AAAAzgAAAOIAAAAAAAAAAQAAAAIAAAABAAAAAwAAAAEAAAABAgAAAgAAAAAAAAD/////AAAA"
+    "AOwAAAAAAAAAAUkAFUxJbnRlcmZhY2VGaWVsZEZsYWdzOwASTGphdmEvbGFuZy9PYmplY3Q7AANm"
+    "b28AAAAAAAAAAQAAAJGACAkAAAAAAAAAAQAAAAAAAAABAAAABAAAAHAAAAACAAAAAwAAAIAAAAAE"
+    "AAAAAQAAAIwAAAAGAAAAAQAAAJQAAAACIAAABAAAALQAAAADEAAAAQAAAOgAAAAAIAAAAQAAAOwA"
+    "AAAAEAAAAQAAAPQAAAA=";
+
+TEST_F(DexFileVerifierTest, FieldAccessFlagsInterfaceNonStatic) {
+  VerifyModification(
+      kFieldFlagsInterfaceBadTestDex,
+      "field_flags_interface_non_static",
+      [](DexFile* dex_file) {
+        ApplyMaskToFieldFlags(dex_file, "foo", ~kAccDeclaredSynchronized);
+      },
+      "Interface field is not public final static");
 }
 
 // Generated from:
@@ -305,15 +1275,14 @@
     ASSERT_TRUE(raw.get() != nullptr) << error_msg;
   }
 
-  {
-    // Modify the debug information entry.
-    ScratchFile tmp;
-    std::string error_msg;
-    bool success = !ModifyAndLoad(kDebugInfoTestDex, tmp.GetFilename().c_str(), 416, 0x14U,
-                                  &error_msg);
-    ASSERT_TRUE(success);
-    ASSERT_NE(error_msg.find("DBG_START_LOCAL type_idx"), std::string::npos) << error_msg;
-  }
+  // Modify the debug information entry.
+  VerifyModification(
+      kDebugInfoTestDex,
+      "debug_start_type_idx",
+      [](DexFile* dex_file) {
+        *(const_cast<uint8_t*>(dex_file->Begin()) + 416) = 0x14U;
+      },
+      "DBG_START_LOCAL type_idx");
 }
 
 }  // namespace art
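
[Editor's note] All of the tests above funnel through VerifyModification(). A hypothetical outline of that helper, assuming it decodes the base64 dex, applies the mutation, fixes up the header checksum, and then runs the verifier; DecodeBase64ToDexFile and FixUpChecksum are illustrative names, not the real ones, and the gtest/ART includes of the test file are assumed:

    // Sketch only; the real helper is defined near the top of this test file.
    static void VerifyModification(const char* dex_base64,
                                   const char* location,
                                   std::function<void(DexFile*)> mutate,
                                   const char* expected_error) {
      std::unique_ptr<DexFile> dex_file(DecodeBase64ToDexFile(dex_base64));  // assumed helper
      mutate(dex_file.get());
      FixUpChecksum(const_cast<uint8_t*>(dex_file->Begin()));                // assumed helper
      std::string error_msg;
      bool success = DexFileVerifier::Verify(
          dex_file.get(), dex_file->Begin(), dex_file->Size(), location, &error_msg);
      if (expected_error == nullptr) {
        EXPECT_TRUE(success) << error_msg;
      } else {
        EXPECT_FALSE(success);
        EXPECT_NE(error_msg.find(expected_error), std::string::npos) << error_msg;
      }
    }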
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index 33d756e..9f84bd2 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -186,13 +186,15 @@
     fake_stack.push_back(0);
   }
 
-  fake_stack.push_back(method_g_->ToNativeQuickPc(dex_pc));  // return pc
+  fake_stack.push_back(
+      method_g_->ToNativeQuickPc(dex_pc, /* is_catch_handler */ false));  // return pc
 
   // Create/push fake 16byte stack frame for method g
   fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
   fake_stack.push_back(0);
   fake_stack.push_back(0);
-  fake_stack.push_back(method_f_->ToNativeQuickPc(dex_pc));  // return pc
+  fake_stack.push_back(
+      method_f_->ToNativeQuickPc(dex_pc, /* is_catch_handler */ false));  // return pc
 
   // Create/push fake 16byte stack frame for method f
   fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_));
diff --git a/runtime/gc/accounting/mod_union_table-inl.h b/runtime/gc/accounting/mod_union_table-inl.h
index c756127..3a09634 100644
--- a/runtime/gc/accounting/mod_union_table-inl.h
+++ b/runtime/gc/accounting/mod_union_table-inl.h
@@ -28,7 +28,8 @@
 // A mod-union table to record image references to the Zygote and alloc space.
 class ModUnionTableToZygoteAllocspace : public ModUnionTableReferenceCache {
  public:
-  explicit ModUnionTableToZygoteAllocspace(const std::string& name, Heap* heap,
+  explicit ModUnionTableToZygoteAllocspace(const std::string& name,
+                                           Heap* heap,
                                            space::ContinuousSpace* space)
       : ModUnionTableReferenceCache(name, heap, space) {}
 
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 5151819..1361f7b 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -27,9 +27,7 @@
 #include "gc/space/space.h"
 #include "mirror/object-inl.h"
 #include "space_bitmap-inl.h"
-#include "thread.h"
-
-using ::art::mirror::Object;
+#include "thread-inl.h"
 
 namespace art {
 namespace gc {
@@ -38,10 +36,10 @@
 class ModUnionAddToCardSetVisitor {
  public:
   explicit ModUnionAddToCardSetVisitor(ModUnionTable::CardSet* const cleared_cards)
-      : cleared_cards_(cleared_cards) {
-  }
+      : cleared_cards_(cleared_cards) {}
 
-  inline void operator()(uint8_t* card, uint8_t expected_value,
+  inline void operator()(uint8_t* card,
+                         uint8_t expected_value,
                          uint8_t new_value ATTRIBUTE_UNUSED) const {
     if (expected_value == CardTable::kCardDirty) {
       cleared_cards_->insert(card);
@@ -55,10 +53,10 @@
 class ModUnionAddToCardBitmapVisitor {
  public:
   ModUnionAddToCardBitmapVisitor(ModUnionTable::CardBitmap* bitmap, CardTable* card_table)
-      : bitmap_(bitmap), card_table_(card_table) {
-  }
+      : bitmap_(bitmap), card_table_(card_table) {}
 
-  inline void operator()(uint8_t* card, uint8_t expected_value,
+  inline void operator()(uint8_t* card,
+                         uint8_t expected_value,
                          uint8_t new_value ATTRIBUTE_UNUSED) const {
     if (expected_value == CardTable::kCardDirty) {
       // We want the address the card represents, not the address of the card.
@@ -93,12 +91,13 @@
                                         space::ContinuousSpace* from_space,
                                         space::ContinuousSpace* immune_space,
                                         bool* contains_reference_to_other_space)
-    : visitor_(visitor), from_space_(from_space), immune_space_(immune_space),
-      contains_reference_to_other_space_(contains_reference_to_other_space) {
-  }
+    : visitor_(visitor),
+      from_space_(from_space),
+      immune_space_(immune_space),
+      contains_reference_to_other_space_(contains_reference_to_other_space) {}
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
-  void operator()(Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
+  void operator()(mirror::Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
     MarkReference(obj->GetFieldObjectReferenceAddr(offset));
   }
@@ -144,14 +143,18 @@
                                space::ContinuousSpace* from_space,
                                space::ContinuousSpace* immune_space,
                                bool* contains_reference_to_other_space)
-      : visitor_(visitor), from_space_(from_space), immune_space_(immune_space),
+      : visitor_(visitor),
+        from_space_(from_space),
+        immune_space_(immune_space),
         contains_reference_to_other_space_(contains_reference_to_other_space) {}
 
-  void operator()(Object* root) const
+  void operator()(mirror::Object* root) const
       REQUIRES(Locks::heap_bitmap_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(root != nullptr);
-    ModUnionUpdateObjectReferencesVisitor ref_visitor(visitor_, from_space_, immune_space_,
+    ModUnionUpdateObjectReferencesVisitor ref_visitor(visitor_,
+                                                      from_space_,
+                                                      immune_space_,
                                                       contains_reference_to_other_space_);
     root->VisitReferences(ref_visitor, VoidFunctor());
   }
@@ -176,7 +179,7 @@
  public:
   AddToReferenceArrayVisitor(ModUnionTableReferenceCache* mod_union_table,
                              MarkObjectVisitor* visitor,
-                             std::vector<mirror::HeapReference<Object>*>* references,
+                             std::vector<mirror::HeapReference<mirror::Object>*>* references,
                              bool* has_target_reference)
       : mod_union_table_(mod_union_table),
         visitor_(visitor),
@@ -184,9 +187,9 @@
         has_target_reference_(has_target_reference) {}
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
-  void operator()(Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
+  void operator()(mirror::Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    mirror::HeapReference<Object>* ref_ptr = obj->GetFieldObjectReferenceAddr(offset);
+    mirror::HeapReference<mirror::Object>* ref_ptr = obj->GetFieldObjectReferenceAddr(offset);
     mirror::Object* ref = ref_ptr->AsMirrorPtr();
     // Only add the reference if it is non null and fits our criteria.
     if (ref != nullptr && mod_union_table_->ShouldAddReference(ref)) {
@@ -214,7 +217,7 @@
  private:
   ModUnionTableReferenceCache* const mod_union_table_;
   MarkObjectVisitor* const visitor_;
-  std::vector<mirror::HeapReference<Object>*>* const references_;
+  std::vector<mirror::HeapReference<mirror::Object>*>* const references_;
   bool* const has_target_reference_;
 };
 
@@ -222,14 +225,14 @@
  public:
   ModUnionReferenceVisitor(ModUnionTableReferenceCache* const mod_union_table,
                            MarkObjectVisitor* visitor,
-                           std::vector<mirror::HeapReference<Object>*>* references,
+                           std::vector<mirror::HeapReference<mirror::Object>*>* references,
                            bool* has_target_reference)
       : mod_union_table_(mod_union_table),
         visitor_(visitor),
         references_(references),
         has_target_reference_(has_target_reference) {}
 
-  void operator()(Object* obj) const
+  void operator()(mirror::Object* obj) const
       SHARED_REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
     // We don't have an early exit since we use the visitor pattern, an early
     // exit should significantly speed this up.
@@ -243,23 +246,23 @@
  private:
   ModUnionTableReferenceCache* const mod_union_table_;
   MarkObjectVisitor* const visitor_;
-  std::vector<mirror::HeapReference<Object>*>* const references_;
+  std::vector<mirror::HeapReference<mirror::Object>*>* const references_;
   bool* const has_target_reference_;
 };
 
 class CheckReferenceVisitor {
  public:
   CheckReferenceVisitor(ModUnionTableReferenceCache* mod_union_table,
-                        const std::set<const Object*>& references)
+                        const std::set<mirror::Object*>& references)
       : mod_union_table_(mod_union_table),
-        references_(references) {
-  }
+        references_(references) {}
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
-  void operator()(Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
+  void operator()(mirror::Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
       SHARED_REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
     mirror::Object* ref = obj->GetFieldObject<mirror::Object>(offset);
-    if (ref != nullptr && mod_union_table_->ShouldAddReference(ref) &&
+    if (ref != nullptr &&
+        mod_union_table_->ShouldAddReference(ref) &&
         references_.find(ref) == references_.end()) {
       Heap* heap = mod_union_table_->GetHeap();
       space::ContinuousSpace* from_space = heap->FindContinuousSpaceFromObject(obj, false);
@@ -290,18 +293,17 @@
 
  private:
   ModUnionTableReferenceCache* const mod_union_table_;
-  const std::set<const Object*>& references_;
+  const std::set<mirror::Object*>& references_;
 };
 
 class ModUnionCheckReferences {
  public:
   ModUnionCheckReferences(ModUnionTableReferenceCache* mod_union_table,
-                          const std::set<const Object*>& references)
+                          const std::set<mirror::Object*>& references)
       REQUIRES(Locks::heap_bitmap_lock_)
-      : mod_union_table_(mod_union_table), references_(references) {
-  }
+      : mod_union_table_(mod_union_table), references_(references) {}
 
-  void operator()(Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+  void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
     Locks::heap_bitmap_lock_->AssertSharedHeld(Thread::Current());
     CheckReferenceVisitor visitor(mod_union_table_, references_);
     obj->VisitReferences(visitor, VoidFunctor());
@@ -309,13 +311,13 @@
 
  private:
   ModUnionTableReferenceCache* const mod_union_table_;
-  const std::set<const Object*>& references_;
+  const std::set<mirror::Object*>& references_;
 };
 
 void ModUnionTableReferenceCache::Verify() {
   // Start by checking that everything in the mod union table is marked.
   for (const auto& ref_pair : references_) {
-    for (mirror::HeapReference<Object>* ref : ref_pair.second) {
+    for (mirror::HeapReference<mirror::Object>* ref : ref_pair.second) {
       CHECK(heap_->IsLiveObjectLocked(ref->AsMirrorPtr()));
     }
   }
@@ -326,8 +328,8 @@
   for (const auto& ref_pair : references_) {
     const uint8_t* card = ref_pair.first;
     if (*card == CardTable::kCardClean) {
-      std::set<const Object*> reference_set;
-      for (mirror::HeapReference<Object>* obj_ptr : ref_pair.second) {
+      std::set<mirror::Object*> reference_set;
+      for (mirror::HeapReference<mirror::Object>* obj_ptr : ref_pair.second) {
         reference_set.insert(obj_ptr->AsMirrorPtr());
       }
       ModUnionCheckReferences visitor(this, reference_set);
@@ -351,7 +353,7 @@
     uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card_addr));
     uintptr_t end = start + CardTable::kCardSize;
     os << reinterpret_cast<void*>(start) << "-" << reinterpret_cast<void*>(end) << "->{";
-    for (mirror::HeapReference<Object>* ref : ref_pair.second) {
+    for (mirror::HeapReference<mirror::Object>* ref : ref_pair.second) {
       os << reinterpret_cast<const void*>(ref->AsMirrorPtr()) << ",";
     }
     os << "},";
@@ -360,7 +362,7 @@
 
 void ModUnionTableReferenceCache::UpdateAndMarkReferences(MarkObjectVisitor* visitor) {
   CardTable* const card_table = heap_->GetCardTable();
-  std::vector<mirror::HeapReference<Object>*> cards_references;
+  std::vector<mirror::HeapReference<mirror::Object>*> cards_references;
   // If has_target_reference is true then there was a GcRoot compressed reference which wasn't
   // added. In this case we need to keep the card dirty.
   // We don't know if the GcRoot addresses will remain constant, for example, classloaders have a
@@ -375,7 +377,7 @@
     uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card));
     uintptr_t end = start + CardTable::kCardSize;
     space::ContinuousSpace* space =
-        heap_->FindContinuousSpaceFromObject(reinterpret_cast<Object*>(start), false);
+        heap_->FindContinuousSpaceFromObject(reinterpret_cast<mirror::Object*>(start), false);
     DCHECK(space != nullptr);
     ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
     live_bitmap->VisitMarkedRange(start, end, add_visitor);
@@ -402,12 +404,12 @@
   cleared_cards_ = std::move(new_cleared_cards);
   size_t count = 0;
   for (auto it = references_.begin(); it != references_.end();) {
-    std::vector<mirror::HeapReference<Object>*>& references = it->second;
+    std::vector<mirror::HeapReference<mirror::Object>*>& references = it->second;
     // Since there is no card mark for setting a reference to null, we check each reference.
     // If all of the references of a card are null then we can remove that card. This is racy
     // with the mutators, but handled by rescanning dirty cards.
     bool all_null = true;
-    for (mirror::HeapReference<Object>* obj_ptr : references) {
+    for (mirror::HeapReference<mirror::Object>* obj_ptr : references) {
       if (obj_ptr->AsMirrorPtr() != nullptr) {
         all_null = false;
         visitor->MarkHeapReference(obj_ptr);
@@ -426,7 +428,8 @@
   }
 }
 
-ModUnionTableCardCache::ModUnionTableCardCache(const std::string& name, Heap* heap,
+ModUnionTableCardCache::ModUnionTableCardCache(const std::string& name,
+                                               Heap* heap,
                                                space::ContinuousSpace* space)
     : ModUnionTable(name, heap, space) {
   // Normally here we could use End() instead of Limit(), but for testing we may want to have a
@@ -441,10 +444,15 @@
 
 class CardBitVisitor {
  public:
-  CardBitVisitor(MarkObjectVisitor* visitor, space::ContinuousSpace* space,
-                 space::ContinuousSpace* immune_space, ModUnionTable::CardBitmap* card_bitmap)
-      : visitor_(visitor), space_(space), immune_space_(immune_space),
-        bitmap_(space->GetLiveBitmap()), card_bitmap_(card_bitmap) {
+  CardBitVisitor(MarkObjectVisitor* visitor,
+                 space::ContinuousSpace* space,
+                 space::ContinuousSpace* immune_space,
+                 ModUnionTable::CardBitmap* card_bitmap)
+      : visitor_(visitor),
+        space_(space),
+        immune_space_(immune_space),
+        bitmap_(space->GetLiveBitmap()),
+        card_bitmap_(card_bitmap) {
     DCHECK(immune_space_ != nullptr);
   }
 
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index 5888193..a7a4246 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -29,26 +29,17 @@
 
 namespace art {
 namespace mirror {
-  class Object;
+class Object;
 }  // namespace mirror
 
 namespace gc {
-
-namespace collector {
-  class MarkSweep;
-}  // namespace collector
 namespace space {
   class ContinuousSpace;
-  class Space;
 }  // namespace space
-
 class Heap;
 
 namespace accounting {
 
-class Bitmap;
-class HeapBitmap;
-
 // The mod-union table is the union of modified cards. It is used to allow the card table to be
 // cleared between GC phases, reducing the number of dirty cards that need to be scanned.
 class ModUnionTable {
@@ -60,8 +51,7 @@
   explicit ModUnionTable(const std::string& name, Heap* heap, space::ContinuousSpace* space)
       : name_(name),
         heap_(heap),
-        space_(space) {
-  }
+        space_(space) {}
 
   virtual ~ModUnionTable() {}
 
@@ -89,12 +79,15 @@
   virtual bool ContainsCardFor(uintptr_t addr) = 0;
 
   virtual void Dump(std::ostream& os) = 0;
+
   space::ContinuousSpace* GetSpace() {
     return space_;
   }
+
   Heap* GetHeap() const {
     return heap_;
   }
+
   const std::string& GetName() const {
     return name_;
   }
@@ -111,6 +104,7 @@
   explicit ModUnionTableReferenceCache(const std::string& name, Heap* heap,
                                        space::ContinuousSpace* space)
       : ModUnionTable(name, heap, space) {}
+
   virtual ~ModUnionTableReferenceCache() {}
 
   // Clear and store cards for a space.
@@ -151,6 +145,7 @@
   // Note: There is assumption that the space End() doesn't change.
   explicit ModUnionTableCardCache(const std::string& name, Heap* heap,
                                   space::ContinuousSpace* space);
+
   virtual ~ModUnionTableCardCache() {}
 
   // Clear and store cards for a space.
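
[Editor's note] The header comment above summarizes the table's role; schematically, a collector uses it like this during a pause (UpdateAndMarkReferences() and Verify() appear in the hunks above; the call site itself is assumed):

    // Sketch of a pause-time call site, not actual ART code.
    void ScanImageModUnionTable(accounting::ModUnionTable* table,
                                MarkObjectVisitor* visitor) {
      // Mark through only the references recorded on dirty cards, instead of
      // re-scanning the whole image space.
      table->UpdateAndMarkReferences(visitor);
      if (kIsDebugBuild) {
        table->Verify();  // Check that everything recorded is now marked.
      }
    }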
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 57af959..399591b 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1073,9 +1073,14 @@
   if (to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() &&
       to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr &&
       !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())) {
-    // Leave References gray so that GetReferent() will trigger RB.
+    // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We
+    // will change it to black or white later in ReferenceQueue::DequeuePendingReference().
     CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
   } else {
+    // We may occasionally leave a Reference black or white in the queue if its referent happens to
+    // be concurrently marked after the Scan() call above has enqueued the Reference, in which case
+    // the above IsInToSpace() evaluates to true and we change the color from gray to black or white
+    // here in this else block.
 #ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
     if (kUseBakerReadBarrier) {
       if (region_space_->IsInToSpace(to_ref)) {
@@ -1467,7 +1472,7 @@
         ArtMethod* method = gc_root_source->GetArtMethod();
         LOG(INTERNAL_FATAL) << "gc root in method " << method << " " << PrettyMethod(method);
         RootPrinter root_printer;
-        method->VisitRoots(root_printer);
+        method->VisitRoots(root_printer, sizeof(void*));
       }
       ref->GetLockWord(false).Dump(LOG(INTERNAL_FATAL));
       region_space_->DumpNonFreeRegions(LOG(INTERNAL_FATAL));
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 7ddc7cc..089f453 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -421,12 +421,11 @@
           if (heap_bitmap_exclusive_locked) {
             Locks::heap_bitmap_lock_->ExclusiveUnlock(self);
           }
-          Locks::mutator_lock_->SharedUnlock(self);
-          ThreadList* tl = Runtime::Current()->GetThreadList();
-          tl->SuspendAll(__FUNCTION__);
-          mark_sweep_->VerifyRoots();
-          tl->ResumeAll();
-          Locks::mutator_lock_->SharedLock(self);
+          {
+            ScopedThreadSuspension sts(self, kSuspended);
+            ScopedSuspendAll ssa(__FUNCTION__);
+            mark_sweep_->VerifyRoots();
+          }
           if (heap_bitmap_exclusive_locked) {
             Locks::heap_bitmap_lock_->ExclusiveLock(self);
           }
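
[Editor's note] This is the first of many hunks in this change that replace manual SuspendAll()/ResumeAll() pairs with an RAII guard. A minimal sketch of what ScopedSuspendAll presumably boils down to, assuming it simply pairs the two thread-list calls (the real class lives in ART's thread_list.h; locking annotations omitted). One pitfall the hunk above had to avoid for ScopedThreadSuspension as well: the guard must be bound to a named local, because an unnamed temporary is destroyed, and the threads resumed, at the end of that statement.

    // Minimal sketch, not the real declaration: suspend in the constructor,
    // resume in the destructor, so early returns cannot leak a suspend-all.
    class ScopedSuspendAll {
     public:
      explicit ScopedSuspendAll(const char* cause, bool long_suspend = false) {
        Runtime::Current()->GetThreadList()->SuspendAll(cause, long_suspend);
      }
      ~ScopedSuspendAll() {
        Runtime::Current()->GetThreadList()->ResumeAll();
      }
    };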
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 9292c7a..cfe7713 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -639,10 +639,9 @@
     background_collector_type_ = foreground_collector_type_;
   }
   TransitionCollector(foreground_collector_type_);
-  ThreadList* tl = Runtime::Current()->GetThreadList();
-  Thread* self = Thread::Current();
+  Thread* const self = Thread::Current();
   ScopedThreadStateChange tsc(self, kSuspended);
-  tl->SuspendAll(__FUNCTION__);
+  ScopedSuspendAll ssa(__FUNCTION__);
   // Something may have caused the transition to fail.
   if (!IsMovingGc(collector_type_) && non_moving_space_ != main_space_) {
     CHECK(main_space_ != nullptr);
@@ -657,7 +656,6 @@
     non_moving_space_ = main_space_;
     CHECK(!non_moving_space_->CanMoveObjects());
   }
-  tl->ResumeAll();
 }
 
 std::string Heap::SafeGetClassDescriptor(mirror::Class* klass) {
@@ -889,11 +887,9 @@
     IncrementDisableMovingGC(self);
     {
       ScopedThreadSuspension sts(self, kWaitingForVisitObjects);
-      ThreadList* tl = Runtime::Current()->GetThreadList();
-      tl->SuspendAll(__FUNCTION__);
+      ScopedSuspendAll ssa(__FUNCTION__);
       VisitObjectsInternalRegionSpace(callback, arg);
       VisitObjectsInternal(callback, arg);
-      tl->ResumeAll();
     }
     DecrementDisableMovingGC(self);
   } else {
@@ -1267,12 +1263,13 @@
     // Deflate the monitors, this can cause a pause but shouldn't matter since we don't care
     // about pauses.
     Runtime* runtime = Runtime::Current();
-    runtime->GetThreadList()->SuspendAll(__FUNCTION__);
-    uint64_t start_time = NanoTime();
-    size_t count = runtime->GetMonitorList()->DeflateMonitors();
-    VLOG(heap) << "Deflating " << count << " monitors took "
-        << PrettyDuration(NanoTime() - start_time);
-    runtime->GetThreadList()->ResumeAll();
+    {
+      ScopedSuspendAll ssa(__FUNCTION__);
+      uint64_t start_time = NanoTime();
+      size_t count = runtime->GetMonitorList()->DeflateMonitors();
+      VLOG(heap) << "Deflating " << count << " monitors took "
+          << PrettyDuration(NanoTime() - start_time);
+    }
     ATRACE_END();
   }
   TrimIndirectReferenceTables(self);
@@ -1749,19 +1746,15 @@
 }
 
 size_t Heap::GetObjectsAllocated() const {
-  Thread* self = Thread::Current();
+  Thread* const self = Thread::Current();
   ScopedThreadStateChange tsc(self, kWaitingForGetObjectsAllocated);
-  auto* tl = Runtime::Current()->GetThreadList();
   // Need SuspendAll here to prevent lock violation if RosAlloc does it during InspectAll.
-  tl->SuspendAll(__FUNCTION__);
+  ScopedSuspendAll ssa(__FUNCTION__);
+  ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
   size_t total = 0;
-  {
-    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    for (space::AllocSpace* space : alloc_spaces_) {
-      total += space->GetObjectsAllocated();
-    }
+  for (space::AllocSpace* space : alloc_spaces_) {
+    total += space->GetObjectsAllocated();
   }
-  tl->ResumeAll();
   return total;
 }
 
@@ -1911,7 +1904,6 @@
   // Inc requested homogeneous space compaction.
   count_requested_homogeneous_space_compaction_++;
   // Store performed homogeneous space compaction at a new request arrival.
-  ThreadList* tl = Runtime::Current()->GetThreadList();
   ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
   Locks::mutator_lock_->AssertNotHeld(self);
   {
@@ -1938,34 +1930,34 @@
     FinishGC(self, collector::kGcTypeNone);
     return HomogeneousSpaceCompactResult::kErrorVMShuttingDown;
   }
-  // Suspend all threads.
-  tl->SuspendAll(__FUNCTION__);
-  uint64_t start_time = NanoTime();
-  // Launch compaction.
-  space::MallocSpace* to_space = main_space_backup_.release();
-  space::MallocSpace* from_space = main_space_;
-  to_space->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
-  const uint64_t space_size_before_compaction = from_space->Size();
-  AddSpace(to_space);
-  // Make sure that we will have enough room to copy.
-  CHECK_GE(to_space->GetFootprintLimit(), from_space->GetFootprintLimit());
-  collector::GarbageCollector* collector = Compact(to_space, from_space,
-                                                   kGcCauseHomogeneousSpaceCompact);
-  const uint64_t space_size_after_compaction = to_space->Size();
-  main_space_ = to_space;
-  main_space_backup_.reset(from_space);
-  RemoveSpace(from_space);
-  SetSpaceAsDefault(main_space_);  // Set as default to reset the proper dlmalloc space.
-  // Update performed homogeneous space compaction count.
-  count_performed_homogeneous_space_compaction_++;
-  // Print statics log and resume all threads.
-  uint64_t duration = NanoTime() - start_time;
-  VLOG(heap) << "Heap homogeneous space compaction took " << PrettyDuration(duration) << " size: "
-             << PrettySize(space_size_before_compaction) << " -> "
-             << PrettySize(space_size_after_compaction) << " compact-ratio: "
-             << std::fixed << static_cast<double>(space_size_after_compaction) /
-             static_cast<double>(space_size_before_compaction);
-  tl->ResumeAll();
+  collector::GarbageCollector* collector;
+  {
+    ScopedSuspendAll ssa(__FUNCTION__);
+    uint64_t start_time = NanoTime();
+    // Launch compaction.
+    space::MallocSpace* to_space = main_space_backup_.release();
+    space::MallocSpace* from_space = main_space_;
+    to_space->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+    const uint64_t space_size_before_compaction = from_space->Size();
+    AddSpace(to_space);
+    // Make sure that we will have enough room to copy.
+    CHECK_GE(to_space->GetFootprintLimit(), from_space->GetFootprintLimit());
+    collector = Compact(to_space, from_space, kGcCauseHomogeneousSpaceCompact);
+    const uint64_t space_size_after_compaction = to_space->Size();
+    main_space_ = to_space;
+    main_space_backup_.reset(from_space);
+    RemoveSpace(from_space);
+    SetSpaceAsDefault(main_space_);  // Set as default to reset the proper dlmalloc space.
+    // Update performed homogeneous space compaction count.
+    count_performed_homogeneous_space_compaction_++;
+    // Print statistics log; all threads resume when ssa goes out of scope.
+    uint64_t duration = NanoTime() - start_time;
+    VLOG(heap) << "Heap homogeneous space compaction took " << PrettyDuration(duration) << " size: "
+               << PrettySize(space_size_before_compaction) << " -> "
+               << PrettySize(space_size_after_compaction) << " compact-ratio: "
+               << std::fixed << static_cast<double>(space_size_after_compaction) /
+               static_cast<double>(space_size_before_compaction);
+  }
   // Finish GC.
   reference_processor_->EnqueueClearedReferences(self);
   GrowForUtilization(semi_space_collector_);
@@ -1983,7 +1975,6 @@
   uint64_t start_time = NanoTime();
   uint32_t before_allocated = num_bytes_allocated_.LoadSequentiallyConsistent();
   Runtime* const runtime = Runtime::Current();
-  ThreadList* const tl = runtime->GetThreadList();
   Thread* const self = Thread::Current();
   ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
   Locks::mutator_lock_->AssertNotHeld(self);
@@ -2021,84 +2012,91 @@
     return;
   }
   collector::GarbageCollector* collector = nullptr;
-  tl->SuspendAll(__FUNCTION__);
-  switch (collector_type) {
-    case kCollectorTypeSS: {
-      if (!IsMovingGc(collector_type_)) {
-        // Create the bump pointer space from the backup space.
-        CHECK(main_space_backup_ != nullptr);
-        std::unique_ptr<MemMap> mem_map(main_space_backup_->ReleaseMemMap());
-        // We are transitioning from non moving GC -> moving GC, since we copied from the bump
-        // pointer space last transition it will be protected.
-        CHECK(mem_map != nullptr);
-        mem_map->Protect(PROT_READ | PROT_WRITE);
-        bump_pointer_space_ = space::BumpPointerSpace::CreateFromMemMap("Bump pointer space",
-                                                                        mem_map.release());
-        AddSpace(bump_pointer_space_);
-        collector = Compact(bump_pointer_space_, main_space_, kGcCauseCollectorTransition);
-        // Use the now empty main space mem map for the bump pointer temp space.
-        mem_map.reset(main_space_->ReleaseMemMap());
-        // Unset the pointers just in case.
-        if (dlmalloc_space_ == main_space_) {
-          dlmalloc_space_ = nullptr;
-        } else if (rosalloc_space_ == main_space_) {
-          rosalloc_space_ = nullptr;
-        }
-        // Remove the main space so that we don't try to trim it, this doens't work for debug
-        // builds since RosAlloc attempts to read the magic number from a protected page.
-        RemoveSpace(main_space_);
-        RemoveRememberedSet(main_space_);
-        delete main_space_;  // Delete the space since it has been removed.
-        main_space_ = nullptr;
-        RemoveRememberedSet(main_space_backup_.get());
-        main_space_backup_.reset(nullptr);  // Deletes the space.
-        temp_space_ = space::BumpPointerSpace::CreateFromMemMap("Bump pointer space 2",
-                                                                mem_map.release());
-        AddSpace(temp_space_);
-      }
-      break;
-    }
-    case kCollectorTypeMS:
-      // Fall through.
-    case kCollectorTypeCMS: {
-      if (IsMovingGc(collector_type_)) {
-        CHECK(temp_space_ != nullptr);
-        std::unique_ptr<MemMap> mem_map(temp_space_->ReleaseMemMap());
-        RemoveSpace(temp_space_);
-        temp_space_ = nullptr;
-        mem_map->Protect(PROT_READ | PROT_WRITE);
-        CreateMainMallocSpace(mem_map.get(), kDefaultInitialSize,
-                              std::min(mem_map->Size(), growth_limit_), mem_map->Size());
-        mem_map.release();
-        // Compact to the main space from the bump pointer space, don't need to swap semispaces.
-        AddSpace(main_space_);
-        collector = Compact(main_space_, bump_pointer_space_, kGcCauseCollectorTransition);
-        mem_map.reset(bump_pointer_space_->ReleaseMemMap());
-        RemoveSpace(bump_pointer_space_);
-        bump_pointer_space_ = nullptr;
-        const char* name = kUseRosAlloc ? kRosAllocSpaceName[1] : kDlMallocSpaceName[1];
-        // Temporarily unprotect the backup mem map so rosalloc can write the debug magic number.
-        if (kIsDebugBuild && kUseRosAlloc) {
+  {
+    ScopedSuspendAll ssa(__FUNCTION__);
+    switch (collector_type) {
+      case kCollectorTypeSS: {
+        if (!IsMovingGc(collector_type_)) {
+          // Create the bump pointer space from the backup space.
+          CHECK(main_space_backup_ != nullptr);
+          std::unique_ptr<MemMap> mem_map(main_space_backup_->ReleaseMemMap());
+          // We are transitioning from a non-moving GC to a moving GC. Since we copied from the
+          // bump pointer space during the last transition, it will be protected.
+          CHECK(mem_map != nullptr);
           mem_map->Protect(PROT_READ | PROT_WRITE);
+          bump_pointer_space_ = space::BumpPointerSpace::CreateFromMemMap("Bump pointer space",
+                                                                          mem_map.release());
+          AddSpace(bump_pointer_space_);
+          collector = Compact(bump_pointer_space_, main_space_, kGcCauseCollectorTransition);
+          // Use the now empty main space mem map for the bump pointer temp space.
+          mem_map.reset(main_space_->ReleaseMemMap());
+          // Unset the pointers just in case.
+          if (dlmalloc_space_ == main_space_) {
+            dlmalloc_space_ = nullptr;
+          } else if (rosalloc_space_ == main_space_) {
+            rosalloc_space_ = nullptr;
+          }
+          // Remove the main space so that we don't try to trim it; this doesn't work for debug
+          // builds since RosAlloc attempts to read the magic number from a protected page.
+          RemoveSpace(main_space_);
+          RemoveRememberedSet(main_space_);
+          delete main_space_;  // Delete the space since it has been removed.
+          main_space_ = nullptr;
+          RemoveRememberedSet(main_space_backup_.get());
+          main_space_backup_.reset(nullptr);  // Deletes the space.
+          temp_space_ = space::BumpPointerSpace::CreateFromMemMap("Bump pointer space 2",
+                                                                  mem_map.release());
+          AddSpace(temp_space_);
         }
-        main_space_backup_.reset(CreateMallocSpaceFromMemMap(
-            mem_map.get(), kDefaultInitialSize, std::min(mem_map->Size(), growth_limit_),
-            mem_map->Size(), name, true));
-        if (kIsDebugBuild && kUseRosAlloc) {
-          mem_map->Protect(PROT_NONE);
-        }
-        mem_map.release();
+        break;
       }
-      break;
+      case kCollectorTypeMS:
+        // Fall through.
+      case kCollectorTypeCMS: {
+        if (IsMovingGc(collector_type_)) {
+          CHECK(temp_space_ != nullptr);
+          std::unique_ptr<MemMap> mem_map(temp_space_->ReleaseMemMap());
+          RemoveSpace(temp_space_);
+          temp_space_ = nullptr;
+          mem_map->Protect(PROT_READ | PROT_WRITE);
+          CreateMainMallocSpace(mem_map.get(),
+                                kDefaultInitialSize,
+                                std::min(mem_map->Size(), growth_limit_),
+                                mem_map->Size());
+          mem_map.release();
+          // Compact to the main space from the bump pointer space; no need to swap semispaces.
+          AddSpace(main_space_);
+          collector = Compact(main_space_, bump_pointer_space_, kGcCauseCollectorTransition);
+          mem_map.reset(bump_pointer_space_->ReleaseMemMap());
+          RemoveSpace(bump_pointer_space_);
+          bump_pointer_space_ = nullptr;
+          const char* name = kUseRosAlloc ? kRosAllocSpaceName[1] : kDlMallocSpaceName[1];
+          // Temporarily unprotect the backup mem map so rosalloc can write the debug magic number.
+          if (kIsDebugBuild && kUseRosAlloc) {
+            mem_map->Protect(PROT_READ | PROT_WRITE);
+          }
+          main_space_backup_.reset(CreateMallocSpaceFromMemMap(
+              mem_map.get(),
+              kDefaultInitialSize,
+              std::min(mem_map->Size(), growth_limit_),
+              mem_map->Size(),
+              name,
+              true));
+          if (kIsDebugBuild && kUseRosAlloc) {
+            mem_map->Protect(PROT_NONE);
+          }
+          mem_map.release();
+        }
+        break;
+      }
+      default: {
+        LOG(FATAL) << "Attempted to transition to invalid collector type "
+                   << static_cast<size_t>(collector_type);
+        break;
+      }
     }
-    default: {
-      LOG(FATAL) << "Attempted to transition to invalid collector type "
-                 << static_cast<size_t>(collector_type);
-      break;
-    }
+    ChangeCollector(collector_type);
   }
-  ChangeCollector(collector_type);
-  tl->ResumeAll();
   // Can't call into java code with all threads suspended.
   reference_processor_->EnqueueClearedReferences(self);
   uint64_t duration = NanoTime() - start_time;
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index f505428..56957ba 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -89,19 +89,38 @@
   Heap* heap = Runtime::Current()->GetHeap();
   if (kUseBakerOrBrooksReadBarrier && heap->CurrentCollectorType() == kCollectorTypeCC &&
       heap->ConcurrentCopyingCollector()->IsActive()) {
-    // Clear the gray ptr we left in ConcurrentCopying::ProcessMarkStack().
-    // We don't want to do this when the zygote compaction collector (SemiSpace) is running.
+    // Change the gray ptr we left in ConcurrentCopying::ProcessMarkStackRef() to black or white.
+    // We check IsActive() above because we don't want to do this when the zygote compaction
+    // collector (SemiSpace) is running.
     CHECK(ref != nullptr);
-    CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr())
-        << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
-    if (heap->ConcurrentCopyingCollector()->RegionSpace()->IsInToSpace(ref)) {
-      // Moving objects.
-      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
-      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
+    collector::ConcurrentCopying* concurrent_copying = heap->ConcurrentCopyingCollector();
+    const bool is_moving = concurrent_copying->RegionSpace()->IsInToSpace(ref);
+    if (ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+      if (is_moving) {
+        ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
+        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
+      } else {
+        ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::BlackPtr());
+        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr());
+      }
     } else {
-      // Non-moving objects.
-      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::BlackPtr());
-      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr());
+      // In ConcurrentCopying::ProcessMarkStackRef() we may leave a black or white Reference in the
+      // queue and find it here, which is OK. Check that the color makes sense depending on whether
+      // the Reference is moving or not and that the referent has been marked.
+      if (is_moving) {
+        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
+            << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
+      } else {
+        CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr())
+            << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer();
+      }
+      mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
+      // The referent could be null if it's cleared by a mutator (Reference.clear()).
+      if (referent != nullptr) {
+        CHECK(concurrent_copying->IsInToSpace(referent))
+            << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer()
+            << " referent=" << referent;
+      }
     }
   }
   return ref;
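
[Editor's note] Condensed, the color protocol that DequeuePendingReference() now enforces for the concurrent copying collector looks like this (a sketch over the constants used above; not a drop-in replacement for the hunk):

    // Sketch: normalize the read-barrier color of a dequeued Reference.
    void FixUpDequeuedReferenceColor(mirror::Reference* ref, bool is_moving) {
      if (ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
        // Usual case: ProcessMarkStackRef() left the Reference gray so that
        // GetReferent() keeps triggering the read barrier until now.
        ref->AtomicSetReadBarrierPointer(
            ReadBarrier::GrayPtr(),
            is_moving ? ReadBarrier::WhitePtr() : ReadBarrier::BlackPtr());
      }
      // Otherwise a concurrent mark already turned it white (moving) or black
      // (non-moving); the hunk above only verifies the color and the referent.
    }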
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index d8072ea..49126d2 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -303,17 +303,13 @@
     void* arg, bool do_null_callback_at_end) NO_THREAD_SAFETY_ANALYSIS {
   // TODO: NO_THREAD_SAFETY_ANALYSIS.
   Thread* self = Thread::Current();
-  ThreadList* tl = Runtime::Current()->GetThreadList();
-  tl->SuspendAll(__FUNCTION__);
-  {
-    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-    MutexLock mu2(self, *Locks::thread_list_lock_);
-    rosalloc_->InspectAll(callback, arg);
-    if (do_null_callback_at_end) {
-      callback(nullptr, nullptr, 0, arg);  // Indicate end of a space.
-    }
+  ScopedSuspendAll ssa(__FUNCTION__);
+  MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+  MutexLock mu2(self, *Locks::thread_list_lock_);
+  rosalloc_->InspectAll(callback, arg);
+  if (do_null_callback_at_end) {
+    callback(nullptr, nullptr, 0, arg);  // Indicate end of a space.
   }
-  tl->ResumeAll();
 }
 
 void RosAllocSpace::InspectAllRosAlloc(void (*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index e2094dc..dfc1f5f 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -1403,10 +1403,11 @@
     // comment in Heap::VisitObjects().
     heap->IncrementDisableMovingGC(self);
   }
-  Runtime::Current()->GetThreadList()->SuspendAll(__FUNCTION__, true /* long suspend */);
-  Hprof hprof(filename, fd, direct_to_ddms);
-  hprof.Dump();
-  Runtime::Current()->GetThreadList()->ResumeAll();
+  {
+    ScopedSuspendAll ssa(__FUNCTION__, true /* long suspend */);
+    Hprof hprof(filename, fd, direct_to_ddms);
+    hprof.Dump();
+  }
   if (heap->IsGcConcurrentAndMoving()) {
     heap->DecrementDisableMovingGC(self);
   }
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 63c02ed..7e2a84d 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -407,6 +407,10 @@
     backward_branch_listeners_.push_back(listener);
     have_backward_branch_listeners_ = true;
   }
+  if (HasEvent(kInvokeVirtualOrInterface, events)) {
+    invoke_virtual_or_interface_listeners_.push_back(listener);
+    have_invoke_virtual_or_interface_listeners_ = true;
+  }
   if (HasEvent(kDexPcMoved, events)) {
     std::list<InstrumentationListener*>* modified;
     if (have_dex_pc_listeners_) {
@@ -466,13 +470,17 @@
     have_method_exit_listeners_ = !method_exit_listeners_.empty();
   }
   if (HasEvent(kMethodUnwind, events) && have_method_unwind_listeners_) {
-      method_unwind_listeners_.remove(listener);
-      have_method_unwind_listeners_ = !method_unwind_listeners_.empty();
+    method_unwind_listeners_.remove(listener);
+    have_method_unwind_listeners_ = !method_unwind_listeners_.empty();
   }
   if (HasEvent(kBackwardBranch, events) && have_backward_branch_listeners_) {
-      backward_branch_listeners_.remove(listener);
-      have_backward_branch_listeners_ = !backward_branch_listeners_.empty();
-    }
+    backward_branch_listeners_.remove(listener);
+    have_backward_branch_listeners_ = !backward_branch_listeners_.empty();
+  }
+  if (HasEvent(kInvokeVirtualOrInterface, events) && have_invoke_virtual_or_interface_listeners_) {
+    invoke_virtual_or_interface_listeners_.remove(listener);
+    have_invoke_virtual_or_interface_listeners_ = !invoke_virtual_or_interface_listeners_.empty();
+  }
   if (HasEvent(kDexPcMoved, events) && have_dex_pc_listeners_) {
     std::list<InstrumentationListener*>* modified =
         new std::list<InstrumentationListener*>(*dex_pc_listeners_.get());
@@ -602,19 +610,17 @@
 void Instrumentation::SetEntrypointsInstrumented(bool instrumented) {
   Thread* self = Thread::Current();
   Runtime* runtime = Runtime::Current();
-  ThreadList* tl = runtime->GetThreadList();
   Locks::mutator_lock_->AssertNotHeld(self);
   Locks::instrument_entrypoints_lock_->AssertHeld(self);
   if (runtime->IsStarted()) {
-    tl->SuspendAll(__FUNCTION__);
-  }
-  {
+    ScopedSuspendAll ssa(__FUNCTION__);
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
     SetQuickAllocEntryPointsInstrumented(instrumented);
     ResetQuickAllocEntryPoints();
-  }
-  if (runtime->IsStarted()) {
-    tl->ResumeAll();
+  } else {
+    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+    SetQuickAllocEntryPointsInstrumented(instrumented);
+    ResetQuickAllocEntryPoints();
   }
 }
 
@@ -908,6 +914,16 @@
   }
 }
 
+void Instrumentation::InvokeVirtualOrInterfaceImpl(Thread* thread,
+                                                   mirror::Object* this_object,
+                                                   ArtMethod* caller,
+                                                   uint32_t dex_pc,
+                                                   ArtMethod* callee) const {
+  for (InstrumentationListener* listener : invoke_virtual_or_interface_listeners_) {
+    listener->InvokeVirtualOrInterface(thread, this_object, caller, dex_pc, callee);
+  }
+}
+
 void Instrumentation::FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
                                          ArtMethod* method, uint32_t dex_pc,
                                          ArtField* field) const {
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 93ff567..6711ac3 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -97,6 +97,14 @@
   // Call-back for when we get a backward branch.
   virtual void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
+
+  // Call-back for when we get an invokevirtual or an invokeinterface.
+  virtual void InvokeVirtualOrInterface(Thread* thread,
+                                        mirror::Object* this_object,
+                                        ArtMethod* caller,
+                                        uint32_t dex_pc,
+                                        ArtMethod* callee)
+      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 };
 
 // Instrumentation is a catch-all for when extra information is required from the runtime. The
@@ -114,6 +122,7 @@
     kFieldWritten = 0x20,
     kExceptionCaught = 0x40,
     kBackwardBranch = 0x80,
+    kInvokeVirtualOrInterface = 0x100,
   };
 
   enum class InstrumentationLevel {
@@ -257,6 +266,10 @@
     return have_backward_branch_listeners_;
   }
 
+  bool HasInvokeVirtualOrInterfaceListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return have_invoke_virtual_or_interface_listeners_;
+  }
+
   bool IsActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return have_dex_pc_listeners_ || have_method_entry_listeners_ || have_method_exit_listeners_ ||
         have_field_read_listeners_ || have_field_write_listeners_ ||
@@ -325,6 +338,17 @@
     }
   }
 
+  void InvokeVirtualOrInterface(Thread* thread,
+                                mirror::Object* this_object,
+                                ArtMethod* caller,
+                                uint32_t dex_pc,
+                                ArtMethod* callee) const
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (UNLIKELY(HasInvokeVirtualOrInterfaceListeners())) {
+      InvokeVirtualOrInterfaceImpl(thread, this_object, caller, dex_pc, callee);
+    }
+  }
+
   // Inform listeners that an exception was caught.
   void ExceptionCaughtEvent(Thread* thread, mirror::Throwable* exception_object) const
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -385,6 +409,12 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   void BackwardBranchImpl(Thread* thread, ArtMethod* method, int32_t offset) const
       SHARED_REQUIRES(Locks::mutator_lock_);
+  void InvokeVirtualOrInterfaceImpl(Thread* thread,
+                                    mirror::Object* this_object,
+                                    ArtMethod* caller,
+                                    uint32_t dex_pc,
+                                    ArtMethod* callee) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
                            ArtMethod* method, uint32_t dex_pc,
                            ArtField* field) const
@@ -451,6 +481,9 @@
   // Do we have any backward branch listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
+  // Do we have any invoke listeners? Short-cut to avoid taking the instrumentation_lock_.
+  bool have_invoke_virtual_or_interface_listeners_ GUARDED_BY(Locks::mutator_lock_);
+
   // Contains the instrumentation level required by each client of the instrumentation identified
   // by a string key.
   typedef SafeMap<const char*, InstrumentationLevel> InstrumentationLevelTable;
@@ -461,6 +494,8 @@
   std::list<InstrumentationListener*> method_exit_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_unwind_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> invoke_virtual_or_interface_listeners_
+      GUARDED_BY(Locks::mutator_lock_);
   std::shared_ptr<std::list<InstrumentationListener*>> dex_pc_listeners_
       GUARDED_BY(Locks::mutator_lock_);
   std::shared_ptr<std::list<InstrumentationListener*>> field_read_listeners_
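
[Editor's note] With the header additions above, a client enables the new event the same way as the existing ones. A sketch of the registration, following the suspend-all discipline used throughout this change; |listener| is assumed to be an InstrumentationListener subclass that overrides InvokeVirtualOrInterface():

    // Sketch of a call site; see instrumentation_test.cc below for the real one.
    {
      ScopedThreadSuspension sts(Thread::Current(), kSuspended);
      ScopedSuspendAll ssa("Add invoke listener");
      Runtime::Current()->GetInstrumentation()->AddListener(
          &listener, instrumentation::Instrumentation::kInvokeVirtualOrInterface);
    }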
diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc
index 56fe9ef..d98d246 100644
--- a/runtime/instrumentation_test.cc
+++ b/runtime/instrumentation_test.cc
@@ -36,7 +36,8 @@
     : received_method_enter_event(false), received_method_exit_event(false),
       received_method_unwind_event(false), received_dex_pc_moved_event(false),
       received_field_read_event(false), received_field_written_event(false),
-      received_exception_caught_event(false), received_backward_branch_event(false) {}
+      received_exception_caught_event(false), received_backward_branch_event(false),
+      received_invoke_virtual_or_interface_event(false) {}
 
   virtual ~TestInstrumentationListener() {}
 
@@ -105,6 +106,15 @@
     received_backward_branch_event = true;
   }
 
+  void InvokeVirtualOrInterface(Thread* thread ATTRIBUTE_UNUSED,
+                                mirror::Object* this_object ATTRIBUTE_UNUSED,
+                                ArtMethod* caller ATTRIBUTE_UNUSED,
+                                uint32_t dex_pc ATTRIBUTE_UNUSED,
+                                ArtMethod* callee ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    received_invoke_virtual_or_interface_event = true;
+  }
+
   void Reset() {
     received_method_enter_event = false;
     received_method_exit_event = false;
@@ -114,6 +124,7 @@
     received_field_written_event = false;
     received_exception_caught_event = false;
     received_backward_branch_event = false;
+    received_invoke_virtual_or_interface_event = false;
   }
 
   bool received_method_enter_event;
@@ -124,6 +135,7 @@
   bool received_field_written_event;
   bool received_exception_caught_event;
   bool received_backward_branch_event;
+  bool received_invoke_virtual_or_interface_event;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(TestInstrumentationListener);
@@ -138,13 +150,9 @@
   void CheckConfigureStubs(const char* key, Instrumentation::InstrumentationLevel level) {
     ScopedObjectAccess soa(Thread::Current());
     instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
-    {
-      ScopedThreadSuspension sts(soa.Self(), kSuspended);
-      Runtime* runtime = Runtime::Current();
-      runtime->GetThreadList()->SuspendAll("Instrumentation::ConfigureStubs");
-      instr->ConfigureStubs(key, level);
-      runtime->GetThreadList()->ResumeAll();
-    }
+    ScopedThreadSuspension sts(soa.Self(), kSuspended);
+    ScopedSuspendAll ssa("Instrumentation::ConfigureStubs");
+    instr->ConfigureStubs(key, level);
   }
 
   Instrumentation::InstrumentationLevel GetCurrentInstrumentationLevel() {
@@ -162,10 +170,8 @@
     TestInstrumentationListener listener;
     {
       ScopedThreadSuspension sts(soa.Self(), kSuspended);
-      Runtime* runtime = Runtime::Current();
-      runtime->GetThreadList()->SuspendAll("Add instrumentation listener");
+      ScopedSuspendAll ssa("Add instrumentation listener");
       instr->AddListener(&listener, instrumentation_event);
-      runtime->GetThreadList()->ResumeAll();
     }
 
     ArtMethod* const event_method = nullptr;
@@ -181,10 +187,8 @@
     listener.Reset();
     {
       ScopedThreadSuspension sts(soa.Self(), kSuspended);
-      Runtime* runtime = Runtime::Current();
-      runtime->GetThreadList()->SuspendAll("Remove instrumentation listener");
+      ScopedSuspendAll ssa("Remove instrumentation listener");
       instr->RemoveListener(&listener, instrumentation_event);
-      runtime->GetThreadList()->ResumeAll();
     }
 
     // Check the listener is not registered and is not notified of the event.
@@ -199,12 +203,11 @@
     Runtime* runtime = Runtime::Current();
     instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
     ScopedThreadSuspension sts(self, kSuspended);
-    runtime->GetThreadList()->SuspendAll("Single method deoptimization");
+    ScopedSuspendAll ssa("Single method deoptimization");
     if (enable_deoptimization) {
       instrumentation->EnableDeoptimization();
     }
     instrumentation->Deoptimize(method);
-    runtime->GetThreadList()->ResumeAll();
   }
 
   void UndeoptimizeMethod(Thread* self, ArtMethod* method,
@@ -213,12 +216,11 @@
     Runtime* runtime = Runtime::Current();
     instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
     ScopedThreadSuspension sts(self, kSuspended);
-    runtime->GetThreadList()->SuspendAll("Single method undeoptimization");
+    ScopedSuspendAll ssa("Single method undeoptimization");
     instrumentation->Undeoptimize(method);
     if (disable_deoptimization) {
       instrumentation->DisableDeoptimization(key);
     }
-    runtime->GetThreadList()->ResumeAll();
   }
 
   void DeoptimizeEverything(Thread* self, const char* key, bool enable_deoptimization)
@@ -226,12 +228,11 @@
     Runtime* runtime = Runtime::Current();
     instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
     ScopedThreadSuspension sts(self, kSuspended);
-    runtime->GetThreadList()->SuspendAll("Full deoptimization");
+    ScopedSuspendAll ssa("Full deoptimization");
     if (enable_deoptimization) {
       instrumentation->EnableDeoptimization();
     }
     instrumentation->DeoptimizeEverything(key);
-    runtime->GetThreadList()->ResumeAll();
   }
 
   void UndeoptimizeEverything(Thread* self, const char* key, bool disable_deoptimization)
@@ -239,12 +240,11 @@
     Runtime* runtime = Runtime::Current();
     instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
     ScopedThreadSuspension sts(self, kSuspended);
-    runtime->GetThreadList()->SuspendAll("Full undeoptimization");
+    ScopedSuspendAll ssa("Full undeoptimization");
     instrumentation->UndeoptimizeEverything(key);
     if (disable_deoptimization) {
       instrumentation->DisableDeoptimization(key);
     }
-    runtime->GetThreadList()->ResumeAll();
   }
 
   void EnableMethodTracing(Thread* self, const char* key, bool needs_interpreter)
@@ -252,9 +252,8 @@
     Runtime* runtime = Runtime::Current();
     instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
     ScopedThreadSuspension sts(self, kSuspended);
-    runtime->GetThreadList()->SuspendAll("EnableMethodTracing");
+    ScopedSuspendAll ssa("EnableMethodTracing");
     instrumentation->EnableMethodTracing(key, needs_interpreter);
-    runtime->GetThreadList()->ResumeAll();
   }
 
   void DisableMethodTracing(Thread* self, const char* key)
@@ -262,9 +261,8 @@
     Runtime* runtime = Runtime::Current();
     instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
     ScopedThreadSuspension sts(self, kSuspended);
-    runtime->GetThreadList()->SuspendAll("EnableMethodTracing");
+    ScopedSuspendAll ssa("EnableMethodTracing");
     instrumentation->DisableMethodTracing(key);
-    runtime->GetThreadList()->ResumeAll();
   }
 
  private:
@@ -287,6 +285,8 @@
         return instr->HasExceptionCaughtListeners();
       case instrumentation::Instrumentation::kBackwardBranch:
         return instr->HasBackwardBranchListeners();
+      case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
+        return instr->HasInvokeVirtualOrInterfaceListeners();
       default:
         LOG(FATAL) << "Unknown instrumentation event " << event_type;
         UNREACHABLE();
@@ -330,6 +330,9 @@
       case instrumentation::Instrumentation::kBackwardBranch:
         instr->BackwardBranch(self, method, dex_pc);
         break;
+      case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
+        instr->InvokeVirtualOrInterface(self, obj, method, dex_pc, method);
+        break;
       default:
         LOG(FATAL) << "Unknown instrumentation event " << event_type;
         UNREACHABLE();
@@ -355,6 +358,8 @@
         return listener.received_exception_caught_event;
       case instrumentation::Instrumentation::kBackwardBranch:
         return listener.received_backward_branch_event;
+      case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
+        return listener.received_invoke_virtual_or_interface_event;
       default:
         LOG(FATAL) << "Unknown instrumentation event " << event_type;
         UNREACHABLE();
@@ -418,6 +423,10 @@
   TestEvent(instrumentation::Instrumentation::kBackwardBranch);
 }
 
+TEST_F(InstrumentationTest, InvokeVirtualOrInterfaceEvent) {
+  TestEvent(instrumentation::Instrumentation::kInvokeVirtualOrInterface);
+}
+
 TEST_F(InstrumentationTest, DeoptimizeDirectMethod) {
   ScopedObjectAccess soa(Thread::Current());
   jobject class_loader = LoadDex("Instrumentation");
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 6c6232c..3ac80c6 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -399,14 +399,19 @@
   JValue value;
   // Set value to last known result in case the shadow frame chain is empty.
   value.SetJ(ret_val->GetJ());
+  // Are we executing the first shadow frame?
+  bool first = true;
   while (shadow_frame != nullptr) {
     self->SetTopOfShadowStack(shadow_frame);
     const DexFile::CodeItem* code_item = shadow_frame->GetMethod()->GetCodeItem();
     const uint32_t dex_pc = shadow_frame->GetDexPC();
     uint32_t new_dex_pc;
     if (UNLIKELY(self->IsExceptionPending())) {
+      // If we deoptimize from the QuickExceptionHandler, we already reported the exception to
+      // the instrumentation. To avoid reporting it a second time, we simply pass a
+      // null Instrumentation*.
       const instrumentation::Instrumentation* const instrumentation =
-          Runtime::Current()->GetInstrumentation();
+          first ? nullptr : Runtime::Current()->GetInstrumentation();
       uint32_t found_dex_pc = FindNextInstructionFollowingException(self, *shadow_frame, dex_pc,
                                                                     instrumentation);
       new_dex_pc = found_dex_pc;  // the dex pc of a matching catch handler
@@ -424,6 +429,7 @@
     ShadowFrame* old_frame = shadow_frame;
     shadow_frame = shadow_frame->GetLink();
     ShadowFrame::DeleteDeoptimizedFrame(old_frame);
+    first = false;
   }
   ret_val->SetJ(value.GetJ());
 }
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index af67379..6602840 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -414,20 +414,21 @@
 #undef EXPLICIT_DO_IPUT_QUICK_ALL_TEMPLATE_DECL
 #undef EXPLICIT_DO_IPUT_QUICK_TEMPLATE_DECL
 
+// We accept a null Instrumentation*, which means we must not report anything to the instrumentation.
 uint32_t FindNextInstructionFollowingException(
     Thread* self, ShadowFrame& shadow_frame, uint32_t dex_pc,
     const instrumentation::Instrumentation* instrumentation) {
   self->VerifyStack();
   StackHandleScope<2> hs(self);
   Handle<mirror::Throwable> exception(hs.NewHandle(self->GetException()));
-  if (instrumentation->HasExceptionCaughtListeners()
+  if (instrumentation != nullptr && instrumentation->HasExceptionCaughtListeners()
       && self->IsExceptionThrownByCurrentMethod(exception.Get())) {
     instrumentation->ExceptionCaughtEvent(self, exception.Get());
   }
   bool clear_exception = false;
   uint32_t found_dex_pc = shadow_frame.GetMethod()->FindCatchBlock(
       hs.NewHandle(exception->GetClass()), dex_pc, &clear_exception);
-  if (found_dex_pc == DexFile::kDexNoIndex) {
+  if (found_dex_pc == DexFile::kDexNoIndex && instrumentation != nullptr) {
     // Exception is not caught by the current method. We will unwind to the
     // caller. Notify any instrumentation listener.
     instrumentation->MethodUnwindEvent(self, shadow_frame.GetThisObject(),
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index fdefb9f..7398778 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -265,6 +265,13 @@
     result->SetJ(0);
     return false;
   } else {
+    if (type == kVirtual || type == kInterface) {
+      instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+      if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) {
+        instrumentation->InvokeVirtualOrInterface(
+            self, receiver, sf_method, shadow_frame.GetDexPC(), called_method);
+      }
+    }
     return DoCall<is_range, do_access_check>(called_method, self, shadow_frame, inst, inst_data,
                                              result);
   }
@@ -297,6 +304,11 @@
     result->SetJ(0);
     return false;
   } else {
+    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+    if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) {
+      instrumentation->InvokeVirtualOrInterface(
+          self, receiver, shadow_frame.GetMethod(), shadow_frame.GetDexPC(), called_method);
+    }
     // No need to check since we've been quickened.
     return DoCall<is_range, false>(called_method, self, shadow_frame, inst, inst_data, result);
   }
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 26a4fe4..0607493 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -39,6 +39,8 @@
       options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheCapacity);
   jit_options->compile_threshold_ =
       options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold);
+  jit_options->warmup_threshold_ =
+      options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold);
   jit_options->dump_info_on_shutdown_ =
       options.Exists(RuntimeArgumentMap::DumpJITInfoOnShutdown);
   return jit_options;
@@ -160,18 +162,18 @@
   }
 }
 
-void Jit::CreateInstrumentationCache(size_t compile_threshold) {
+void Jit::CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold) {
   CHECK_GT(compile_threshold, 0U);
-  Runtime* const runtime = Runtime::Current();
-  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
+  ScopedSuspendAll ssa(__FUNCTION__);
   // Add Jit interpreter instrumentation, tells the interpreter when to notify the jit to compile
   // something.
-  instrumentation_cache_.reset(new jit::JitInstrumentationCache(compile_threshold));
-  runtime->GetInstrumentation()->AddListener(
+  instrumentation_cache_.reset(
+      new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
+  Runtime::Current()->GetInstrumentation()->AddListener(
       new jit::JitInstrumentationListener(instrumentation_cache_.get()),
       instrumentation::Instrumentation::kMethodEntered |
-      instrumentation::Instrumentation::kBackwardBranch);
-  runtime->GetThreadList()->ResumeAll();
+      instrumentation::Instrumentation::kBackwardBranch |
+      instrumentation::Instrumentation::kInvokeVirtualOrInterface);
 }
 
 }  // namespace jit
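The AddListener call above subscribes one listener to several instrumentation events at once, which works because the event kinds are bit flags that compose with `|`. A minimal sketch of that masking pattern, with made-up flag values (the real ones live in instrumentation::Instrumentation):

#include <cstdint>

// Flag values below are illustrative only.
enum JitEvent : uint32_t {
  kMethodEntered            = 1u << 0,
  kBackwardBranch           = 1u << 1,
  kInvokeVirtualOrInterface = 1u << 2,
};

constexpr uint32_t kJitListenerMask =
    kMethodEntered | kBackwardBranch | kInvokeVirtualOrInterface;
static_assert((kJitListenerMask & kBackwardBranch) != 0,
              "combined mask retains each subscribed event");
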
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index ca6e7ea..643bc23 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -43,13 +43,14 @@
 class Jit {
  public:
   static constexpr bool kStressMode = kIsDebugBuild;
-  static constexpr size_t kDefaultCompileThreshold = kStressMode ? 1 : 1000;
+  static constexpr size_t kDefaultCompileThreshold = kStressMode ? 2 : 1000;
+  static constexpr size_t kDefaultWarmupThreshold = kDefaultCompileThreshold / 2;
 
   virtual ~Jit();
   static Jit* Create(JitOptions* options, std::string* error_msg);
   bool CompileMethod(ArtMethod* method, Thread* self)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void CreateInstrumentationCache(size_t compile_threshold);
+  void CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold);
   void CreateThreadPool();
   CompilerCallbacks* GetCompilerCallbacks() {
     return compiler_callbacks_;
@@ -95,6 +96,9 @@
   size_t GetCompileThreshold() const {
     return compile_threshold_;
   }
+  size_t GetWarmupThreshold() const {
+    return warmup_threshold_;
+  }
   size_t GetCodeCacheCapacity() const {
     return code_cache_capacity_;
   }
@@ -112,6 +116,7 @@
   bool use_jit_;
   size_t code_cache_capacity_;
   size_t compile_threshold_;
+  size_t warmup_threshold_;
   bool dump_info_on_shutdown_;
 
   JitOptions() : use_jit_(false), code_cache_capacity_(0), compile_threshold_(0),
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index cd5f4cb..4c53162 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -82,9 +82,19 @@
   return code_cache_ptr_ - size;
 }
 
+uint8_t* JitCodeCache::ReserveData(Thread* self, size_t size) {
+  MutexLock mu(self, lock_);
+  size = RoundUp(size, sizeof(void*));
+  if (size > DataCacheRemain()) {
+    return nullptr;
+  }
+  data_cache_ptr_ += size;
+  return data_cache_ptr_ - size;
+}
+
 uint8_t* JitCodeCache::AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end) {
   MutexLock mu(self, lock_);
-  const size_t size = end - begin;
+  const size_t size = RoundUp(end - begin, sizeof(void*));
   if (size > DataCacheRemain()) {
     return nullptr;  // Out of space in the data cache.
   }
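ReserveData and AddDataArray above are both instances of a bump-pointer allocator over a fixed region, with sizes rounded up to pointer alignment. A self-contained sketch of that scheme — BumpRegion is an assumed name, not part of the JitCodeCache API, and the real methods additionally hold the cache lock:

#include <cstddef>
#include <cstdint>

class BumpRegion {
 public:
  BumpRegion(uint8_t* base, size_t capacity) : ptr_(base), end_(base + capacity) {}

  // Returns the start of a size-byte block, or nullptr when out of space.
  uint8_t* Reserve(size_t size) {
    size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);  // Round up to pointer size.
    if (size > static_cast<size_t>(end_ - ptr_)) {
      return nullptr;
    }
    ptr_ += size;        // Bump the cursor...
    return ptr_ - size;  // ...and hand out the block just skipped over.
  }

 private:
  uint8_t* ptr_;
  uint8_t* const end_;
};
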
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 9707f6f..f485e4a 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -86,6 +86,9 @@
   // Reserve a region of code of size at least "size". Returns null if there is no more room.
   uint8_t* ReserveCode(Thread* self, size_t size) REQUIRES(!lock_);
 
+  // Reserve a region of data of size at least "size". Returns null if there is no more room.
+  uint8_t* ReserveData(Thread* self, size_t size) REQUIRES(!lock_);
+
   // Add a data array of size (end - begin) with the associated contents, returns null if there
   // is no more room.
   uint8_t* AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end)
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index 258c29d..d437dd5 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -26,16 +26,12 @@
 
 class JitCompileTask : public Task {
  public:
-  JitCompileTask(ArtMethod* method, JitInstrumentationCache* cache)
-      : method_(method), cache_(cache) {
-  }
+  explicit JitCompileTask(ArtMethod* method) : method_(method) {}
 
   virtual void Run(Thread* self) OVERRIDE {
     ScopedObjectAccess soa(self);
     VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
-    if (Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
-      cache_->SignalCompiled(self, method_);
-    } else {
+    if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
       VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
     }
   }
@@ -46,13 +42,14 @@
 
  private:
   ArtMethod* const method_;
-  JitInstrumentationCache* const cache_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
 };
 
-JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold)
-    : lock_("jit instrumentation lock"), hot_method_threshold_(hot_method_threshold) {
+JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold,
+                                                 size_t warm_method_threshold)
+    : hot_method_threshold_(hot_method_threshold),
+      warm_method_threshold_(warm_method_threshold) {
 }
 
 void JitInstrumentationCache::CreateThreadPool() {
@@ -60,20 +57,11 @@
 }
 
 void JitInstrumentationCache::DeleteThreadPool() {
+  DCHECK(Runtime::Current()->IsShuttingDown(Thread::Current()));
   thread_pool_.reset();
 }
 
-void JitInstrumentationCache::SignalCompiled(Thread* self, ArtMethod* method) {
-  ScopedObjectAccessUnchecked soa(self);
-  jmethodID method_id = soa.EncodeMethod(method);
-  MutexLock mu(self, lock_);
-  auto it = samples_.find(method_id);
-  if (it != samples_.end()) {
-    samples_.erase(it);
-  }
-}
-
-void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t count) {
+void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t) {
   ScopedObjectAccessUnchecked soa(self);
   // Since we don't have on-stack replacement, some methods can remain in the interpreter longer
   // than we want resulting in samples even after the method is compiled.
@@ -81,34 +69,21 @@
       Runtime::Current()->GetJit()->GetCodeCache()->ContainsMethod(method)) {
     return;
   }
-  jmethodID method_id = soa.EncodeMethod(method);
-  bool is_hot = false;
-  {
-    MutexLock mu(self, lock_);
-    size_t sample_count = 0;
-    auto it = samples_.find(method_id);
-    if (it != samples_.end()) {
-      it->second += count;
-      sample_count = it->second;
-    } else {
-      sample_count = count;
-      samples_.insert(std::make_pair(method_id, count));
-    }
-    // If we have enough samples, mark as hot and request Jit compilation.
-    if (sample_count >= hot_method_threshold_ && sample_count - count < hot_method_threshold_) {
-      is_hot = true;
+  if (thread_pool_.get() == nullptr) {
+    DCHECK(Runtime::Current()->IsShuttingDown(self));
+    return;
+  }
+  uint16_t sample_count = method->IncrementCounter();
+  if (sample_count == warm_method_threshold_) {
+    ProfilingInfo* info = method->CreateProfilingInfo();
+    if (info != nullptr) {
+      VLOG(jit) << "Start profiling " << PrettyMethod(method);
     }
   }
-  if (is_hot) {
-    if (thread_pool_.get() != nullptr) {
-      thread_pool_->AddTask(self, new JitCompileTask(
-          method->GetInterfaceMethodIfProxy(sizeof(void*)), this));
-      thread_pool_->StartWorkers(self);
-    } else {
-      VLOG(jit) << "Compiling hot method " << PrettyMethod(method);
-      Runtime::Current()->GetJit()->CompileMethod(
-          method->GetInterfaceMethodIfProxy(sizeof(void*)), self);
-    }
+  if (sample_count == hot_method_threshold_) {
+    thread_pool_->AddTask(self, new JitCompileTask(
+        method->GetInterfaceMethodIfProxy(sizeof(void*))));
+    thread_pool_->StartWorkers(self);
   }
 }
 
@@ -117,5 +92,17 @@
   CHECK(instrumentation_cache_ != nullptr);
 }
 
+void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread,
+                                                          mirror::Object* this_object,
+                                                          ArtMethod* caller,
+                                                          uint32_t dex_pc,
+                                                          ArtMethod* callee ATTRIBUTE_UNUSED) {
+  DCHECK(this_object != nullptr);
+  ProfilingInfo* info = caller->GetProfilingInfo(sizeof(void*));
+  if (info != nullptr) {
+    info->AddInvokeInfo(thread, dex_pc, this_object->GetClass());
+  }
+}
+
 }  // namespace jit
 }  // namespace art
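AddSamples above implements a two-threshold hotness policy: the warmup threshold allocates a ProfilingInfo so receiver types get recorded, and the compile threshold enqueues the method on the compiler thread pool. A standalone sketch of the counting logic — HotnessCounter and its comments are assumptions; in ART the counter lives on ArtMethod:

#include <cstdint>

struct HotnessCounter {
  uint16_t count = 0;

  // Called once per sample (method entry, backward branch, or virtual/interface invoke).
  void Sample(uint16_t warm_threshold, uint16_t hot_threshold) {
    ++count;
    if (count == warm_threshold) {
      // Warm: start profiling (in ART, method->CreateProfilingInfo()).
    }
    if (count == hot_threshold) {
      // Hot: queue for background compilation (in ART, a JitCompileTask).
    }
  }
};

Comparing with == rather than >= makes each action fire exactly once, even though samples keep arriving while the method still runs in the interpreter.
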
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 0deaf8a..6fdef65 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -45,18 +45,15 @@
 // Keeps track of which methods are hot.
 class JitInstrumentationCache {
  public:
-  explicit JitInstrumentationCache(size_t hot_method_threshold);
+  JitInstrumentationCache(size_t hot_method_threshold, size_t warm_method_threshold);
   void AddSamples(Thread* self, ArtMethod* method, size_t samples)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_);
-  void SignalCompiled(Thread* self, ArtMethod* method)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateThreadPool();
   void DeleteThreadPool();
 
  private:
-  Mutex lock_;
-  std::unordered_map<jmethodID, size_t> samples_;
   size_t hot_method_threshold_;
+  size_t warm_method_threshold_;
   std::unique_ptr<ThreadPool> thread_pool_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationCache);
@@ -66,37 +63,43 @@
  public:
   explicit JitInstrumentationListener(JitInstrumentationCache* cache);
 
-  virtual void MethodEntered(Thread* thread, mirror::Object* /*this_object*/,
-                             ArtMethod* method, uint32_t /*dex_pc*/)
+  void MethodEntered(Thread* thread, mirror::Object* /*this_object*/,
+                     ArtMethod* method, uint32_t /*dex_pc*/)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     instrumentation_cache_->AddSamples(thread, method, 1);
   }
-  virtual void MethodExited(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                            ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                            const JValue& /*return_value*/)
+  void MethodExited(Thread* /*thread*/, mirror::Object* /*this_object*/,
+                    ArtMethod* /*method*/, uint32_t /*dex_pc*/,
+                    const JValue& /*return_value*/)
       OVERRIDE { }
-  virtual void MethodUnwind(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                            ArtMethod* /*method*/, uint32_t /*dex_pc*/) OVERRIDE { }
-  virtual void FieldRead(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                         ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                         ArtField* /*field*/) OVERRIDE { }
-  virtual void FieldWritten(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                            ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                            ArtField* /*field*/, const JValue& /*field_value*/)
+  void MethodUnwind(Thread* /*thread*/, mirror::Object* /*this_object*/,
+                    ArtMethod* /*method*/, uint32_t /*dex_pc*/) OVERRIDE { }
+  void FieldRead(Thread* /*thread*/, mirror::Object* /*this_object*/,
+                 ArtMethod* /*method*/, uint32_t /*dex_pc*/,
+                 ArtField* /*field*/) OVERRIDE { }
+  void FieldWritten(Thread* /*thread*/, mirror::Object* /*this_object*/,
+                    ArtMethod* /*method*/, uint32_t /*dex_pc*/,
+                    ArtField* /*field*/, const JValue& /*field_value*/)
       OVERRIDE { }
-  virtual void ExceptionCaught(Thread* /*thread*/,
-                               mirror::Throwable* /*exception_object*/) OVERRIDE { }
+  void ExceptionCaught(Thread* /*thread*/,
+                       mirror::Throwable* /*exception_object*/) OVERRIDE { }
 
-  virtual void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
-                          ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
+  void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
+                  ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
 
-  // We only care about how many dex instructions were executed in the Jit.
-  virtual void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     CHECK_LE(dex_pc_offset, 0);
     instrumentation_cache_->AddSamples(thread, method, 1);
   }
 
+  void InvokeVirtualOrInterface(Thread* thread,
+                                mirror::Object* this_object,
+                                ArtMethod* caller,
+                                uint32_t dex_pc,
+                                ArtMethod* callee)
+      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   JitInstrumentationCache* const instrumentation_cache_;
 
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
new file mode 100644
index 0000000..0c039f2
--- /dev/null
+++ b/runtime/jit/profiling_info.cc
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profiling_info.h"
+
+#include "art_method-inl.h"
+#include "dex_instruction.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "scoped_thread_state_change.h"
+#include "thread.h"
+
+namespace art {
+
+ProfilingInfo* ProfilingInfo::Create(ArtMethod* method) {
+  // Walk over the dex instructions of the method and keep track of
+  // instructions we are interested in profiling.
+  const uint16_t* code_ptr = nullptr;
+  const uint16_t* code_end = nullptr;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    DCHECK(!method->IsNative());
+    const DexFile::CodeItem& code_item = *method->GetCodeItem();
+    code_ptr = code_item.insns_;
+    code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
+  }
+
+  uint32_t dex_pc = 0;
+  std::vector<uint32_t> entries;
+  while (code_ptr < code_end) {
+    const Instruction& instruction = *Instruction::At(code_ptr);
+    switch (instruction.Opcode()) {
+      case Instruction::INVOKE_VIRTUAL:
+      case Instruction::INVOKE_VIRTUAL_RANGE:
+      case Instruction::INVOKE_VIRTUAL_QUICK:
+      case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
+      case Instruction::INVOKE_INTERFACE:
+      case Instruction::INVOKE_INTERFACE_RANGE:
+        entries.push_back(dex_pc);
+        break;
+
+      default:
+        break;
+    }
+    dex_pc += instruction.SizeInCodeUnits();
+    code_ptr += instruction.SizeInCodeUnits();
+  }
+
+  // If there is no instruction we are interested in, there is no need to create a
+  // `ProfilingInfo` object; it would never be filled.
+  if (entries.empty()) {
+    return nullptr;
+  }
+
+  // Allocate the `ProfilingInfo` object in the JIT's data space.
+  jit::JitCodeCache* code_cache = Runtime::Current()->GetJit()->GetCodeCache();
+  size_t profile_info_size = sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size();
+  uint8_t* data = code_cache->ReserveData(Thread::Current(), profile_info_size);
+
+  if (data == nullptr) {
+    VLOG(jit) << "Cannot allocate profiling info anymore";
+    return nullptr;
+  }
+
+  return new (data) ProfilingInfo(entries);
+}
+
+void ProfilingInfo::AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls) {
+  InlineCache* cache = nullptr;
+  // TODO: binary search if array is too long.
+  for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+    if (cache_[i].dex_pc == dex_pc) {
+      cache = &cache_[i];
+      break;
+    }
+  }
+  DCHECK(cache != nullptr);
+
+  ScopedObjectAccess soa(self);
+  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
+    mirror::Class* existing = cache->classes_[i].Read<kWithoutReadBarrier>();
+    if (existing == cls) {
+      // Receiver type is already in the cache, nothing else to do.
+      return;
+    } else if (existing == nullptr) {
+      // Cache entry is empty, try to put `cls` in it.
+      GcRoot<mirror::Class> expected_root(nullptr);
+      GcRoot<mirror::Class> desired_root(cls);
+      if (!reinterpret_cast<Atomic<GcRoot<mirror::Class>>*>(&cache->classes_[i])->
+              CompareExchangeStrongSequentiallyConsistent(expected_root, desired_root)) {
+        // Some other thread put a class in the cache, continue iteration starting at this
+        // entry in case the entry contains `cls`.
+        --i;
+      } else {
+        // We successfully set `cls`, just return.
+        return;
+      }
+    }
+  }
+  // Unsuccessful: the cache is full, making it megamorphic.
+  DCHECK(cache->IsMegamorphic());
+}
+
+}  // namespace art
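The loop in AddInvokeInfo is a lock-free "claim the first empty slot" pattern: CAS the class into a null entry, and on failure re-examine the entry because another thread may have just stored the same class. A generic sketch with std::atomic — the template below is an illustration, whereas the real code uses ART's GcRoot and Atomic types:

#include <atomic>
#include <cstddef>

// Returns true if value is (now) present in the cache; false if the cache is full.
template <typename T, size_t N>
bool InsertIfAbsent(std::atomic<T*> (&slots)[N], T* value) {
  for (size_t i = 0; i < N; ++i) {
    T* existing = slots[i].load(std::memory_order_acquire);
    if (existing == value) {
      return true;  // Already cached: nothing to do.
    }
    if (existing == nullptr) {
      T* expected = nullptr;
      if (slots[i].compare_exchange_strong(expected, value)) {
        return true;  // We claimed the empty slot.
      }
      --i;  // Lost the race; revisit this slot in case it now holds value.
    }
  }
  return false;  // Every slot holds a different value: megamorphic.
}
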
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
new file mode 100644
index 0000000..73ca41a
--- /dev/null
+++ b/runtime/jit/profiling_info.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_PROFILING_INFO_H_
+#define ART_RUNTIME_JIT_PROFILING_INFO_H_
+
+#include <vector>
+
+#include "base/macros.h"
+#include "gc_root.h"
+
+namespace art {
+
+class ArtMethod;
+
+namespace mirror {
+class Class;
+}  // namespace mirror
+
+/**
+ * Profiling info for a method, created and filled by the interpreter once the
+ * method is warm, and used by the compiler to drive optimizations.
+ */
+class ProfilingInfo {
+ public:
+  static ProfilingInfo* Create(ArtMethod* method);
+
+  // Add information from an executed INVOKE instruction to the profile.
+  void AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls);
+
+  // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
+  template<typename RootVisitorType>
+  void VisitRoots(RootVisitorType& visitor) NO_THREAD_SAFETY_ANALYSIS {
+    for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+      InlineCache* cache = &cache_[i];
+      for (size_t j = 0; j < InlineCache::kIndividualCacheSize; ++j) {
+        visitor.VisitRootIfNonNull(cache->classes_[j].AddressWithoutBarrier());
+      }
+    }
+  }
+
+ private:
+  // Structure to store the classes seen at runtime for a specific instruction.
+  // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
+  struct InlineCache {
+    bool IsMonomorphic() const {
+      DCHECK_GE(kIndividualCacheSize, 2);
+      return !classes_[0].IsNull() && classes_[1].IsNull();
+    }
+
+    bool IsMegamorphic() const {
+      for (size_t i = 0; i < kIndividualCacheSize; ++i) {
+        if (classes_[i].IsNull()) {
+          return false;
+        }
+      }
+      return true;
+    }
+
+    bool IsUninitialized() const {
+      return classes_[0].IsNull();
+    }
+
+    bool IsPolymorphic() const {
+      DCHECK_GE(kIndividualCacheSize, 3);
+      return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
+    }
+
+    static constexpr uint16_t kIndividualCacheSize = 5;
+    uint32_t dex_pc;
+    GcRoot<mirror::Class> classes_[kIndividualCacheSize];
+  };
+
+  explicit ProfilingInfo(const std::vector<uint32_t>& entries)
+      : number_of_inline_caches_(entries.size()) {
+    memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
+    for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+      cache_[i].dex_pc = entries[i];
+    }
+  }
+
+  // Number of instructions we are profiling in the ArtMethod.
+  const uint32_t number_of_inline_caches_;
+
+  // Dynamically allocated array of size `number_of_inline_caches_`.
+  InlineCache cache_[0];
+
+  DISALLOW_COPY_AND_ASSIGN(ProfilingInfo);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_PROFILING_INFO_H_
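A sketch of how a consumer might act on the InlineCache classification; LowerInvoke and its strategy comments are assumptions about a future optimizing-compiler client, not an API this change adds:

// Assumes a cache type exposing the predicates declared above.
template <typename Cache>
void LowerInvoke(const Cache& cache) {
  if (cache.IsUninitialized()) {
    // No receiver observed yet: keep the plain virtual dispatch.
  } else if (cache.IsMonomorphic()) {
    // Single receiver class: guard on the class, then call or inline directly.
  } else if (cache.IsMegamorphic()) {
    // classes_ overflowed: devirtualization is unlikely to pay off.
  } else {
    // Polymorphic: a short chain of class checks can still help.
  }
}
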
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 10b381d..93f2aea 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -842,10 +842,10 @@
     }
   }
   for (ArtMethod& method : GetDirectMethods(pointer_size)) {
-    method.VisitRoots(visitor);
+    method.VisitRoots(visitor, pointer_size);
   }
   for (ArtMethod& method : GetVirtualMethods(pointer_size)) {
-    method.VisitRoots(visitor);
+    method.VisitRoots(visitor, pointer_size);
   }
 }
 
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index 7910f94..9e12806 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -35,7 +35,7 @@
     trace = soa.Self()->CreateInternalStackTrace<false>(soa);
   } else {
     // Suspend thread to build stack trace.
-    ScopedThreadSuspension sts(soa.Self(), kSuspended);
+    ScopedThreadSuspension sts(soa.Self(), kNative);
     ThreadList* thread_list = Runtime::Current()->GetThreadList();
     bool timed_out;
     Thread* thread = thread_list->SuspendThreadByPeer(peer, true, false, &timed_out);
@@ -47,11 +47,9 @@
       }
       // Restart suspended thread.
       thread_list->Resume(thread, false);
-    } else {
-      if (timed_out) {
-        LOG(ERROR) << "Trying to get thread's stack failed as the thread failed to suspend within a "
-            "generous timeout.";
-      }
+    } else if (timed_out) {
+      LOG(ERROR) << "Trying to get thread's stack failed as the thread failed to suspend within a "
+          "generous timeout.";
     }
   }
   return trace;
diff --git a/runtime/oat.h b/runtime/oat.h
index 1520a9b..b8b8d30 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '6', '9', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '7', '0', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 25b5e49..50e2053 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -158,6 +158,9 @@
       .Define("-Xjitthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::JITCompileThreshold)
+      .Define("-Xjitwarmupthreshold:_")
+          .WithType<unsigned int>()
+          .IntoKey(M::JITWarmupThreshold)
       .Define("-XX:HspaceCompactForOOMMinIntervalMs=_")  // in ms
           .WithType<MillisecondsToNanoseconds>()  // store as ns
           .IntoKey(M::HSpaceCompactForOOMMinIntervalsMs)
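With the option defined, the warmup threshold can be set on the command line next to the existing compile threshold. An illustrative invocation — the classpath and main class are placeholders, and the threshold values are arbitrary:

  dalvikvm -Xusejit:true -Xjitthreshold:1000 -Xjitwarmupthreshold:500 -cp classes.dex Main
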
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 7e8c551..6a77a9e 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -57,7 +57,8 @@
 class BoundedStackVisitor : public StackVisitor {
  public:
   BoundedStackVisitor(std::vector<std::pair<ArtMethod*, uint32_t>>* stack,
-      Thread* thread, uint32_t max_depth)
+                      Thread* thread,
+                      uint32_t max_depth)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         stack_(stack),
@@ -80,9 +81,11 @@
   }
 
  private:
-  std::vector<std::pair<ArtMethod*, uint32_t>>* stack_;
+  std::vector<std::pair<ArtMethod*, uint32_t>>* const stack_;
   const uint32_t max_depth_;
   uint32_t depth_;
+
+  DISALLOW_COPY_AND_ASSIGN(BoundedStackVisitor);
 };
 
 // This is called from either a thread list traversal or from a checkpoint.  Regardless
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index dd3703c..64c2249 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -40,6 +40,9 @@
   kIntrinsicReverseBits,
   kIntrinsicReverseBytes,
   kIntrinsicNumberOfLeadingZeros,
+  kIntrinsicNumberOfTrailingZeros,
+  kIntrinsicRotateRight,
+  kIntrinsicRotateLeft,
   kIntrinsicAbsInt,
   kIntrinsicAbsLong,
   kIntrinsicAbsFloat,
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 9d5ce9f..d797d2a 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -40,14 +40,13 @@
     handler_dex_pc_(0), clear_exception_(false), handler_frame_depth_(kInvalidFrameDepth) {
 }
 
-// Finds catch handler or prepares for deoptimization.
+// Finds catch handler.
 class CatchBlockStackVisitor FINAL : public StackVisitor {
  public:
   CatchBlockStackVisitor(Thread* self, Context* context, Handle<mirror::Throwable>* exception,
                          QuickExceptionHandler* exception_handler)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
-        self_(self),
         exception_(exception),
         exception_handler_(exception_handler) {
   }
@@ -90,14 +89,15 @@
     }
     if (dex_pc != DexFile::kDexNoIndex) {
       bool clear_exception = false;
-      StackHandleScope<1> hs(self_);
+      StackHandleScope<1> hs(GetThread());
       Handle<mirror::Class> to_find(hs.NewHandle((*exception_)->GetClass()));
       uint32_t found_dex_pc = method->FindCatchBlock(to_find, dex_pc, &clear_exception);
       exception_handler_->SetClearException(clear_exception);
       if (found_dex_pc != DexFile::kDexNoIndex) {
         exception_handler_->SetHandlerMethod(method);
         exception_handler_->SetHandlerDexPc(found_dex_pc);
-        exception_handler_->SetHandlerQuickFramePc(method->ToNativeQuickPc(found_dex_pc));
+        exception_handler_->SetHandlerQuickFramePc(
+            method->ToNativeQuickPc(found_dex_pc, /* is_catch_handler */ true));
         exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
         return false;  // End stack walk.
       }
@@ -105,7 +105,6 @@
     return true;  // Continue stack walk.
   }
 
-  Thread* const self_;
   // The exception we're looking for the catch block of.
   Handle<mirror::Throwable>* exception_;
   // The quick exception handler we're visiting for.
@@ -125,7 +124,7 @@
   StackHandleScope<1> hs(self_);
   Handle<mirror::Throwable> exception_ref(hs.NewHandle(exception));
 
-  // Walk the stack to find catch handler or prepare for deoptimization.
+  // Walk the stack to find catch handler.
   CatchBlockStackVisitor visitor(self_, context_, &exception_ref, this);
   visitor.WalkStack(true);
 
@@ -146,15 +145,106 @@
     // Put exception back in root set with clear throw location.
     self_->SetException(exception_ref.Get());
   }
-  // The debugger may suspend this thread and walk its stack. Let's do this before popping
-  // instrumentation frames.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  if (instrumentation->HasExceptionCaughtListeners()
-      && self_->IsExceptionThrownByCurrentMethod(exception)) {
-    instrumentation->ExceptionCaughtEvent(self_, exception_ref.Get());
-    // Instrumentation may have been updated.
-    method_tracing_active_ = is_deoptimization_ ||
-        Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled();
+  // If the handler is in optimized code, we need to set the catch environment.
+  if (*handler_quick_frame_ != nullptr &&
+      handler_method_ != nullptr &&
+      handler_method_->IsOptimized(sizeof(void*))) {
+    SetCatchEnvironmentForOptimizedHandler(&visitor);
+  }
+}
+
+static VRegKind ToVRegKind(DexRegisterLocation::Kind kind) {
+  // Slightly hacky since we cannot map DexRegisterLocation::Kind and VRegKind
+  // one to one. However, StackVisitor::GetVRegFromOptimizedCode only needs to
+  // distinguish between core/FPU registers and low/high bits on 64-bit.
+  switch (kind) {
+    case DexRegisterLocation::Kind::kConstant:
+    case DexRegisterLocation::Kind::kInStack:
+      // VRegKind is ignored.
+      return VRegKind::kUndefined;
+
+    case DexRegisterLocation::Kind::kInRegister:
+      // Selects core register. For 64-bit registers, selects low 32 bits.
+      return VRegKind::kLongLoVReg;
+
+    case DexRegisterLocation::Kind::kInRegisterHigh:
+      // Selects core register. For 64-bit registers, selects high 32 bits.
+      return VRegKind::kLongHiVReg;
+
+    case DexRegisterLocation::Kind::kInFpuRegister:
+      // Selects FPU register. For 64-bit registers, selects low 32 bits.
+      return VRegKind::kDoubleLoVReg;
+
+    case DexRegisterLocation::Kind::kInFpuRegisterHigh:
+      // Selects FPU register. For 64-bit registers, selects high 32 bits.
+      return VRegKind::kDoubleHiVReg;
+
+    default:
+      LOG(FATAL) << "Unexpected vreg location "
+                 << DexRegisterLocation::PrettyDescriptor(kind);
+      UNREACHABLE();
+  }
+}
+
+void QuickExceptionHandler::SetCatchEnvironmentForOptimizedHandler(StackVisitor* stack_visitor) {
+  DCHECK(!is_deoptimization_);
+  DCHECK(*handler_quick_frame_ != nullptr) << "Method should not be called on upcall exceptions";
+  DCHECK(handler_method_ != nullptr && handler_method_->IsOptimized(sizeof(void*)));
+
+  if (kDebugExceptionDelivery) {
+    self_->DumpStack(LOG(INFO) << "Setting catch phis: ");
+  }
+
+  const size_t number_of_vregs = handler_method_->GetCodeItem()->registers_size_;
+  CodeInfo code_info = handler_method_->GetOptimizedCodeInfo();
+  StackMapEncoding encoding = code_info.ExtractEncoding();
+
+  // Find stack map of the throwing instruction.
+  StackMap throw_stack_map =
+      code_info.GetStackMapForNativePcOffset(stack_visitor->GetNativePcOffset(), encoding);
+  DCHECK(throw_stack_map.IsValid());
+  DexRegisterMap throw_vreg_map =
+      code_info.GetDexRegisterMapOf(throw_stack_map, encoding, number_of_vregs);
+
+  // Find stack map of the catch block.
+  StackMap catch_stack_map = code_info.GetCatchStackMapForDexPc(GetHandlerDexPc(), encoding);
+  DCHECK(catch_stack_map.IsValid());
+  DexRegisterMap catch_vreg_map =
+      code_info.GetDexRegisterMapOf(catch_stack_map, encoding, number_of_vregs);
+
+  // Copy values between them.
+  for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
+    DexRegisterLocation::Kind catch_location =
+        catch_vreg_map.GetLocationKind(vreg, number_of_vregs, code_info, encoding);
+    if (catch_location == DexRegisterLocation::Kind::kNone) {
+      continue;
+    }
+    DCHECK(catch_location == DexRegisterLocation::Kind::kInStack);
+
+    // Get vreg value from its current location.
+    uint32_t vreg_value;
+    VRegKind vreg_kind = ToVRegKind(throw_vreg_map.GetLocationKind(vreg,
+                                                                   number_of_vregs,
+                                                                   code_info,
+                                                                   encoding));
+    bool get_vreg_success = stack_visitor->GetVReg(stack_visitor->GetMethod(),
+                                                   vreg,
+                                                   vreg_kind,
+                                                   &vreg_value);
+    CHECK(get_vreg_success) << "VReg " << vreg << " was optimized out ("
+                            << "method=" << PrettyMethod(stack_visitor->GetMethod()) << ", "
+                            << "dex_pc=" << stack_visitor->GetDexPc() << ", "
+                            << "native_pc_offset=" << stack_visitor->GetNativePcOffset() << ")";
+
+    // Copy value to the catch phi's stack slot.
+    int32_t slot_offset = catch_vreg_map.GetStackOffsetInBytes(vreg,
+                                                               number_of_vregs,
+                                                               code_info,
+                                                               encoding);
+    ArtMethod** frame_top = stack_visitor->GetCurrentQuickFrame();
+    uint8_t* slot_address = reinterpret_cast<uint8_t*>(frame_top) + slot_offset;
+    uint32_t* slot_ptr = reinterpret_cast<uint32_t*>(slot_address);
+    *slot_ptr = vreg_value;
   }
 }
 
@@ -164,7 +254,6 @@
   DeoptimizeStackVisitor(Thread* self, Context* context, QuickExceptionHandler* exception_handler)
       SHARED_REQUIRES(Locks::mutator_lock_)
       : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
-        self_(self),
         exception_handler_(exception_handler),
         prev_shadow_frame_(nullptr),
         stacked_shadow_frame_pushed_(false) {
@@ -181,7 +270,8 @@
         // In case there is no deoptimized shadow frame for this upcall, we still
         // need to push a nullptr to the stack since there is always a matching pop after
         // the long jump.
-        self_->PushStackedShadowFrame(nullptr, StackedShadowFrameType::kDeoptimizationShadowFrame);
+        GetThread()->PushStackedShadowFrame(nullptr,
+                                            StackedShadowFrameType::kDeoptimizationShadowFrame);
         stacked_shadow_frame_pushed_ = true;
       }
       return false;  // End stack walk.
@@ -189,6 +279,12 @@
       // Ignore callee save method.
       DCHECK(method->IsCalleeSaveMethod());
       return true;
+    } else if (method->IsNative()) {
+      // If we return from JNI with a pending exception and want to deoptimize, we need to skip
+      // the native method.
+      // The top method is a runtime method, the native method comes next.
+      CHECK_EQ(GetFrameDepth(), 1U);
+      return true;
     } else {
       return HandleDeoptimization(method);
     }
@@ -201,21 +297,22 @@
 
   bool HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) {
     const DexFile::CodeItem* code_item = m->GetCodeItem();
-    CHECK(code_item != nullptr);
+    CHECK(code_item != nullptr) << "No code item for " << PrettyMethod(m);
     uint16_t num_regs = code_item->registers_size_;
     uint32_t dex_pc = GetDexPc();
-    StackHandleScope<2> hs(self_);  // Dex cache, class loader and method.
+    StackHandleScope<2> hs(GetThread());  // Dex cache, class loader and method.
     mirror::Class* declaring_class = m->GetDeclaringClass();
     Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
     Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
-    verifier::MethodVerifier verifier(self_, h_dex_cache->GetDexFile(), h_dex_cache, h_class_loader,
-                                      &m->GetClassDef(), code_item, m->GetDexMethodIndex(),
-                                      m, m->GetAccessFlags(), true, true, true, true);
+    verifier::MethodVerifier verifier(GetThread(), h_dex_cache->GetDexFile(), h_dex_cache,
+                                      h_class_loader, &m->GetClassDef(), code_item,
+                                      m->GetDexMethodIndex(), m, m->GetAccessFlags(), true, true,
+                                      true, true);
     bool verifier_success = verifier.Verify();
     CHECK(verifier_success) << PrettyMethod(m);
     ShadowFrame* new_frame = ShadowFrame::CreateDeoptimizedFrame(num_regs, nullptr, m, dex_pc);
     {
-      ScopedStackedShadowFramePusher pusher(self_, new_frame,
+      ScopedStackedShadowFramePusher pusher(GetThread(), new_frame,
                                             StackedShadowFrameType::kShadowFrameUnderConstruction);
       const std::vector<int32_t> kinds(verifier.DescribeVRegs(dex_pc));
 
@@ -322,13 +419,13 @@
       // Will be popped after the long jump after DeoptimizeStack(),
       // right before interpreter::EnterInterpreterFromDeoptimize().
       stacked_shadow_frame_pushed_ = true;
-      self_->PushStackedShadowFrame(new_frame, StackedShadowFrameType::kDeoptimizationShadowFrame);
+      GetThread()->PushStackedShadowFrame(new_frame,
+                                          StackedShadowFrameType::kDeoptimizationShadowFrame);
     }
     prev_shadow_frame_ = new_frame;
     return true;
   }
 
-  Thread* const self_;
   QuickExceptionHandler* const exception_handler_;
   ShadowFrame* prev_shadow_frame_;
   bool stacked_shadow_frame_pushed_;
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index e934834..2e05c7e 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -43,9 +43,21 @@
     UNREACHABLE();
   }
 
+  // Find the catch handler for the given exception.
   void FindCatch(mirror::Throwable* exception) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Deoptimize the stack to the upcall. For every compiled frame, we create a "copy"
+  // shadow frame that will be executed with the interpreter.
   void DeoptimizeStack() SHARED_REQUIRES(Locks::mutator_lock_);
+  // Update the instrumentation stack by removing all methods that will be unwound
+  // by the exception being thrown.
   void UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Set up environment before delivering an exception to optimized code.
+  void SetCatchEnvironmentForOptimizedHandler(StackVisitor* stack_visitor)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Long jump either to a catch handler or to the upcall.
   NO_RETURN void DoLongJump() SHARED_REQUIRES(Locks::mutator_lock_);
 
   void SetHandlerQuickFrame(ArtMethod** handler_quick_frame) {
@@ -83,9 +95,10 @@
  private:
   Thread* const self_;
   Context* const context_;
+  // Should we deoptimize the stack?
   const bool is_deoptimization_;
   // Is method tracing active?
-  bool method_tracing_active_;
+  const bool method_tracing_active_;
   // Quick frame with found handler or last frame if no handler found.
   ArtMethod** handler_quick_frame_;
   // PC to branch to for the handler.
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 4797564..6b144cf 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -57,6 +57,7 @@
 #include "atomic.h"
 #include "base/arena_allocator.h"
 #include "base/dumpable.h"
+#include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "compiler_callbacks.h"
@@ -129,6 +130,7 @@
 #include "thread_list.h"
 #include "trace.h"
 #include "transaction.h"
+#include "utils.h"
 #include "verifier/method_verifier.h"
 #include "well_known_classes.h"
 
@@ -1397,19 +1399,20 @@
   // Visiting the roots of these ArtMethods is not currently required since all the GcRoots are
   // null.
   BufferedRootVisitor<16> buffered_visitor(visitor, RootInfo(kRootVMInternal));
+  const size_t pointer_size = GetClassLinker()->GetImagePointerSize();
   if (HasResolutionMethod()) {
-    resolution_method_->VisitRoots(buffered_visitor);
+    resolution_method_->VisitRoots(buffered_visitor, pointer_size);
   }
   if (HasImtConflictMethod()) {
-    imt_conflict_method_->VisitRoots(buffered_visitor);
+    imt_conflict_method_->VisitRoots(buffered_visitor, pointer_size);
   }
   if (imt_unimplemented_method_ != nullptr) {
-    imt_unimplemented_method_->VisitRoots(buffered_visitor);
+    imt_unimplemented_method_->VisitRoots(buffered_visitor, pointer_size);
   }
   for (size_t i = 0; i < kLastCalleeSaveType; ++i) {
     auto* m = reinterpret_cast<ArtMethod*>(callee_save_methods_[i]);
     if (m != nullptr) {
-      m->VisitRoots(buffered_visitor);
+      m->VisitRoots(buffered_visitor, pointer_size);
     }
   }
 }
@@ -1749,7 +1752,8 @@
   jit_.reset(jit::Jit::Create(jit_options_.get(), &error_msg));
   if (jit_.get() != nullptr) {
     compiler_callbacks_ = jit_->GetCompilerCallbacks();
-    jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold());
+    jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold(),
+                                     jit_options_->GetWarmupThreshold());
     jit_->CreateThreadPool();
   } else {
     LOG(WARNING) << "Failed to create JIT " << error_msg;
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 02ed3a2..d88e84b 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -68,6 +68,7 @@
 RUNTIME_OPTIONS_KEY (bool,                EnableHSpaceCompactForOOM,      true)
 RUNTIME_OPTIONS_KEY (bool,                UseJIT,                         false)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITCompileThreshold,            jit::Jit::kDefaultCompileThreshold)
+RUNTIME_OPTIONS_KEY (unsigned int,        JITWarmupThreshold,             jit::Jit::kDefaultWarmupThreshold)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheCapacity,           jit::JitCodeCache::kDefaultCapacity)
 RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
                                           HSpaceCompactForOOMMinIntervalsMs,\
diff --git a/runtime/stack.cc b/runtime/stack.cc
index a765a3f..d956f0e 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -325,6 +325,10 @@
 
 bool StackVisitor::GetRegisterIfAccessible(uint32_t reg, VRegKind kind, uint32_t* val) const {
   const bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
+
+  // X86 float registers are 64-bit and the logic below does not apply.
+  DCHECK(!is_float || kRuntimeISA != InstructionSet::kX86);
+
   if (!IsAccessibleRegister(reg, is_float)) {
     return false;
   }
diff --git a/runtime/stack.h b/runtime/stack.h
index 2562738..5bbf003 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -441,6 +441,10 @@
   void WalkStack(bool include_transitions = false)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  Thread* GetThread() const {
+    return thread_;
+  }
+
   ArtMethod* GetMethod() const SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool IsShadowFrame() const {
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 07b79b5..a15a081 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -1115,7 +1115,7 @@
     region_.StoreUnaligned<NumberOfStackMapsType>(kNumberOfStackMapsOffset, number_of_stack_maps);
   }
 
-  // Get the size all the stack maps of this CodeInfo object, in bytes.
+  // Get the size of all the stack maps of this CodeInfo object, in bytes.
   size_t GetStackMapsSize(const StackMapEncoding& encoding) const {
     return encoding.ComputeStackMapSize() * GetNumberOfStackMaps();
   }
@@ -1174,9 +1174,23 @@
     return StackMap();
   }
 
+  // Searches the stack map list backwards because catch stack maps are stored
+  // at the end.
+  StackMap GetCatchStackMapForDexPc(uint32_t dex_pc, const StackMapEncoding& encoding) const {
+    for (size_t i = GetNumberOfStackMaps(); i > 0; --i) {
+      StackMap stack_map = GetStackMapAt(i - 1, encoding);
+      if (stack_map.GetDexPc(encoding) == dex_pc) {
+        return stack_map;
+      }
+    }
+    return StackMap();
+  }
+
   StackMap GetStackMapForNativePcOffset(uint32_t native_pc_offset,
                                         const StackMapEncoding& encoding) const {
-    // TODO: stack maps are sorted by native pc, we can do a binary search.
+    // TODO: Safepoint stack maps are sorted by native_pc_offset but catch stack
+    //       maps are not. If we knew that the method does not have try/catch,
+    //       we could do binary search.
     for (size_t i = 0, e = GetNumberOfStackMaps(); i < e; ++i) {
       StackMap stack_map = GetStackMapAt(i, encoding);
       if (stack_map.GetNativePcOffset(encoding) == native_pc_offset) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index af5830a..6e10368 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1185,7 +1185,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       : StackVisitor(thread_in, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         os(os_in),
-        thread(thread_in),
         can_allocate(can_allocate_in),
         last_method(nullptr),
         last_line_number(0),
@@ -1233,7 +1232,7 @@
       }
       os << "\n";
       if (frame_count == 0) {
-        Monitor::DescribeWait(os, thread);
+        Monitor::DescribeWait(os, GetThread());
       }
       if (can_allocate) {
         // Visit locks, but do not abort on errors. This would trigger a nested abort.
@@ -1269,7 +1268,6 @@
   }
 
   std::ostream& os;
-  const Thread* thread;
   const bool can_allocate;
   ArtMethod* last_method;
   int last_line_number;
@@ -1825,6 +1823,8 @@
   uint32_t depth_;
   uint32_t skip_depth_;
   bool skipping_;
+
+  DISALLOW_COPY_AND_ASSIGN(CountStackDepthVisitor);
 };
 
 template<bool kTransactionActive>
@@ -1891,7 +1891,9 @@
   // An array of the methods on the stack, the last entries are the dex PCs.
   mirror::PointerArray* trace_;
   // For cross compilation.
-  size_t pointer_size_;
+  const size_t pointer_size_;
+
+  DISALLOW_COPY_AND_ASSIGN(BuildInternalStackTraceVisitor);
 };
 
 template<bool kTransactionActive>
@@ -2344,10 +2346,31 @@
   // Get exception from thread.
   mirror::Throwable* exception = GetException();
   CHECK(exception != nullptr);
+  bool is_deoptimization = (exception == GetDeoptimizationException());
+  if (!is_deoptimization) {
+    // This is a real exception: let the instrumentation know about it.
+    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+    if (instrumentation->HasExceptionCaughtListeners() &&
+        IsExceptionThrownByCurrentMethod(exception)) {
+      // Instrumentation may cause GC so keep the exception object safe.
+      StackHandleScope<1> hs(this);
+      HandleWrapper<mirror::Throwable> h_exception(hs.NewHandleWrapper(&exception));
+      instrumentation->ExceptionCaughtEvent(this, exception);
+    }
+    // Does instrumentation need to deoptimize the stack?
+    // Note: we do this *after* reporting the exception to instrumentation in case it
+    // now requires deoptimization. It may happen if a debugger is attached and requests
+    // new events (single-step, breakpoint, ...) when the exception is reported.
+    is_deoptimization = Dbg::IsForcedInterpreterNeededForException(this);
+    if (is_deoptimization) {
+      // Save the exception into the deoptimization context so it can be restored
+      // before entering the interpreter.
+      PushDeoptimizationContext(JValue(), false, exception);
+    }
+  }
   // Don't leave exception visible while we try to find the handler, which may cause class
   // resolution.
   ClearException();
-  bool is_deoptimization = (exception == GetDeoptimizationException());
   QuickExceptionHandler exception_handler(this, is_deoptimization);
   if (is_deoptimization) {
     exception_handler.DeoptimizeStack();
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index d63781b..6176acd 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -1282,4 +1282,12 @@
   allocated_ids_.reset(id);
 }
 
+ScopedSuspendAll::ScopedSuspendAll(const char* cause, bool long_suspend) {
+  Runtime::Current()->GetThreadList()->SuspendAll(cause, long_suspend);
+}
+
+ScopedSuspendAll::~ScopedSuspendAll() {
+  Runtime::Current()->GetThreadList()->ResumeAll();
+}
+
 }  // namespace art
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 4c50181..c727432 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -19,6 +19,7 @@
 
 #include "base/histogram.h"
 #include "base/mutex.h"
+#include "base/value_object.h"
 #include "gc_root.h"
 #include "jni.h"
 #include "object_callbacks.h"
@@ -60,12 +61,13 @@
       REQUIRES(!Locks::thread_suspend_count_lock_);
 
   // Suspends all threads and gets exclusive access to the mutator_lock_.
-  // If long suspend is true, then other people who try to suspend will never timeout. Long suspend
-  // is currenly used for hprof since large heaps take a long time.
+  // If long_suspend is true, then other threads that try to suspend will never time out.
+  // long_suspend is currently used for hprof since large heaps take a long time.
   void SuspendAll(const char* cause, bool long_suspend = false)
       EXCLUSIVE_LOCK_FUNCTION(Locks::mutator_lock_)
-      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
-
+      REQUIRES(!Locks::thread_list_lock_,
+               !Locks::thread_suspend_count_lock_,
+               !Locks::mutator_lock_);
 
   // Suspend a thread using a peer, typically used by the debugger. Returns the thread on success,
   // else null. The peer is used to identify the thread to avoid races with the thread terminating.
@@ -188,6 +190,20 @@
   DISALLOW_COPY_AND_ASSIGN(ThreadList);
 };
 
+// Helper for suspending all threads and getting exclusive access to the mutator_lock_.
+class ScopedSuspendAll : public ValueObject {
+ public:
+  ScopedSuspendAll(const char* cause, bool long_suspend = false)
+      EXCLUSIVE_LOCK_FUNCTION(Locks::mutator_lock_)
+      REQUIRES(!Locks::thread_list_lock_,
+               !Locks::thread_suspend_count_lock_,
+               !Locks::mutator_lock_);
+  // No REQUIRES(mutator_lock_) since the unlock function already asserts this.
+  ~ScopedSuspendAll()
+      UNLOCK_FUNCTION(Locks::mutator_lock_)
+      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_THREAD_LIST_H_
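ScopedSuspendAll turns the SuspendAll()/ResumeAll() pair into an RAII guard, so every exit path out of a scope resumes the suspended threads; the trace.cc hunks below convert the call sites. A minimal sketch of the equivalent guard, with SuspendAllFn/ResumeAllFn standing in for the ThreadList calls (the real class also carries the thread-safety annotations shown in the header above):

    class ScopedSuspendAllSketch {
     public:
      explicit ScopedSuspendAllSketch(const char* cause, bool long_suspend = false) {
        SuspendAllFn(cause, long_suspend);  // take exclusive mutator access
      }
      ~ScopedSuspendAllSketch() { ResumeAllFn(); }  // runs on every exit path

      ScopedSuspendAllSketch(const ScopedSuspendAllSketch&) = delete;
      ScopedSuspendAllSketch& operator=(const ScopedSuspendAllSketch&) = delete;

     private:
      static void SuspendAllFn(const char*, bool) { /* suspend all threads */ }
      static void ResumeAllFn() { /* resume all threads */ }
    };

    void Example() {
      ScopedSuspendAllSketch ssa(__FUNCTION__);
      // ... work that needs all threads suspended ...
    }  // threads resumed here, even on early return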
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 4ab5c0e..e2743ce 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -73,6 +73,8 @@
 
  private:
   std::vector<ArtMethod*>* const method_trace_;
+
+  DISALLOW_COPY_AND_ASSIGN(BuildStackTraceVisitor);
 };
 
 static const char     kTraceTokenChar             = '*';
@@ -293,13 +295,11 @@
         break;
       }
     }
-
-    runtime->GetThreadList()->SuspendAll(__FUNCTION__);
     {
+      ScopedSuspendAll ssa(__FUNCTION__);
       MutexLock mu(self, *Locks::thread_list_lock_);
       runtime->GetThreadList()->ForEach(GetSample, the_trace);
     }
-    runtime->GetThreadList()->ResumeAll();
     ATRACE_END();
   }
 
@@ -348,10 +348,9 @@
   // Enable count of allocs if specified in the flags.
   bool enable_stats = false;
 
-  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
-
   // Create Trace object.
   {
+    ScopedSuspendAll ssa(__FUNCTION__);
     MutexLock mu(self, *Locks::trace_lock_);
     if (the_trace_ != nullptr) {
       LOG(ERROR) << "Trace already in progress, ignoring this request";
@@ -375,8 +374,6 @@
     }
   }
 
-  runtime->GetThreadList()->ResumeAll();
-
   // Can't call this when holding the mutator lock.
   if (enable_stats) {
     runtime->SetStatsEnabled(true);
@@ -405,40 +402,41 @@
     CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, nullptr), "sampling thread shutdown");
     sampling_pthread_ = 0U;
   }
-  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
 
-  if (the_trace != nullptr) {
-    stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0;
-    if (finish_tracing) {
-      the_trace->FinishTracing();
-    }
+  {
+    ScopedSuspendAll ssa(__FUNCTION__);
+    if (the_trace != nullptr) {
+      stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0;
+      if (finish_tracing) {
+        the_trace->FinishTracing();
+      }
 
-    if (the_trace->trace_mode_ == TraceMode::kSampling) {
-      MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
-      runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
-    } else {
-      runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
-      runtime->GetInstrumentation()->RemoveListener(
-          the_trace, instrumentation::Instrumentation::kMethodEntered |
-          instrumentation::Instrumentation::kMethodExited |
-          instrumentation::Instrumentation::kMethodUnwind);
-    }
-    if (the_trace->trace_file_.get() != nullptr) {
-      // Do not try to erase, so flush and close explicitly.
-      if (flush_file) {
-        if (the_trace->trace_file_->Flush() != 0) {
-          PLOG(WARNING) << "Could not flush trace file.";
-        }
+      if (the_trace->trace_mode_ == TraceMode::kSampling) {
+        MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+        runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
       } else {
-        the_trace->trace_file_->MarkUnchecked();  // Do not trigger guard.
+        runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
+        runtime->GetInstrumentation()->RemoveListener(
+            the_trace, instrumentation::Instrumentation::kMethodEntered |
+            instrumentation::Instrumentation::kMethodExited |
+            instrumentation::Instrumentation::kMethodUnwind);
       }
-      if (the_trace->trace_file_->Close() != 0) {
-        PLOG(ERROR) << "Could not close trace file.";
+      if (the_trace->trace_file_.get() != nullptr) {
+        // Do not try to erase, so flush and close explicitly.
+        if (flush_file) {
+          if (the_trace->trace_file_->Flush() != 0) {
+            PLOG(WARNING) << "Could not flush trace file.";
+          }
+        } else {
+          the_trace->trace_file_->MarkUnchecked();  // Do not trigger guard.
+        }
+        if (the_trace->trace_file_->Close() != 0) {
+          PLOG(ERROR) << "Could not close trace file.";
+        }
       }
+      delete the_trace;
     }
-    delete the_trace;
   }
-  runtime->GetThreadList()->ResumeAll();
   if (stop_alloc_counting) {
     // Can be racy since SetStatsEnabled is not guarded by any locks.
     runtime->SetStatsEnabled(false);
@@ -492,7 +490,7 @@
   }
 
   if (the_trace != nullptr) {
-    runtime->GetThreadList()->SuspendAll(__FUNCTION__);
+    ScopedSuspendAll ssa(__FUNCTION__);
     stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0;
 
     if (the_trace->trace_mode_ == TraceMode::kSampling) {
@@ -500,12 +498,12 @@
       runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
     } else {
       runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
-      runtime->GetInstrumentation()->RemoveListener(the_trace,
-                                                    instrumentation::Instrumentation::kMethodEntered |
-                                                    instrumentation::Instrumentation::kMethodExited |
-                                                    instrumentation::Instrumentation::kMethodUnwind);
+      runtime->GetInstrumentation()->RemoveListener(
+          the_trace,
+          instrumentation::Instrumentation::kMethodEntered |
+          instrumentation::Instrumentation::kMethodExited |
+          instrumentation::Instrumentation::kMethodUnwind);
     }
-    runtime->GetThreadList()->ResumeAll();
   }
 
   if (stop_alloc_counting) {
@@ -531,23 +529,23 @@
   // Enable count of allocs if specified in the flags.
   bool enable_stats = (the_trace->flags_ & kTraceCountAllocs) != 0;
 
-  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
+  {
+    ScopedSuspendAll ssa(__FUNCTION__);
 
-  // Reenable.
-  if (the_trace->trace_mode_ == TraceMode::kSampling) {
-    CHECK_PTHREAD_CALL(pthread_create, (&sampling_pthread_, nullptr, &RunSamplingThread,
-        reinterpret_cast<void*>(the_trace->interval_us_)), "Sampling profiler thread");
-  } else {
-    runtime->GetInstrumentation()->AddListener(the_trace,
-                                               instrumentation::Instrumentation::kMethodEntered |
-                                               instrumentation::Instrumentation::kMethodExited |
-                                               instrumentation::Instrumentation::kMethodUnwind);
-    // TODO: In full-PIC mode, we don't need to fully deopt.
-    runtime->GetInstrumentation()->EnableMethodTracing(kTracerInstrumentationKey);
+    // Reenable.
+    if (the_trace->trace_mode_ == TraceMode::kSampling) {
+      CHECK_PTHREAD_CALL(pthread_create, (&sampling_pthread_, nullptr, &RunSamplingThread,
+          reinterpret_cast<void*>(the_trace->interval_us_)), "Sampling profiler thread");
+    } else {
+      runtime->GetInstrumentation()->AddListener(the_trace,
+                                                 instrumentation::Instrumentation::kMethodEntered |
+                                                 instrumentation::Instrumentation::kMethodExited |
+                                                 instrumentation::Instrumentation::kMethodUnwind);
+      // TODO: In full-PIC mode, we don't need to fully deopt.
+      runtime->GetInstrumentation()->EnableMethodTracing(kTracerInstrumentationKey);
+    }
   }
 
-  runtime->GetThreadList()->ResumeAll();
-
   // Can't call this when holding the mutator lock.
   if (enable_stats) {
     runtime->SetStatsEnabled(true);
@@ -806,6 +804,15 @@
   LOG(ERROR) << "Unexpected backward branch event in tracing" << PrettyMethod(method);
 }
 
+void Trace::InvokeVirtualOrInterface(Thread*,
+                                     mirror::Object*,
+                                     ArtMethod* method,
+                                     uint32_t dex_pc,
+                                     ArtMethod*) {
+  LOG(ERROR) << "Unexpected invoke event in tracing" << PrettyMethod(method)
+             << " " << dex_pc;
+}
+
 void Trace::ReadClocks(Thread* thread, uint32_t* thread_clock_diff, uint32_t* wall_clock_diff) {
   if (UseThreadCpuClock()) {
     uint64_t clock_base = thread->GetTraceClockBase();
diff --git a/runtime/trace.h b/runtime/trace.h
index 04be3dd..87a691d 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -166,6 +166,12 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
+  void InvokeVirtualOrInterface(Thread* thread,
+                                mirror::Object* this_object,
+                                ArtMethod* caller,
+                                uint32_t dex_pc,
+                                ArtMethod* callee)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   // Reuse an old stack trace if it exists, otherwise allocate a new one.
   static std::vector<ArtMethod*>* AllocStackTrace();
   // Clear and store an old stack trace for later use.
diff --git a/runtime/utils.h b/runtime/utils.h
index 16835c2..3e61824 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -294,25 +294,6 @@
   buf->push_back((data >> 24) & 0xff);
 }
 
-// Deleter using free() for use with std::unique_ptr<>. See also UniqueCPtr<> below.
-struct FreeDelete {
-  // NOTE: Deleting a const object is valid but free() takes a non-const pointer.
-  void operator()(const void* ptr) const {
-    free(const_cast<void*>(ptr));
-  }
-};
-
-// Alias for std::unique_ptr<> that uses the C function free() to delete objects.
-template <typename T>
-using UniqueCPtr = std::unique_ptr<T, FreeDelete>;
-
-// C++14 from-the-future import (std::make_unique)
-// Invoke the constructor of 'T' with the provided args, and wrap the result in a unique ptr.
-template <typename T, typename ... Args>
-std::unique_ptr<T> MakeUnique(Args&& ... args) {
-  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
-}
-
 inline bool TestBitmap(size_t idx, const uint8_t* bitmap) {
   return ((bitmap[idx / kBitsPerByte] >> (idx % kBitsPerByte)) & 0x01) != 0;
 }
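FreeDelete, UniqueCPtr and MakeUnique are removed from utils.h but not dropped: this same change adds #include "base/stl_util.h" to method_verifier.cc and reg_type_cache.cc, so the helpers presumably now live there. For reference, typical usage of the free()-based deleter (definitions copied from the removed lines above):

    #include <cstdlib>
    #include <cstring>
    #include <memory>

    struct FreeDelete {
      void operator()(const void* ptr) const { free(const_cast<void*>(ptr)); }
    };

    template <typename T>
    using UniqueCPtr = std::unique_ptr<T, FreeDelete>;

    int main() {
      // strdup() allocates with malloc(), so it must be released with free(),
      // which FreeDelete does when the unique_ptr goes out of scope.
      UniqueCPtr<char> copy(strdup("hello"));
      return copy == nullptr ? 1 : 0;
    }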
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index d768afd..3d4f04c 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -22,6 +22,7 @@
 #include "art_method-inl.h"
 #include "base/logging.h"
 #include "base/mutex-inl.h"
+#include "base/stl_util.h"
 #include "base/time_utils.h"
 #include "class_linker.h"
 #include "compiler_callbacks.h"
@@ -424,6 +425,7 @@
       has_virtual_or_interface_invokes_(false),
       verify_to_dump_(verify_to_dump),
       allow_thread_suspension_(allow_thread_suspension),
+      is_constructor_(false),
       link_(nullptr) {
   self->PushVerifier(this);
   DCHECK(class_def != nullptr);
@@ -555,15 +557,124 @@
 }
 
 bool MethodVerifier::Verify() {
-  // If there aren't any instructions, make sure that's expected, then exit successfully.
-  if (code_item_ == nullptr) {
-    if ((method_access_flags_ & (kAccNative | kAccAbstract)) == 0) {
-      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "zero-length code in concrete non-native method";
+  // Some older code doesn't correctly mark constructors as such. Test for this case by looking at
+  // the name.
+  const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx_);
+  const char* method_name = dex_file_->StringDataByIdx(method_id.name_idx_);
+  bool instance_constructor_by_name = strcmp("<init>", method_name) == 0;
+  bool static_constructor_by_name = strcmp("<clinit>", method_name) == 0;
+  bool constructor_by_name = instance_constructor_by_name || static_constructor_by_name;
+  // Check that only constructors are tagged, and check for bad code that doesn't tag constructors.
+  if ((method_access_flags_ & kAccConstructor) != 0) {
+    if (!constructor_by_name) {
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+            << "method is marked as constructor, but not named accordingly";
       return false;
-    } else {
-      return true;
+    }
+    is_constructor_ = true;
+  } else if (constructor_by_name) {
+    LOG(WARNING) << "Method " << PrettyMethod(dex_method_idx_, *dex_file_)
+                 << " not marked as constructor.";
+    is_constructor_ = true;
+  }
+  // If it's a constructor, check whether IsStatic() matches the name.
+  // This should have been rejected by the dex file verifier. Only do in debug build.
+  if (kIsDebugBuild) {
+    if (IsConstructor()) {
+      if (IsStatic() ^ static_constructor_by_name) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+              << "constructor name doesn't match static flag";
+        return false;
+      }
     }
   }
+
+  // Methods may only have one of public/protected/private.
+  // This should have been rejected by the dex file verifier. Only do in debug build.
+  if (kIsDebugBuild) {
+    size_t access_mod_count =
+        (((method_access_flags_ & kAccPublic) == 0) ? 0 : 1) +
+        (((method_access_flags_ & kAccProtected) == 0) ? 0 : 1) +
+        (((method_access_flags_ & kAccPrivate) == 0) ? 0 : 1);
+    if (access_mod_count > 1) {
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "method has more than one of public/protected/private";
+      return false;
+    }
+  }
+
+  // If there aren't any instructions, make sure that's expected, then exit successfully.
+  if (code_item_ == nullptr) {
+    // This should have been rejected by the dex file verifier. Only do in debug build.
+    if (kIsDebugBuild) {
+      // Only native or abstract methods may not have code.
+      if ((method_access_flags_ & (kAccNative | kAccAbstract)) == 0) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "zero-length code in concrete non-native method";
+        return false;
+      }
+      if ((method_access_flags_ & kAccAbstract) != 0) {
+        // Abstract methods are not allowed to have the following flags.
+        static constexpr uint32_t kForbidden =
+            kAccPrivate |
+            kAccStatic |
+            kAccFinal |
+            kAccNative |
+            kAccStrict |
+            kAccSynchronized;
+        if ((method_access_flags_ & kForbidden) != 0) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+                << "method can't be abstract and private/static/final/native/strict/synchronized";
+          return false;
+        }
+      }
+      if ((class_def_->GetJavaAccessFlags() & kAccInterface) != 0) {
+        // Interface methods must be public and abstract.
+        if ((method_access_flags_ & (kAccPublic | kAccAbstract)) != (kAccPublic | kAccAbstract)) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interface methods must be public and abstract";
+          return false;
+        }
+        // In addition to the above, interface methods must not be protected.
+        static constexpr uint32_t kForbidden = kAccProtected;
+        if ((method_access_flags_ & kForbidden) != 0) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interface methods can't be protected";
+          return false;
+        }
+      }
+      // We also don't allow constructors to be abstract or native.
+      if (IsConstructor()) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "constructors can't be abstract or native";
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // This should have been rejected by the dex file verifier. Only do in debug build.
+  if (kIsDebugBuild) {
+    // When there's code, the method must not be native or abstract.
+    if ((method_access_flags_ & (kAccNative | kAccAbstract)) != 0) {
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "non-zero-length code in abstract or native method";
+      return false;
+    }
+
+    // Only the static initializer may have code in an interface.
+    if ((class_def_->GetJavaAccessFlags() & kAccInterface) != 0) {
+      // Interfaces may have static initializers for their fields.
+      if (!IsConstructor() || !IsStatic()) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interface methods must be abstract";
+        return false;
+      }
+    }
+
+    // Instance constructors must not be synchronized.
+    if (IsInstanceConstructor()) {
+      static constexpr uint32_t kForbidden = kAccSynchronized;
+      if ((method_access_flags_ & kForbidden) != 0) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "constructors can't be synchronized";
+        return false;
+      }
+    }
+  }
+
   // Sanity-check the register counts. ins + locals = registers, so make sure that ins <= registers.
   if (code_item_->ins_size_ > code_item_->registers_size_) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad register counts (ins=" << code_item_->ins_size_
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index b57abf5..ba694b7 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_VERIFIER_METHOD_VERIFIER_H_
 
 #include <memory>
+#include <sstream>
 #include <vector>
 
 #include "base/macros.h"
@@ -262,20 +263,6 @@
   ArtField* GetQuickFieldAccess(const Instruction* inst, RegisterLine* reg_line)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Is the method being verified a constructor?
-  bool IsConstructor() const {
-    return (method_access_flags_ & kAccConstructor) != 0;
-  }
-
-  // Is the method verified static?
-  bool IsStatic() const {
-    return (method_access_flags_ & kAccStatic) != 0;
-  }
-
-  bool IsInstanceConstructor() const {
-    return IsConstructor() && !IsStatic();
-  }
-
   SafeMap<uint32_t, std::set<uint32_t>>& GetStringInitPcRegMap() {
     return string_init_pc_reg_map_;
   }
@@ -284,7 +271,21 @@
     return encountered_failure_types_;
   }
 
+  bool IsInstanceConstructor() const {
+    return IsConstructor() && !IsStatic();
+  }
+
  private:
+  // Is the method being verified a constructor? See the comment on the field.
+  bool IsConstructor() const {
+    return is_constructor_;
+  }
+
+  // Is the method verified static?
+  bool IsStatic() const {
+    return (method_access_flags_ & kAccStatic) != 0;
+  }
+
   // Private constructor for dumping.
   MethodVerifier(Thread* self, const DexFile* dex_file, Handle<mirror::DexCache> dex_cache,
                  Handle<mirror::ClassLoader> class_loader, const DexFile::ClassDef* class_def,
@@ -780,6 +781,13 @@
   // FindLocksAtDexPC, resulting in deadlocks.
   const bool allow_thread_suspension_;
 
+  // Whether the method seems to be a constructor. Note that this field exists as we can't trust
+  // the flags in the dex file. Some older code does not mark methods named "<init>" and "<clinit>"
+  // correctly.
+  //
+  // Note: this flag is only valid once Verify() has started.
+  bool is_constructor_;
+
   // Link, for the method verifier root linked list.
   MethodVerifier* link_;
 
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index e14306c..bb756e9 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -17,6 +17,7 @@
 #include "reg_type_cache-inl.h"
 
 #include "base/casts.h"
+#include "base/stl_util.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "mirror/class-inl.h"
diff --git a/runtime/verifier/reg_type_cache.h b/runtime/verifier/reg_type_cache.h
index 8319de6..93948a1 100644
--- a/runtime/verifier/reg_type_cache.h
+++ b/runtime/verifier/reg_type_cache.h
@@ -19,7 +19,6 @@
 
 #include "base/casts.h"
 #include "base/macros.h"
-#include "base/stl_util.h"
 #include "object_callbacks.h"
 #include "reg_type.h"
 #include "runtime.h"
diff --git a/test/004-ReferenceMap/stack_walk_refmap_jni.cc b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
index 767e1de..55a77ac 100644
--- a/test/004-ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
@@ -22,7 +22,9 @@
 #define CHECK_REGS_CONTAIN_REFS(dex_pc, abort_if_not_found, ...) do { \
   int t[] = {__VA_ARGS__}; \
   int t_size = sizeof(t) / sizeof(*t); \
-  uintptr_t native_quick_pc = m->ToNativeQuickPc(dex_pc, abort_if_not_found); \
+  uintptr_t native_quick_pc = m->ToNativeQuickPc(dex_pc, \
+                                                 /* is_catch_handler */ false, \
+                                                 abort_if_not_found); \
   if (native_quick_pc != UINTPTR_MAX) { \
     CheckReferences(t, t_size, m->NativeQuickPcOffset(native_quick_pc)); \
   } \
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 08ccf0e..5913c40 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -78,6 +78,14 @@
     test_Memory_pokeShort();
     test_Memory_pokeInt();
     test_Memory_pokeLong();
+    test_Integer_numberOfTrailingZeros();
+    test_Long_numberOfTrailingZeros();
+    test_Integer_rotateRight();
+    test_Long_rotateRight();
+    test_Integer_rotateLeft();
+    test_Long_rotateLeft();
+    test_Integer_rotateRightLeft();
+    test_Long_rotateRightLeft();
   }
 
   /**
@@ -1360,4 +1368,136 @@
     poke_long.invoke(null, address + 1, (long)0x2122232425262728L, false);
     Assert.assertTrue(Arrays.equals(ru, b));
   }
+
+  public static void test_Integer_numberOfTrailingZeros() {
+    Assert.assertEquals(Integer.numberOfTrailingZeros(0), Integer.SIZE);
+    for (int i = 0; i < Integer.SIZE; i++) {
+      Assert.assertEquals(
+        Integer.numberOfTrailingZeros(0x80000000 >> i),
+        Integer.SIZE - 1 - i);
+      Assert.assertEquals(
+        Integer.numberOfTrailingZeros((0x80000000 >> i) | 0x80000000),
+        Integer.SIZE - 1 - i);
+      Assert.assertEquals(Integer.numberOfTrailingZeros(1 << i), i);
+    }
+  }
+
+  public static void test_Long_numberOfTrailingZeros() {
+    Assert.assertEquals(Long.numberOfTrailingZeros(0), Long.SIZE);
+    for (int i = 0; i < Long.SIZE; i++) {
+      Assert.assertEquals(
+        Long.numberOfTrailingZeros(0x8000000000000000L >> i),
+        Long.SIZE - 1 - i);
+      Assert.assertEquals(
+        Long.numberOfTrailingZeros((0x8000000000000000L >> i) | 0x8000000000000000L),
+        Long.SIZE - 1 - i);
+      Assert.assertEquals(Long.numberOfTrailingZeros(1L << i), i);
+    }
+  }
+
+  public static void test_Integer_rotateRight() throws Exception {
+    Assert.assertEquals(Integer.rotateRight(0x11, 0), 0x11);
+
+    Assert.assertEquals(Integer.rotateRight(0x11, 1), 0x80000008);
+    Assert.assertEquals(Integer.rotateRight(0x11, Integer.SIZE - 1), 0x22);
+    Assert.assertEquals(Integer.rotateRight(0x11, Integer.SIZE), 0x11);
+    Assert.assertEquals(Integer.rotateRight(0x11, Integer.SIZE + 1), 0x80000008);
+
+    Assert.assertEquals(Integer.rotateRight(0x11, -1), 0x22);
+    Assert.assertEquals(Integer.rotateRight(0x11, -(Integer.SIZE - 1)), 0x80000008);
+    Assert.assertEquals(Integer.rotateRight(0x11, -Integer.SIZE), 0x11);
+    Assert.assertEquals(Integer.rotateRight(0x11, -(Integer.SIZE + 1)), 0x22);
+
+    Assert.assertEquals(Integer.rotateRight(0x80000000, 1), 0x40000000);
+
+    for (int i = 0; i < Integer.SIZE; i++) {
+      Assert.assertEquals(
+        Integer.rotateRight(0xBBAAAADD, i),
+        (0xBBAAAADD >>> i) | (0xBBAAAADD << (Integer.SIZE - i)));
+    }
+  }
+
+  public static void test_Long_rotateRight() throws Exception {
+    Assert.assertEquals(Long.rotateRight(0x11, 0), 0x11);
+
+    Assert.assertEquals(Long.rotateRight(0x11, 1), 0x8000000000000008L);
+    Assert.assertEquals(Long.rotateRight(0x11, Long.SIZE - 1), 0x22);
+    Assert.assertEquals(Long.rotateRight(0x11, Long.SIZE), 0x11);
+    Assert.assertEquals(Long.rotateRight(0x11, Long.SIZE + 1), 0x8000000000000008L);
+
+    Assert.assertEquals(Long.rotateRight(0x11, -1), 0x22);
+    Assert.assertEquals(Long.rotateRight(0x11, -(Long.SIZE - 1)), 0x8000000000000008L);
+    Assert.assertEquals(Long.rotateRight(0x11, -Long.SIZE), 0x11);
+    Assert.assertEquals(Long.rotateRight(0x11, -(Long.SIZE + 1)), 0x22);
+
+    Assert.assertEquals(Long.rotateRight(0x8000000000000000L, 1), 0x4000000000000000L);
+
+    for (int i = 0; i < Long.SIZE; i++) {
+      Assert.assertEquals(
+        Long.rotateRight(0xBBAAAADDFF0000DDL, i),
+        (0xBBAAAADDFF0000DDL >>> i) | (0xBBAAAADDFF0000DDL << (Long.SIZE - i)));
+    }
+  }
+
+  public static void test_Integer_rotateLeft() throws Exception {
+    Assert.assertEquals(Integer.rotateLeft(0x11, 0), 0x11);
+
+    Assert.assertEquals(Integer.rotateLeft(0x11, 1), 0x22);
+    Assert.assertEquals(Integer.rotateLeft(0x11, Integer.SIZE - 1), 0x80000008);
+    Assert.assertEquals(Integer.rotateLeft(0x11, Integer.SIZE), 0x11);
+    Assert.assertEquals(Integer.rotateLeft(0x11, Integer.SIZE + 1), 0x22);
+
+    Assert.assertEquals(Integer.rotateLeft(0x11, -1), 0x80000008);
+    Assert.assertEquals(Integer.rotateLeft(0x11, -(Integer.SIZE - 1)), 0x22);
+    Assert.assertEquals(Integer.rotateLeft(0x11, -Integer.SIZE), 0x11);
+    Assert.assertEquals(Integer.rotateLeft(0x11, -(Integer.SIZE + 1)), 0x80000008);
+
+    Assert.assertEquals(Integer.rotateLeft(0xC0000000, 1), 0x80000001);
+
+    for (int i = 0; i < Integer.SIZE; i++) {
+      Assert.assertEquals(
+        Integer.rotateLeft(0xBBAAAADD, i),
+        (0xBBAAAADD << i) | (0xBBAAAADD >>> (Integer.SIZE - i)));
+    }
+  }
+
+  public static void test_Long_rotateLeft() throws Exception {
+    Assert.assertEquals(Long.rotateLeft(0x11, 0), 0x11);
+
+    Assert.assertEquals(Long.rotateLeft(0x11, 1), 0x22);
+    Assert.assertEquals(Long.rotateLeft(0x11, Long.SIZE - 1), 0x8000000000000008L);
+    Assert.assertEquals(Long.rotateLeft(0x11, Long.SIZE), 0x11);
+    Assert.assertEquals(Long.rotateLeft(0x11, Long.SIZE + 1), 0x22);
+
+    Assert.assertEquals(Long.rotateLeft(0x11, -1), 0x8000000000000008L);
+    Assert.assertEquals(Long.rotateLeft(0x11, -(Long.SIZE - 1)), 0x22);
+    Assert.assertEquals(Long.rotateLeft(0x11, -Long.SIZE), 0x11);
+    Assert.assertEquals(Long.rotateLeft(0x11, -(Long.SIZE + 1)), 0x8000000000000008L);
+
+    Assert.assertEquals(Long.rotateLeft(0xC000000000000000L, 1), 0x8000000000000001L);
+
+    for (int i = 0; i < Long.SIZE; i++) {
+      Assert.assertEquals(
+        Long.rotateLeft(0xBBAAAADDFF0000DDL, i),
+        (0xBBAAAADDFF0000DDL << i) | (0xBBAAAADDFF0000DDL >>> (Long.SIZE - i)));
+    }
+  }
+
+  public static void test_Integer_rotateRightLeft() throws Exception {
+    for (int i = 0; i < Integer.SIZE * 2; i++) {
+      Assert.assertEquals(Integer.rotateLeft(0xBBAAAADD, i),
+                          Integer.rotateRight(0xBBAAAADD, -i));
+      Assert.assertEquals(Integer.rotateLeft(0xBBAAAADD, -i),
+                          Integer.rotateRight(0xBBAAAADD, i));
+    }
+  }
+
+  public static void test_Long_rotateRightLeft() throws Exception {
+    for (int i = 0; i < Long.SIZE * 2; i++) {
+      Assert.assertEquals(Long.rotateLeft(0xBBAAAADDFF0000DDL, i),
+                          Long.rotateRight(0xBBAAAADDFF0000DDL, -i));
+      Assert.assertEquals(Long.rotateLeft(0xBBAAAADDFF0000DDL, -i),
+                          Long.rotateRight(0xBBAAAADDFF0000DDL, i));
+    }
+  }
 }
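The tests above pin down Java's rotate semantics: the distance is taken modulo the width, so rotating by SIZE is the identity and a negative distance rotates the other way. Reproducing that in C++ needs explicit masking, since shifting a 32-bit value by 32 is undefined behavior; a standard sketch:

    #include <cassert>
    #include <cstdint>

    // Java-like semantics: distance reduced mod 32; masking the complementary
    // shift keeps both shift counts in [0, 31], avoiding UB when n % 32 == 0.
    uint32_t RotateLeft(uint32_t x, int n)  { return (x << (n & 31)) | (x >> (-n & 31)); }
    uint32_t RotateRight(uint32_t x, int n) { return (x >> (n & 31)) | (x << (-n & 31)); }

    int main() {
      assert(RotateLeft(0x11, 1) == 0x22);
      assert(RotateRight(0x11, 1) == 0x80000008u);
      assert(RotateLeft(0x11, 32) == 0x11);  // distance taken mod 32
      assert(RotateLeft(0xBBAAAADDu, 5) == RotateRight(0xBBAAAADDu, -5));
      return 0;
    }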
diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc
index 59722ad..78f8842 100644
--- a/test/137-cfi/cfi.cc
+++ b/test/137-cfi/cfi.cc
@@ -235,6 +235,7 @@
 
   return result ? JNI_TRUE : JNI_FALSE;
 #else
+  UNUSED(pid_int);
   return JNI_FALSE;
 #endif
 }
diff --git a/test/510-checker-try-catch/smali/RegisterAllocator.smali b/test/510-checker-try-catch/smali/RegisterAllocator.smali
new file mode 100644
index 0000000..fd3c84c
--- /dev/null
+++ b/test/510-checker-try-catch/smali/RegisterAllocator.smali
@@ -0,0 +1,94 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LRegisterAllocator;
+
+.super Ljava/lang/Object;
+
+# Test that catch phis are allocated to a stack slot, and that equivalent catch
+# phis are allocated to the same stack slot.
+
+## CHECK-START: int RegisterAllocator.testEquivalentCatchPhiSlot_Single(int, int, int) register (after)
+## CHECK-DAG:     Phi reg:0 is_catch_phi:true locations:{{\[.*\]}}-><<SlotA1:\d+>>(sp)
+## CHECK-DAG:     Phi reg:0 is_catch_phi:true locations:{{\[.*\]}}-><<SlotA2:\d+>>(sp)
+## CHECK-DAG:     Phi reg:1 is_catch_phi:true locations:{{\[.*\]}}-><<SlotB:\d+>>(sp)
+## CHECK-EVAL:    <<SlotA1>> == <<SlotA2>>
+## CHECK-EVAL:    <<SlotB>> != <<SlotA1>>
+
+.method public static testEquivalentCatchPhiSlot_Single(III)I
+  .registers 8
+
+  :try_start
+  const/high16 v0, 0x40000000 # float 2
+  move v1, p0
+  div-int/2addr p0, p1
+
+  const/high16 v0, 0x41000000 # float 8
+  move v1, p1
+  div-int/2addr p0, p2
+  goto :return
+  :try_end
+  .catchall {:try_start .. :try_end} :catch_all
+
+  :catch_all
+  # 2x CatchPhi for v0, 1x for v1
+  if-eqz v1, :use_as_float
+
+  :use_as_int
+  goto :return
+
+  :use_as_float
+  float-to-int v0, v0
+
+  :return
+  return v0
+.end method
+
+# Test that wide catch phis are allocated to two stack slots.
+
+## CHECK-START: long RegisterAllocator.testEquivalentCatchPhiSlot_Wide(int, int, int) register (after)
+## CHECK-DAG:     Phi reg:0 is_catch_phi:true locations:{{\[.*\]}}->2x<<SlotB1:\d+>>(sp)
+## CHECK-DAG:     Phi reg:0 is_catch_phi:true locations:{{\[.*\]}}->2x<<SlotB2:\d+>>(sp)
+## CHECK-DAG:     Phi reg:2 is_catch_phi:true locations:{{\[.*\]}}-><<SlotA:\d+>>(sp)
+## CHECK-EVAL:    <<SlotB1>> == <<SlotB2>>
+## CHECK-EVAL:    abs(<<SlotA>> - <<SlotB1>>) >= 8
+
+.method public static testEquivalentCatchPhiSlot_Wide(III)J
+  .registers 8
+
+  :try_start
+  const-wide/high16 v0, 0x4000000000000000L # double 2
+  move v2, p0
+  div-int/2addr p0, p1
+
+  const-wide/high16 v0, 0x4100000000000000L # double 8
+  move v2, p1
+  div-int/2addr p0, p2
+  goto :return
+  :try_end
+  .catchall {:try_start .. :try_end} :catch_all
+
+  :catch_all
+  # 2x CatchPhi for v0, 1x for v2
+  if-eqz v2, :use_as_double
+
+  :use_as_long
+  goto :return
+
+  :use_as_double
+  double-to-long v0, v0
+
+  :return
+  return-wide v0
+.end method
diff --git a/test/510-checker-try-catch/smali/Runtime.smali b/test/510-checker-try-catch/smali/Runtime.smali
new file mode 100644
index 0000000..19b43a3
--- /dev/null
+++ b/test/510-checker-try-catch/smali/Runtime.smali
@@ -0,0 +1,555 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LRuntime;
+.super Ljava/lang/Object;
+
+# The following tests all share the same structure, signature and return values:
+#  - foo(false, false):  normal path,         returns 42
+#  - foo(true, false):   exceptional path #1, returns 3
+#  - foo(false, true):   exceptional path #2, returns 8
+#  - foo(true, true):    undefined
+
+
+# Test register allocation of 32-bit core intervals crossing catch block positions.
+
+## CHECK-START: int Runtime.testUseAfterCatch_int(boolean, boolean) register (after)
+## CHECK-NOT:     Phi is_catch_phi:true
+
+.method public static testUseAfterCatch_int(ZZ)I
+  .registers 6
+
+  sget-object v0, LRuntime;->intArray:[I
+  const/4 v1, 0
+  aget v1, v0, v1
+  const/4 v2, 1
+  aget v2, v0, v2
+  const/4 v3, 2
+  aget v3, v0, v3
+
+  :try_start
+  invoke-static {p0}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  invoke-static {p1}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  :try_end
+  .catchall {:try_start .. :try_end} :catch_all
+
+  return v3  # Normal path return.
+
+  :catch_all
+  if-eqz p0, :second_throw
+  return v1  # Exceptional path #1 return.
+
+  :second_throw
+  return v2  # Exceptional path #2 return.
+.end method
+
+
+# Test register allocation of 64-bit core intervals crossing catch block positions.
+
+# The sum of the low and high 32 bits treated as integers is returned to prove
+# that both vregs were allocated correctly.
+
+## CHECK-START: int Runtime.testUseAfterCatch_long(boolean, boolean) register (after)
+## CHECK-NOT:     Phi is_catch_phi:true
+
+.method public static testUseAfterCatch_long(ZZ)I
+  .registers 10
+
+  sget-object v0, LRuntime;->longArray:[J
+  const/4 v1, 0
+  aget-wide v1, v0, v1
+  const/4 v3, 1
+  aget-wide v3, v0, v3
+  const/4 v5, 2
+  aget-wide v5, v0, v5
+
+  :try_start
+  invoke-static {p0}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  invoke-static {p1}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  :try_end
+  .catchall {:try_start .. :try_end} :catch_all
+
+  const v0, 32
+  ushr-long v7, v5, v0
+  long-to-int v5, v5
+  long-to-int v7, v7
+  add-int/2addr v5, v7
+  return v5  # Normal path return.
+
+  :catch_all
+  const v0, 32
+  if-eqz p0, :second_throw
+
+  ushr-long v7, v1, v0
+  long-to-int v1, v1
+  long-to-int v7, v7
+  add-int/2addr v1, v7
+  return v1  # Exceptional path #1 return.
+
+  :second_throw
+  ushr-long v7, v3, v0
+  long-to-int v3, v3
+  long-to-int v7, v7
+  add-int/2addr v3, v7
+  return v3  # Exceptional path #2 return.
+.end method
+
+
+# Test register allocation of 32-bit floating-point intervals crossing catch block positions.
+
+## CHECK-START: int Runtime.testUseAfterCatch_float(boolean, boolean) register (after)
+## CHECK-NOT:     Phi is_catch_phi:true
+
+.method public static testUseAfterCatch_float(ZZ)I
+  .registers 6
+
+  sget-object v0, LRuntime;->floatArray:[F
+  const/4 v1, 0
+  aget v1, v0, v1
+  const/4 v2, 1
+  aget v2, v0, v2
+  const/4 v3, 2
+  aget v3, v0, v3
+
+  :try_start
+  invoke-static {p0}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  invoke-static {p1}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  :try_end
+  .catchall {:try_start .. :try_end} :catch_all
+
+  float-to-int v3, v3
+  return v3  # Normal path return.
+
+  :catch_all
+  if-eqz p0, :second_throw
+  float-to-int v1, v1
+  return v1  # Exceptional path #1 return.
+
+  :second_throw
+  float-to-int v2, v2
+  return v2  # Exceptional path #2 return.
+.end method
+
+
+# Test register allocation of 64-bit floating-point intervals crossing catch block positions.
+
+## CHECK-START: int Runtime.testUseAfterCatch_double(boolean, boolean) register (after)
+## CHECK-NOT:     Phi is_catch_phi:true
+
+.method public static testUseAfterCatch_double(ZZ)I
+  .registers 10
+
+  sget-object v0, LRuntime;->doubleArray:[D
+  const/4 v1, 0
+  aget-wide v1, v0, v1
+  const/4 v3, 1
+  aget-wide v3, v0, v3
+  const/4 v5, 2
+  aget-wide v5, v0, v5
+
+  :try_start
+  invoke-static {p0}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  invoke-static {p1}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  :try_end
+  .catchall {:try_start .. :try_end} :catch_all
+
+  double-to-int v5, v5
+  return v5  # Normal path return.
+
+  :catch_all
+  if-eqz p0, :second_throw
+  double-to-int v1, v1
+  return v1  # Exceptional path #1 return.
+
+  :second_throw
+  double-to-int v3, v3
+  return v3  # Exceptional path #2 return.
+.end method
+
+
+# Test catch-phi runtime support for constant values.
+
+# Register v0 holds different constants at two throwing instructions. The runtime is
+# expected to load them from the stack map and copy them to the catch phi's location.
+
+## CHECK-START: int Runtime.testCatchPhi_const(boolean, boolean) register (after)
+## CHECK-DAG:     <<Const3:i\d+>> IntConstant 3
+## CHECK-DAG:     <<Const8:i\d+>> IntConstant 8
+## CHECK-DAG:                     Phi [<<Const3>>,<<Const8>>] is_catch_phi:true
+
+.method public static testCatchPhi_const(ZZ)I
+  .registers 3
+
+  :try_start
+  const v0, 3
+  invoke-static {p0}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+
+  const v0, 8
+  invoke-static {p1}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  :try_end
+  .catchall {:try_start .. :try_end} :catch_all
+
+  const v0, 42
+  return v0  # Normal path return.
+
+  :catch_all
+  return v0  # Exceptional path #1/#2 return.
+.end method
+
+
+# Test catch-phi runtime support for 32-bit values stored in core registers.
+
+# Register v0 holds different integer values at two throwing instructions.
+# Runtime is expected to find their location in the stack map and copy the value
+# to the location of the catch phi.
+
+## CHECK-START: int Runtime.testCatchPhi_int(boolean, boolean) register (after)
+## CHECK-DAG:     <<Val1:i\d+>> ArrayGet
+## CHECK-DAG:     <<Val2:i\d+>> ArrayGet
+## CHECK-DAG:                   Phi [<<Val1>>,<<Val2>>] is_catch_phi:true
+
+.method public static testCatchPhi_int(ZZ)I
+  .registers 6
+
+  sget-object v0, LRuntime;->intArray:[I
+  const/4 v1, 0
+  aget v1, v0, v1
+  const/4 v2, 1
+  aget v2, v0, v2
+  const/4 v3, 2
+  aget v3, v0, v3
+
+  :try_start
+  move v0, v1  # Set catch phi value
+  invoke-static {p0}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+
+  move v0, v2  # Set catch phi value
+  invoke-static {p1}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  :try_end
+  .catchall {:try_start .. :try_end} :catch_all
+
+  return v3  # Normal path return.
+
+  :catch_all
+  return v0  # Exceptional path #1/#2 return.
+.end method
+
+
+# Test catch-phi runtime support for 64-bit values stored in core registers.
+
+# Register pair (v0, v1) holds different long values at two throwing instructions.
+# Runtime is expected to find their location in the stack map and copy the value
+# to the location of the catch phi. The sum of the low and high 32 bits treated
+# as integers is returned to prove that both vregs were copied.
+
+# Note: values will be spilled on x86 because of too few callee-save core registers.
+
+## CHECK-START: int Runtime.testCatchPhi_long(boolean, boolean) register (after)
+## CHECK-DAG:     <<Val1:j\d+>> ArrayGet
+## CHECK-DAG:     <<Val2:j\d+>> ArrayGet
+## CHECK-DAG:                   Phi [<<Val1>>,<<Val2>>] is_catch_phi:true
+
+.method public static testCatchPhi_long(ZZ)I
+  .registers 10
+
+  sget-object v0, LRuntime;->longArray:[J
+  const/4 v2, 0
+  aget-wide v2, v0, v2
+  const/4 v4, 1
+  aget-wide v4, v0, v4
+  const/4 v6, 2
+  aget-wide v6, v0, v6
+
+  :try_start
+  move-wide v0, v2  # Set catch phi value
+  invoke-static {p0}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+
+  move-wide v0, v4  # Set catch phi value
+  invoke-static {p1}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  :try_end
+  .catchall {:try_start .. :try_end} :catch_all
+
+  const v2, 32
+  ushr-long v2, v6, v2
+  long-to-int v2, v2
+  long-to-int v6, v6
+  add-int/2addr v6, v2
+  return v6  # Normal path return.
+
+  :catch_all
+  const v2, 32
+  ushr-long v2, v0, v2
+  long-to-int v2, v2
+  long-to-int v0, v0
+  add-int/2addr v0, v2
+  return v0  # Exceptional path #1/#2 return.
+.end method
+
+
+# Test catch-phi runtime support for 32-bit values stored in FPU registers.
+
+# Register v0 holds different float values at two throwing instructions. Runtime
+# is expected to find their location in the stack map and copy the value to the
+# location of the catch phi. The value is converted to int and returned.
+
+# Note: values will be spilled on x86 as there are no callee-save FPU registers.
+
+## CHECK-START: int Runtime.testCatchPhi_float(boolean, boolean) register (after)
+## CHECK-DAG:     <<Val1:f\d+>> ArrayGet
+## CHECK-DAG:     <<Val2:f\d+>> ArrayGet
+## CHECK-DAG:                   Phi [<<Val1>>,<<Val2>>] is_catch_phi:true
+
+.method public static testCatchPhi_float(ZZ)I
+  .registers 6
+
+  sget-object v0, LRuntime;->floatArray:[F
+  const/4 v1, 0
+  aget v1, v0, v1
+  const/4 v2, 1
+  aget v2, v0, v2
+  const/4 v3, 2
+  aget v3, v0, v3
+
+  :try_start
+  move v0, v1  # Set catch phi value
+  invoke-static {p0}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+
+  move v0, v2  # Set catch phi value
+  invoke-static {p1}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  :try_end
+  .catchall {:try_start .. :try_end} :catch_all
+
+  float-to-int v3, v3
+  return v3  # Normal path return.
+
+  :catch_all
+  float-to-int v0, v0
+  return v0  # Exceptional path #1/#2 return.
+.end method
+
+
+# Test catch-phi runtime support for 64-bit values stored in FPU registers.
+
+# Register pair (v0, v1) holds different double values at two throwing instructions.
+# Runtime is expected to find their location in the stack map and copy the value
+# to the location of the catch phi. The value is converted to int and returned.
+# Values were chosen so that all 64 bits are used.
+
+# Note: values will be spilled on x86 as there are no callee-save FPU registers.
+
+## CHECK-START: int Runtime.testCatchPhi_double(boolean, boolean) register (after)
+## CHECK-DAG:     <<Val1:d\d+>> ArrayGet
+## CHECK-DAG:     <<Val2:d\d+>> ArrayGet
+## CHECK-DAG:                   Phi [<<Val1>>,<<Val2>>] is_catch_phi:true
+
+.method public static testCatchPhi_double(ZZ)I
+  .registers 10
+
+  sget-object v0, LRuntime;->doubleArray:[D
+  const/4 v2, 0
+  aget-wide v2, v0, v2
+  const/4 v4, 1
+  aget-wide v4, v0, v4
+  const/4 v6, 2
+  aget-wide v6, v0, v6
+
+  :try_start
+  move-wide v0, v2  # Set catch phi value
+  invoke-static {p0}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+
+  move-wide v0, v4  # Set catch phi value
+  invoke-static {p1}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  :try_end
+  .catchall {:try_start .. :try_end} :catch_all
+
+  double-to-int v6, v6
+  return v6
+
+  :catch_all
+  double-to-int v0, v0
+  return v0
+.end method
+
+# Test catch-phi runtime support for 32-bit values stored on the stack.
+
+# Register v0 holds different integer values at two throwing instructions.
+# These values were forced to spill by an always-throwing try/catch after their
+# definition. Runtime is expected to find their location in the stack map and
+# copy the value to the location of the catch phi. The value is then returned.
+
+## CHECK-START: int Runtime.testCatchPhi_singleSlot(boolean, boolean) register (after)
+## CHECK:         <<Val1:i\d+>> ArrayGet
+## CHECK-NEXT:                  ParallelMove moves:[{{.*->}}{{\d+}}(sp)]
+## CHECK:         <<Val2:i\d+>> ArrayGet
+## CHECK-NEXT:                  ParallelMove moves:[{{.*->}}{{\d+}}(sp)]
+## CHECK:                       Phi [<<Val1>>,<<Val2>>] is_catch_phi:true
+
+.method public static testCatchPhi_singleSlot(ZZ)I
+  .registers 6
+
+  sget-object v0, LRuntime;->intArray:[I
+  const/4 v1, 0
+  aget v1, v0, v1
+  const/4 v2, 1
+  aget v2, v0, v2
+  const/4 v3, 2
+  aget v3, v0, v3
+
+  # Insert a try/catch to force v1,v2,v3 to spill.
+  :try_start_spill
+  const/4 v0, 1
+  invoke-static {v0}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  :try_end_spill
+  .catchall {:try_start_spill .. :try_end_spill} :catch_all_spill
+  return v0         # Unreachable
+  :catch_all_spill  # Catch and continue
+
+  :try_start
+  move v0, v1  # Set catch phi value
+  invoke-static {p0}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+
+  move v0, v2  # Set catch phi value
+  invoke-static {p1}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  :try_end
+  .catchall {:try_start .. :try_end} :catch_all
+
+  return v3  # Normal path return.
+
+  :catch_all
+  return v0  # Exceptional path #1/#2 return.
+.end method
+
+# Test catch-phi runtime support for 64-bit values stored on the stack.
+
+# Register pair (v0, v1) holds different double values at two throwing instructions.
+# These values were forced to spill by an always-throwing try/catch after their
+# definition. Runtime is expected to find their location in the stack map and
+# copy the value to the location of the catch phi. The value is converted to int
+# and returned. Values were chosen so that all 64 bits are used.
+
+## CHECK-START: int Runtime.testCatchPhi_doubleSlot(boolean, boolean) register (after)
+## CHECK:         <<Val1:d\d+>> ArrayGet
+## CHECK-NEXT:                  ParallelMove moves:[{{.*->}}2x{{\d+}}(sp)]
+## CHECK:         <<Val2:d\d+>> ArrayGet
+## CHECK-NEXT:                  ParallelMove moves:[{{.*->}}2x{{\d+}}(sp)]
+## CHECK:                       Phi [<<Val1>>,<<Val2>>] is_catch_phi:true
+
+.method public static testCatchPhi_doubleSlot(ZZ)I
+  .registers 10
+
+  sget-object v0, LRuntime;->doubleArray:[D
+  const/4 v2, 0
+  aget-wide v2, v0, v2
+  const/4 v4, 1
+  aget-wide v4, v0, v4
+  const/4 v6, 2
+  aget-wide v6, v0, v6
+
+  # Insert a try/catch to force (v2, v3), (v4, v5), (v6, v7) to spill.
+  :try_start_spill
+  const/4 v0, 1
+  invoke-static {v0}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  :try_end_spill
+  .catchall {:try_start_spill .. :try_end_spill} :catch_all_spill
+  return v0         # Unreachable
+  :catch_all_spill  # Catch and continue
+
+  :try_start
+  move-wide v0, v2  # Set catch phi value
+  invoke-static {p0}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+
+  move-wide v0, v4  # Set catch phi value
+  invoke-static {p1}, LRuntime;->$noinline$ThrowIfTrue(Z)V
+  :try_end
+  .catchall {:try_start .. :try_end} :catch_all
+
+  double-to-int v6, v6
+  return v6  # Normal path return.
+
+  :catch_all
+  double-to-int v0, v0
+  return v0  # Exceptional path #1/#2 return.
+.end method
+
+
+
+# Helper methods and initialization.
+
+.method public static $noinline$ThrowIfTrue(Z)V
+  .registers 2
+  if-nez p0, :throw
+  return-void
+
+  :throw
+  new-instance v0, Ljava/lang/Exception;
+  invoke-direct {v0}, Ljava/lang/Exception;-><init>()V
+  throw v0
+.end method
+
+.method public static constructor <clinit>()V
+  .registers 2
+
+  const/4 v1, 4
+
+  new-array v0, v1, [I
+  fill-array-data v0, :array_int
+  sput-object v0, LRuntime;->intArray:[I
+
+  new-array v0, v1, [J
+  fill-array-data v0, :array_long
+  sput-object v0, LRuntime;->longArray:[J
+
+  new-array v0, v1, [F
+  fill-array-data v0, :array_float
+  sput-object v0, LRuntime;->floatArray:[F
+
+  new-array v0, v1, [D
+  fill-array-data v0, :array_double
+  sput-object v0, LRuntime;->doubleArray:[D
+
+  return-void
+
+:array_int
+.array-data 4
+  0x03  # int 3
+  0x08  # int 8
+  0x2a  # int 42
+.end array-data
+
+:array_long
+.array-data 8
+  0x0000000100000002L # long (1 << 32) + 2
+  0x0000000500000003L # long (5 << 32) + 3
+  0x0000001e0000000cL # long (30 << 32) + 12
+.end array-data
+
+:array_float
+.array-data 4
+  0x40400000  # float 3
+  0x41000000  # float 8
+  0x42280000  # float 42
+.end array-data
+
+:array_double
+.array-data 8
+  0x400b333333333333L  # double 3.4
+  0x4020cccccccccccdL  # double 8.4
+  0x4045333333333333L  # double 42.4
+.end array-data
+.end method
+
+.field public static intArray:[I
+.field public static longArray:[J
+.field public static floatArray:[F
+.field public static doubleArray:[D
diff --git a/test/510-checker-try-catch/src/Main.java b/test/510-checker-try-catch/src/Main.java
index ae78ba0..25cdc0e 100644
--- a/test/510-checker-try-catch/src/Main.java
+++ b/test/510-checker-try-catch/src/Main.java
@@ -14,10 +14,55 @@
  * limitations under the License.
  */
 
+import java.lang.reflect.Method;
+
 public class Main {
 
   // Workaround for b/18051191.
   class InnerClass {}
 
-  public static void main(String[] args) {}
+  public enum TestPath {
+    ExceptionalFlow1(true, false, 3),
+    ExceptionalFlow2(false, true, 8),
+    NormalFlow(false, false, 42);
+
+    TestPath(boolean arg1, boolean arg2, int expected) {
+      this.arg1 = arg1;
+      this.arg2 = arg2;
+      this.expected = expected;
+    }
+
+    public boolean arg1;
+    public boolean arg2;
+    public int expected;
+  }
+
+  public static void testMethod(String method) throws Exception {
+    Class<?> c = Class.forName("Runtime");
+    Method m = c.getMethod(method, new Class[] { boolean.class, boolean.class });
+
+    for (TestPath path : TestPath.values()) {
+      Object[] arguments = new Object[] { path.arg1, path.arg2 };
+      int actual = (Integer) m.invoke(null, arguments);
+
+      if (actual != path.expected) {
+        throw new Error("Method: \"" + method + "\", path: " + path + ", " +
+                        "expected: " + path.expected + ", actual: " + actual);
+      }
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    testMethod("testUseAfterCatch_int");
+    testMethod("testUseAfterCatch_long");
+    testMethod("testUseAfterCatch_float");
+    testMethod("testUseAfterCatch_double");
+    testMethod("testCatchPhi_const");
+    testMethod("testCatchPhi_int");
+    testMethod("testCatchPhi_long");
+    testMethod("testCatchPhi_float");
+    testMethod("testCatchPhi_double");
+    testMethod("testCatchPhi_singleSlot");
+    testMethod("testCatchPhi_doubleSlot");
+  }
 }
diff --git a/test/530-checker-loops/expected.txt b/test/530-checker-loops/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/530-checker-loops/expected.txt
diff --git a/test/530-checker-loops/info.txt b/test/530-checker-loops/info.txt
new file mode 100644
index 0000000..f5d334d
--- /dev/null
+++ b/test/530-checker-loops/info.txt
@@ -0,0 +1 @@
+Test on loop optimizations.
diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java
new file mode 100644
index 0000000..e518a61
--- /dev/null
+++ b/test/530-checker-loops/src/Main.java
@@ -0,0 +1,354 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on loop optimizations.
+//
+public class Main {
+
+  static int sResult;
+
+  //
+  // Various sequence variables where bounds checks can be removed from the loop.
+  //
+
+  /// CHECK-START: int Main.linear(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linear(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int linear(int[] x) {
+    int result = 0;
+    for (int i = 0; i < x.length; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearDown(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearDown(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int linearDown(int[] x) {
+    int result = 0;
+    for (int i = x.length - 1; i >= 0; i--) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearObscure(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearObscure(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int linearObscure(int[] x) {
+    int result = 0;
+    for (int i = x.length - 1; i >= 0; i--) {
+      int k = i + 5;
+      result += x[k - 5];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearWhile(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearWhile(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int linearWhile(int[] x) {
+    int i = 0;
+    int result = 0;
+    while (i < x.length) {
+      result += x[i++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int wrapAroundThenLinear(int[] x) {
+    // Loop with wrap-around (length - 1, 0, 1, 2, ...).
+    int w = x.length - 1;
+    int result = 0;
+    for (int i = 0; i < x.length; i++) {
+      result += x[w];
+      w = i;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int[] Main.linearWithParameter(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int[] linearWithParameter(int n) {
+    int[] x = new int[n];
+    for (int i = 0; i < n; i++) {
+      x[i] = i;
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.linearWithCompoundStride() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearWithCompoundStride() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int linearWithCompoundStride() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
+    int result = 0;
+    for (int i = 0; i <= 12; ) {
+      i++;
+      result += x[i];
+      i++;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int linearWithLargePositiveStride() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+    int result = 0;
+    int k = 0;
+    // Range analysis has no problem with a trip-count defined by a
+    // reasonably large positive stride.
+    for (int i = 1; i <= 10 * 10000000 + 1; i += 10000000) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  private static int linearWithVeryLargePositiveStride() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+    int result = 0;
+    int k = 0;
+    // Range analysis conservatively bails out due to potential wrap-around
+    // arithmetic while computing the trip-count for this very large stride.
+    for (int i = 1; i < 2147483647; i += 195225786) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int linearWithLargeNegativeStride() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+    int result = 0;
+    int k = 0;
+    // Range analysis has no problem with a trip-count defined by a
+    // reasonably large negative stride.
+    for (int i = -1; i >= -10 * 10000000 - 1; i -= 10000000) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  private static int linearWithVeryLargeNegativeStride() {
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+    int result = 0;
+    int k = 0;
+    // Range analysis conservatively bails out due to the potential for wrap-around
+    // arithmetic while computing the trip-count for this very large stride.
+    for (int i = -2; i > -2147483648; i -= 195225786) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.periodicIdiom(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.periodicIdiom(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int periodicIdiom(int tc) {
+    int[] x = { 1, 3 };
+    // Loop with periodic sequence (0, 1).
+    int k = 0;
+    int result = 0;
+    for (int i = 0; i < tc; i++) {
+      result += x[k];
+      k = 1 - k;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.periodicSequence2(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.periodicSequence2(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int periodicSequence2(int tc) {
+    int[] x = { 1, 3 };
+    // Loop with periodic sequence (0, 1).
+    int k = 0;
+    int l = 1;
+    int result = 0;
+    for (int i = 0; i < tc; i++) {
+      result += x[k];
+      int t = l;
+      l = k;
+      k = t;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.periodicSequence4(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.periodicSequence4(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static int periodicSequence4(int tc) {
+    int[] x = { 1, 3, 5, 7 };
+    // Loop with periodic sequence (0, 1, 2, 3).
+    int k = 0;
+    int l = 1;
+    int m = 2;
+    int n = 3;
+    int result = 0;
+    for (int i = 0; i < tc; i++) {
+      result += x[k] + x[l] + x[m] + x[n];  // all used at once
+      int t = n;
+      n = k;
+      k = l;
+      l = m;
+      m = t;
+    }
+    return result;
+  }
+
+  //
+  // Cases that actually go out of bounds. These test cases
+  // ensure the exceptions are thrown at the right places.
+  //
+
+  private static void lowerOOB(int[] x) {
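+    // Reads x[-1] on the very first iteration.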
+    for (int i = -1; i < x.length; i++) {
+      sResult += x[i];
+    }
+  }
+
+  private static void upperOOB(int[] x) {
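+    // Sums the whole array, then reads x[x.length].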
+    for (int i = 0; i <= x.length; i++) {
+      sResult += x[i];
+    }
+  }
+
+  //
+  // Verifier.
+  //
+
+  public static void main(String[] args) {
+    int[] empty = { };
+    int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+
+    // Linear and wrap-around.
+    expectEquals(0, linear(empty));
+    expectEquals(55, linear(x));
+    expectEquals(0, linearDown(empty));
+    expectEquals(55, linearDown(x));
+    expectEquals(0, linearObscure(empty));
+    expectEquals(55, linearObscure(x));
+    expectEquals(0, linearWhile(empty));
+    expectEquals(55, linearWhile(x));
+    expectEquals(0, wrapAroundThenLinear(empty));
+    expectEquals(55, wrapAroundThenLinear(x));
+
+    // Linear with parameter.
+    sResult = 0;
+    try {
+      linearWithParameter(-1);
+    } catch (NegativeArraySizeException e) {
+      sResult = 1;
+    }
+    expectEquals(1, sResult);
+    for (int n = 0; n < 32; n++) {
+      int[] r = linearWithParameter(n);
+      expectEquals(n, r.length);
+      for (int i = 0; i < n; i++) {
+        expectEquals(i, r[i]);
+      }
+    }
+
+    // Linear with non-unit strides.
+    expectEquals(56, linearWithCompoundStride());
+    expectEquals(66, linearWithLargePositiveStride());
+    expectEquals(66, linearWithVeryLargePositiveStride());
+    expectEquals(66, linearWithLargeNegativeStride());
+    expectEquals(66, linearWithVeryLargeNegativeStride());
+
+    // Periodic adds (1, 3), one at a time.
+    expectEquals(0, periodicIdiom(-1));
+    for (int tc = 0; tc < 32; tc++) {
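+      // Each full period contributes 1 + 3 = 4; an odd trailing iteration adds 1.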
+      int expected = (tc >> 1) << 2;
+      if ((tc & 1) != 0) {
+        expected += 1;
+      }
+      expectEquals(expected, periodicIdiom(tc));
+    }
+
+    // Periodic adds (1, 3), one at a time.
+    expectEquals(0, periodicSequence2(-1));
+    for (int tc = 0; tc < 32; tc++) {
+      int expected = (tc >> 1) << 2;
+      if ((tc & 1) != 0) {
+        expected += 1;
+      }
+      expectEquals(expected, periodicSequence2(tc));
+    }
+
+    // Periodic adds (1, 3, 5, 7), all at once.
+    expectEquals(0, periodicSequence4(-1));
+    for (int tc = 0; tc < 32; tc++) {
+      expectEquals(tc * 16, periodicSequence4(tc));
+    }
+
+    // Lower bound goes OOB.
+    sResult = 0;
+    try {
+      lowerOOB(x);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+
+    // Upper bound goes OOB.
+    sResult = 0;
+    try {
+      upperOOB(x);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
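+    // 55 from summing x[0..9] plus 1000 added in the catch handler.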
+    expectEquals(1055, sResult);
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/530-checker-regression-reftype-final/expected.txt b/test/530-checker-regression-reftype-final/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/530-checker-regression-reftype-final/expected.txt
diff --git a/test/530-checker-regression-reftype-final/info.txt b/test/530-checker-regression-reftype-final/info.txt
new file mode 100644
index 0000000..07789d6
--- /dev/null
+++ b/test/530-checker-regression-reftype-final/info.txt
@@ -0,0 +1 @@
+Regression test for optimizing that used to assume that array types are always exact.
diff --git a/test/530-checker-regression-reftype-final/smali/TestCase.smali b/test/530-checker-regression-reftype-final/smali/TestCase.smali
new file mode 100644
index 0000000..8fd7bb7
--- /dev/null
+++ b/test/530-checker-regression-reftype-final/smali/TestCase.smali
@@ -0,0 +1,59 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+# The inliner used to assign an exact type to the artificial multiple-return phi
+# if the class type was final, which does not hold for arrays.
+
+# The type information is only used by recursive calls to the inliner and is
+# overwritten by the next pass of reference type propagation. Since we do not
+# inline any methods from array classes, the bug cannot be triggered at run
+# time; we therefore verify the fix using Checker.
+
+## CHECK-START: void TestCase.testInliner() reference_type_propagation_after_inlining (before)
+## CHECK-DAG:             CheckCast [<<Phi:l\d+>>,{{l\d+}}]
+## CHECK-DAG:    <<Phi>>  Phi klass:java.lang.Object[] exact:false
+
+.method public static testInliner()V
+  .registers 3
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  move-result-wide v0
+  long-to-int v0, v0
+
+  invoke-static {v0}, LTestCase;->$inline$getArray(I)[Ljava/lang/Object;
+  move-result-object v0
+
+  check-cast v0, [LMain$MyClassA;
+  return-void
+
+.end method
+
+.method public static $inline$getArray(I)[Ljava/lang/Object;
+  .registers 2
+  if-eqz p0, :else
+
+  :then
+  const/4 v0, 2
+  new-array v0, v0, [LMain$MyClassA;
+  return-object v0
+
+  :else
+  const/4 v0, 3
+  new-array v0, v0, [LMain$MyClassB;
+  return-object v0
+
+.end method
diff --git a/test/530-checker-regression-reftype-final/src/Main.java b/test/530-checker-regression-reftype-final/src/Main.java
new file mode 100644
index 0000000..f86b515
--- /dev/null
+++ b/test/530-checker-regression-reftype-final/src/Main.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  class MyClassA {}
+  class MyClassB extends MyClassA {}
+
+  public static void main(String[] args) throws Exception {
+    testReferenceTypePropagation();
+    invokeTestInliner();
+  }
+
+  // Reference type propagation (RTP) used to assume that if a class is final,
+  // then its type must be exact. This does not hold for arrays: array classes
+  // are always final, i.e. not extendable, yet an array reference may point to
+  // an array whose component type is a subclass of the declared component type.
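+  // For example, the Object[] returned by $noinline$getArray below actually
+  // refers to a MyClassB[] instance at run time.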
+
+  public static void testReferenceTypePropagation() throws Exception {
+    boolean expectTrue;
+
+    // Bug #1: RTP would set the type of `array` to exact Object[]. The instruction
+    // simplifier would then simplify the instanceof below to `false`.
+    Object[] array = $noinline$getArray();
+    expectTrue = array instanceof MyClassA[];
+    if (!expectTrue) {
+      throw new Exception("Incorrect type check.");
+    }
+
+    // Bug #2: In the true branch of the instanceof above, the bound type for
+    // `array` would again be set to exact MyClassA[], incorrectly simplifying
+    // the second instanceof to `false`.
+    expectTrue = array instanceof MyClassB[];
+    if (!expectTrue) {
+      throw new Exception("Incorrect type bound.");
+    }
+  }
+
+  public static void invokeTestInliner() throws Exception {
+    Class<?> c = Class.forName("TestCase");
+    Method m = c.getMethod("testInliner");
+    m.invoke(null);
+  }
+
+  public static Object[] $noinline$getArray() {
+    if (doThrow) throw new Error();
+    return new MyClassB[2];
+  }
+
+  static boolean doThrow = false;
+}
diff --git a/test/800-smali/smali/b_18380491AbstractBase.smali b/test/800-smali/smali/b_18380491AbstractBase.smali
index 7aa1b1a..cc05221 100644
--- a/test/800-smali/smali/b_18380491AbstractBase.smali
+++ b/test/800-smali/smali/b_18380491AbstractBase.smali
@@ -1,4 +1,4 @@
-.class public LB18380491ActractBase;
+.class public abstract LB18380491AbstractBase;
 
 .super Ljava/lang/Object;
 
diff --git a/test/800-smali/smali/b_18380491ConcreteClass.smali b/test/800-smali/smali/b_18380491ConcreteClass.smali
index db5ef3b..1ba684f 100644
--- a/test/800-smali/smali/b_18380491ConcreteClass.smali
+++ b/test/800-smali/smali/b_18380491ConcreteClass.smali
@@ -1,10 +1,10 @@
 .class public LB18380491ConcreteClass;
 
-.super LB18380491ActractBase;
+.super LB18380491AbstractBase;
 
 .method public constructor <init>()V
     .locals 0
-    invoke-direct {p0}, LB18380491ActractBase;-><init>()V
+    invoke-direct {p0}, LB18380491AbstractBase;-><init>()V
     return-void
 .end method
 
@@ -13,7 +13,7 @@
   if-eqz p1, :invoke_super_abstract
   return p1
   :invoke_super_abstract
-  invoke-super {p0, p1}, LB18380491ActractBase;->foo(I)I
+  invoke-super {p0, p1}, LB18380491AbstractBase;->foo(I)I
   move-result v0
   return v0
 .end method
diff --git a/test/999-jni-perf/check b/test/999-jni-perf/check
deleted file mode 100755
index ffbb8cf..0000000
--- a/test/999-jni-perf/check
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2014 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Only compare the last line.
-tail -n 1 "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
diff --git a/test/999-jni-perf/expected.txt b/test/999-jni-perf/expected.txt
deleted file mode 100644
index a965a70..0000000
--- a/test/999-jni-perf/expected.txt
+++ /dev/null
@@ -1 +0,0 @@
-Done
diff --git a/test/999-jni-perf/src/Main.java b/test/999-jni-perf/src/Main.java
deleted file mode 100644
index 032e700..0000000
--- a/test/999-jni-perf/src/Main.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class Main {
-  public Main() {
-  }
-
-  private static final String MSG = "ABCDE";
-
-  native int perfJniEmptyCall();
-  native int perfSOACall();
-  native int perfSOAUncheckedCall();
-
-  int runPerfTest(long N) {
-    long start = System.nanoTime();
-    for (long i = 0; i < N; i++) {
-      char c = MSG.charAt(2);
-    }
-    long elapse = System.nanoTime() - start;
-    System.out.println("Fast JNI (charAt): " + (double)elapse / N);
-
-    start = System.nanoTime();
-    for (long i = 0; i < N; i++) {
-      perfJniEmptyCall();
-    }
-    elapse = System.nanoTime() - start;
-    System.out.println("Empty call: " + (double)elapse / N);
-
-    start = System.nanoTime();
-    for (long i = 0; i < N; i++) {
-      perfSOACall();
-    }
-    elapse = System.nanoTime() - start;
-    System.out.println("SOA call: " + (double)elapse / N);
-
-    start = System.nanoTime();
-    for (long i = 0; i < N; i++) {
-      perfSOAUncheckedCall();
-    }
-    elapse = System.nanoTime() - start;
-    System.out.println("SOA unchecked call: " + (double)elapse / N);
-
-    return 0;
-  }
-
-  public static void main(String[] args) {
-    System.loadLibrary(args[0]);
-    long iterations = 1000000;
-    if (args.length > 1) {
-      iterations = Long.parseLong(args[1], 10);
-    }
-    Main m = new Main();
-    m.runPerfTest(iterations);
-    System.out.println("Done");
-  }
-}
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index 1d3ce8b..7f05a04 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -38,8 +38,7 @@
   457-regs/regs_jni.cc \
   461-get-reference-vreg/get_reference_vreg_jni.cc \
   466-get-live-vreg/get_live_vreg_jni.cc \
-  497-inlining-and-class-loader/clear_dex_cache.cc \
-  999-jni-perf/perf-jni.cc
+  497-inlining-and-class-loader/clear_dex_cache.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
@@ -73,7 +72,7 @@
     LOCAL_MODULE_TAGS := tests
   endif
   LOCAL_SRC_FILES := $(LIBARTTEST_COMMON_SRC_FILES)
-  LOCAL_SHARED_LIBRARIES += libart$$(suffix) libbacktrace
+  LOCAL_SHARED_LIBRARIES += libart$$(suffix) libbacktrace libnativehelper
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.libarttest.mk
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index ad3fb41..efc0bfb 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -39,7 +39,11 @@
 SECONDARY_DEX=""
 TIME_OUT="gdb"  # "n" (disabled), "timeout" (use timeout), "gdb" (use gdb)
 # Value in seconds
-TIME_OUT_VALUE=600  # 10 minutes.
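+# Read barrier configurations run slower, so allow a longer timeout.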
+if [ "$ART_USE_READ_BARRIER" = "true" ]; then
+  TIME_OUT_VALUE=900  # 15 minutes.
+else
+  TIME_OUT_VALUE=600  # 10 minutes.
+fi
 USE_GDB="n"
 USE_JVM="n"
 VERIFY="y" # y=yes,n=no,s=softfail
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index e28de09..a84365a 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -32,6 +32,11 @@
   exit 1
 fi
 
+emulator="no"
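+# emulator-5554 is the serial assigned to the first locally launched emulator.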
+if [ "$ANDROID_SERIAL" = "emulator-5554" ]; then
+  emulator="yes"
+fi
+
 # Packages that currently work correctly with the expectation files.
 working_packages=("dalvik.system"
                   "libcore.icu"
@@ -81,10 +86,12 @@
     # Remove the --debug from the arguments.
     vogar_args=${vogar_args/$1}
     vogar_args="$vogar_args --vm-arg -XXlib:libartd.so"
-    # Increase the timeout, as vogar cannot set individual test
-    # timeout when being asked to run packages, and some tests go above
-    # the default timeout.
-    vogar_args="$vogar_args --timeout 240"
+    if [ "$emulator" = "no" ]; then
+      # Increase the timeout, as vogar cannot set individual test
+      # timeout when being asked to run packages, and some tests go above
+      # the default timeout.
+      vogar_args="$vogar_args --timeout 240"
+    fi
     shift
   elif [[ "$1" == "" ]]; then
     break
@@ -93,6 +100,11 @@
   fi
 done
 
+if [ "$emulator" = "yes" ]; then
+  # Be very patient with the emulator.
+  vogar_args="$vogar_args --timeout 480"
+fi
+
 # Run the tests using vogar.
 echo "Running tests for the following test packages:"
 echo ${working_packages[@]} | tr " " "\n"