Merge "Revert "ART: Add Mterp export pc poison testing mode""
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 1afbdfc..009933d 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -65,15 +65,18 @@
 	$(call dexpreopt-remove-classes.dex,$@)
 
 # Dex file dependencies for each gtest.
+ART_GTEST_dex2oat_environment_tests_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
+
 ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MultiDex MyClass Nested Statics StaticsFromCode
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
+ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS)
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
 ART_GTEST_instrumentation_test_DEX_DEPS := Instrumentation
 ART_GTEST_jni_compiler_test_DEX_DEPS := MyClassNatives
 ART_GTEST_jni_internal_test_DEX_DEPS := AllFields StaticLeafMethods
-ART_GTEST_oat_file_assistant_test_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
+ART_GTEST_oat_file_assistant_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS)
 ART_GTEST_oat_file_test_DEX_DEPS := Main MultiDex
 ART_GTEST_oat_test_DEX_DEPS := Main
 ART_GTEST_object_test_DEX_DEPS := ProtoCompare ProtoCompare2 StaticsFromCode XandY
@@ -89,14 +92,25 @@
 ART_GTEST_elf_writer_test_HOST_DEPS := $(HOST_CORE_IMAGE_default_no-pic_64) $(HOST_CORE_IMAGE_default_no-pic_32)
 ART_GTEST_elf_writer_test_TARGET_DEPS := $(TARGET_CORE_IMAGE_default_no-pic_64) $(TARGET_CORE_IMAGE_default_no-pic_32)
 
-ART_GTEST_oat_file_assistant_test_HOST_DEPS := \
+ART_GTEST_dex2oat_environment_tests_HOST_DEPS := \
   $(HOST_CORE_IMAGE_default_no-pic_64) \
-  $(HOST_CORE_IMAGE_default_no-pic_32) \
-  $(HOST_OUT_EXECUTABLES)/patchoatd
-ART_GTEST_oat_file_assistant_test_TARGET_DEPS := \
+  $(HOST_CORE_IMAGE_default_no-pic_32)
+ART_GTEST_dex2oat_environment_tests_TARGET_DEPS := \
   $(TARGET_CORE_IMAGE_default_no-pic_64) \
-  $(TARGET_CORE_IMAGE_default_no-pic_32) \
-  $(TARGET_OUT_EXECUTABLES)/patchoatd
+  $(TARGET_CORE_IMAGE_default_no-pic_32)
+
+ART_GTEST_oat_file_assistant_test_HOST_DEPS := \
+   $(ART_GTEST_dex2oat_environment_tests_HOST_DEPS) \
+   $(HOST_OUT_EXECUTABLES)/patchoatd
+ART_GTEST_oat_file_assistant_test_TARGET_DEPS := \
+   $(ART_GTEST_dex2oat_environment_tests_TARGET_DEPS) \
+   $(TARGET_OUT_EXECUTABLES)/patchoatd
+
+
+ART_GTEST_dex2oat_test_HOST_DEPS := \
+  $(ART_GTEST_dex2oat_environment_tests_HOST_DEPS)
+ART_GTEST_dex2oat_test_TARGET_DEPS := \
+  $(ART_GTEST_dex2oat_environment_tests_TARGET_DEPS)
 
 # TODO: document why this is needed.
 ART_GTEST_proxy_test_HOST_DEPS := $(HOST_CORE_IMAGE_default_no-pic_64) $(HOST_CORE_IMAGE_default_no-pic_32)
@@ -157,6 +171,7 @@
   cmdline/cmdline_parser_test.cc \
   dexdump/dexdump_test.cc \
   dexlist/dexlist_test.cc \
+  dex2oat/dex2oat_test.cc \
   imgdiag/imgdiag_test.cc \
   oatdump/oatdump_test.cc \
   profman/profile_assistant_test.cc \
@@ -277,6 +292,7 @@
   compiler/optimizing/suspend_check_test.cc \
   compiler/utils/dedupe_set_test.cc \
   compiler/utils/intrusive_forward_list_test.cc \
+  compiler/utils/string_reference_test.cc \
   compiler/utils/swap_space_test.cc \
   compiler/utils/test_dex_file_builder_test.cc \
   compiler/utils/transform_array_ref_test.cc \
@@ -302,6 +318,8 @@
   compiler/utils/arm64/managed_register_arm64_test.cc \
 
 COMPILER_GTEST_COMMON_SRC_FILES_mips := \
+  compiler/linker/mips/relative_patcher_mips_test.cc \
+  compiler/linker/mips/relative_patcher_mips32r6_test.cc \
 
 COMPILER_GTEST_COMMON_SRC_FILES_mips64 := \
 
@@ -807,11 +825,15 @@
 ART_GTEST_oat_file_assistant_test_DEX_DEPS :=
 ART_GTEST_oat_file_assistant_test_HOST_DEPS :=
 ART_GTEST_oat_file_assistant_test_TARGET_DEPS :=
+ART_GTEST_dex2oat_test_DEX_DEPS :=
+ART_GTEST_dex2oat_test_HOST_DEPS :=
+ART_GTEST_dex2oat_test_TARGET_DEPS :=
 ART_GTEST_object_test_DEX_DEPS :=
 ART_GTEST_proxy_test_DEX_DEPS :=
 ART_GTEST_reflection_test_DEX_DEPS :=
 ART_GTEST_stub_test_DEX_DEPS :=
 ART_GTEST_transaction_test_DEX_DEPS :=
+ART_GTEST_dex2oat_environment_tests_DEX_DEPS :=
 ART_VALGRIND_DEPENDENCIES :=
 ART_VALGRIND_TARGET_DEPENDENCIES :=
 $(foreach dir,$(GTEST_DEX_DIRECTORIES), $(eval ART_TEST_TARGET_GTEST_$(dir)_DEX :=))
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 7c53e01..7ded3bf 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -30,18 +30,15 @@
   bool UsuallyEquals(double expected, double actual);
 
   // This has a gtest dependency, which is why it's in the gtest only.
-  bool operator==(const TestProfilerOptions& lhs, const TestProfilerOptions& rhs) {
+  bool operator==(const ProfileSaverOptions& lhs, const ProfileSaverOptions& rhs) {
     return lhs.enabled_ == rhs.enabled_ &&
-        lhs.output_file_name_ == rhs.output_file_name_ &&
-        lhs.period_s_ == rhs.period_s_ &&
-        lhs.duration_s_ == rhs.duration_s_ &&
-        lhs.interval_us_ == rhs.interval_us_ &&
-        UsuallyEquals(lhs.backoff_coefficient_, rhs.backoff_coefficient_) &&
-        UsuallyEquals(lhs.start_immediately_, rhs.start_immediately_) &&
-        UsuallyEquals(lhs.top_k_threshold_, rhs.top_k_threshold_) &&
-        UsuallyEquals(lhs.top_k_change_threshold_, rhs.top_k_change_threshold_) &&
-        lhs.profile_type_ == rhs.profile_type_ &&
-        lhs.max_stack_depth_ == rhs.max_stack_depth_;
+        lhs.min_save_period_ms_ == rhs.min_save_period_ms_ &&
+        lhs.save_resolved_classes_delay_ms_ == rhs.save_resolved_classes_delay_ms_ &&
+        lhs.startup_method_samples_ == rhs.startup_method_samples_ &&
+        lhs.min_methods_to_save_ == rhs.min_methods_to_save_ &&
+        lhs.min_classes_to_save_ == rhs.min_classes_to_save_ &&
+        lhs.min_notification_before_wake_ == rhs.min_notification_before_wake_ &&
+        lhs.max_notification_before_wake_ == rhs.max_notification_before_wake_;
   }
 
   bool UsuallyEquals(double expected, double actual) {
@@ -476,68 +473,21 @@
 }  // TEST_F
 
 /*
-* -X-profile-*
+* -Xps-*
 */
-TEST_F(CmdlineParserTest, TestProfilerOptions) {
- /*
-  * Test successes
-  */
+TEST_F(CmdlineParserTest, ProfileSaverOptions) {
+  ProfileSaverOptions opt = ProfileSaverOptions(true, 1, 2, 3, 4, 5, 6, 7);
 
-  {
-    TestProfilerOptions opt;
-    opt.enabled_ = true;
-
-    EXPECT_SINGLE_PARSE_VALUE(opt,
-                              "-Xenable-profiler",
-                              M::ProfilerOpts);
-  }
-
-  {
-    TestProfilerOptions opt;
-    // also need to test 'enabled'
-    opt.output_file_name_ = "hello_world.txt";
-
-    EXPECT_SINGLE_PARSE_VALUE(opt,
-                              "-Xprofile-filename:hello_world.txt ",
-                              M::ProfilerOpts);
-  }
-
-  {
-    TestProfilerOptions opt = TestProfilerOptions();
-    // also need to test 'enabled'
-    opt.output_file_name_ = "output.txt";
-    opt.period_s_ = 123u;
-    opt.duration_s_ = 456u;
-    opt.interval_us_ = 789u;
-    opt.backoff_coefficient_ = 2.0;
-    opt.start_immediately_ = true;
-    opt.top_k_threshold_ = 50.0;
-    opt.top_k_change_threshold_ = 60.0;
-    opt.profile_type_ = kProfilerMethod;
-    opt.max_stack_depth_ = 1337u;
-
-    EXPECT_SINGLE_PARSE_VALUE(opt,
-                              "-Xprofile-filename:output.txt "
-                              "-Xprofile-period:123 "
-                              "-Xprofile-duration:456 "
-                              "-Xprofile-interval:789 "
-                              "-Xprofile-backoff:2.0 "
-                              "-Xprofile-start-immediately "
-                              "-Xprofile-top-k-threshold:50.0 "
-                              "-Xprofile-top-k-change-threshold:60.0 "
-                              "-Xprofile-type:method "
-                              "-Xprofile-max-stack-depth:1337",
-                              M::ProfilerOpts);
-  }
-
-  {
-    TestProfilerOptions opt = TestProfilerOptions();
-    opt.profile_type_ = kProfilerBoundedStack;
-
-    EXPECT_SINGLE_PARSE_VALUE(opt,
-                              "-Xprofile-type:stack",
-                              M::ProfilerOpts);
-  }
+  EXPECT_SINGLE_PARSE_VALUE(opt,
+                            "-Xjitsaveprofilinginfo "
+                            "-Xps-min-save-period-ms:1 "
+                            "-Xps-save-resolved-classes-delay-ms:2 "
+                            "-Xps-startup-method-samples:3 "
+                            "-Xps-min-methods-to-save:4 "
+                            "-Xps-min-classes-to-save:5 "
+                            "-Xps-min-notification-before-wake:6 "
+                            "-Xps-max-notification-before-wake:7",
+                            M::ProfileSaverOpts);
 }  // TEST_F
 
 /* -Xexperimental:_ */
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 4797540..9b4042c 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -31,7 +31,7 @@
 #include "experimental_flags.h"
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
-#include "profiler_options.h"
+#include "jit/profile_saver_options.h"
 
 namespace art {
 
@@ -633,84 +633,17 @@
   static const char* Name() { return "LogVerbosity"; }
 };
 
-// TODO: Replace with art::ProfilerOptions for the real thing.
-struct TestProfilerOptions {
-  // Whether or not the applications should be profiled.
-  bool enabled_;
-  // Destination file name where the profiling data will be saved into.
-  std::string output_file_name_;
-  // Generate profile every n seconds.
-  uint32_t period_s_;
-  // Run profile for n seconds.
-  uint32_t duration_s_;
-  // Microseconds between samples.
-  uint32_t interval_us_;
-  // Coefficient to exponential backoff.
-  double backoff_coefficient_;
-  // Whether the profile should start upon app startup or be delayed by some random offset.
-  bool start_immediately_;
-  // Top K% of samples that are considered relevant when deciding if the app should be recompiled.
-  double top_k_threshold_;
-  // How much the top K% samples needs to change in order for the app to be recompiled.
-  double top_k_change_threshold_;
-  // The type of profile data dumped to the disk.
-  ProfileDataType profile_type_;
-  // The max depth of the stack collected by the profiler
-  uint32_t max_stack_depth_;
-
-  TestProfilerOptions() :
-    enabled_(false),
-    output_file_name_(),
-    period_s_(0),
-    duration_s_(0),
-    interval_us_(0),
-    backoff_coefficient_(0),
-    start_immediately_(0),
-    top_k_threshold_(0),
-    top_k_change_threshold_(0),
-    profile_type_(ProfileDataType::kProfilerMethod),
-    max_stack_depth_(0) {
-  }
-
-  TestProfilerOptions(const TestProfilerOptions&) = default;
-  TestProfilerOptions(TestProfilerOptions&&) = default;
-};
-
-static inline std::ostream& operator<<(std::ostream& stream, const TestProfilerOptions& options) {
-  stream << "TestProfilerOptions {" << std::endl;
-
-#define PRINT_TO_STREAM(field) \
-  stream << #field << ": '" << options.field << "'" << std::endl;
-
-  PRINT_TO_STREAM(enabled_);
-  PRINT_TO_STREAM(output_file_name_);
-  PRINT_TO_STREAM(period_s_);
-  PRINT_TO_STREAM(duration_s_);
-  PRINT_TO_STREAM(interval_us_);
-  PRINT_TO_STREAM(backoff_coefficient_);
-  PRINT_TO_STREAM(start_immediately_);
-  PRINT_TO_STREAM(top_k_threshold_);
-  PRINT_TO_STREAM(top_k_change_threshold_);
-  PRINT_TO_STREAM(profile_type_);
-  PRINT_TO_STREAM(max_stack_depth_);
-
-  stream << "}";
-
-  return stream;
-#undef PRINT_TO_STREAM
-}
-
 template <>
-struct CmdlineType<TestProfilerOptions> : CmdlineTypeParser<TestProfilerOptions> {
-  using Result = CmdlineParseResult<TestProfilerOptions>;
+struct CmdlineType<ProfileSaverOptions> : CmdlineTypeParser<ProfileSaverOptions> {
+  using Result = CmdlineParseResult<ProfileSaverOptions>;
 
  private:
   using StringResult = CmdlineParseResult<std::string>;
   using DoubleResult = CmdlineParseResult<double>;
 
   template <typename T>
-  static Result ParseInto(TestProfilerOptions& options,
-                          T TestProfilerOptions::*pField,
+  static Result ParseInto(ProfileSaverOptions& options,
+                          T ProfileSaverOptions::*pField,
                           CmdlineParseResult<T>&& result) {
     assert(pField != nullptr);
 
@@ -722,36 +655,6 @@
     return Result::CastError(result);
   }
 
-  template <typename T>
-  static Result ParseIntoRangeCheck(TestProfilerOptions& options,
-                                    T TestProfilerOptions::*pField,
-                                    CmdlineParseResult<T>&& result,
-                                    T min,
-                                    T max) {
-    if (result.IsSuccess()) {
-      const T& value = result.GetValue();
-
-      if (value < min || value > max) {
-        CmdlineParseResult<T> out_of_range = CmdlineParseResult<T>::OutOfRange(value, min, max);
-        return Result::CastError(out_of_range);
-      }
-    }
-
-    return ParseInto(options, pField, std::forward<CmdlineParseResult<T>>(result));
-  }
-
-  static StringResult ParseStringAfterChar(const std::string& s, char c) {
-    std::string parsed_value;
-
-    std::string::size_type colon = s.find(c);
-    if (colon == std::string::npos) {
-      return StringResult::Usage(std::string() + "Missing char " + c + " in option " + s);
-    }
-    // Add one to remove the char we were trimming until.
-    parsed_value = s.substr(colon + 1);
-    return StringResult::Success(parsed_value);
-  }
-
   static std::string RemovePrefix(const std::string& source) {
     size_t prefix_idx = source.find(":");
 
@@ -763,87 +666,64 @@
   }
 
  public:
-  Result ParseAndAppend(const std::string& option, TestProfilerOptions& existing) {
+  Result ParseAndAppend(const std::string& option, ProfileSaverOptions& existing) {
     // Special case which doesn't include a wildcard argument definition.
     // We pass-it through as-is.
-    if (option == "-Xenable-profiler") {
+    if (option == "-Xjitsaveprofilinginfo") {
       existing.enabled_ = true;
       return Result::SuccessNoValue();
     }
 
-    // The rest of these options are always the wildcard from '-Xprofile-*'
+    // The rest of these options are always the wildcard from '-Xps-*'
     std::string suffix = RemovePrefix(option);
 
-    if (StartsWith(option, "filename:")) {
-      CmdlineType<std::string> type_parser;
-
-      return ParseInto(existing,
-                       &TestProfilerOptions::output_file_name_,
-                       type_parser.Parse(suffix));
-    } else if (StartsWith(option, "period:")) {
+    if (StartsWith(option, "min-save-period-ms:")) {
       CmdlineType<unsigned int> type_parser;
-
       return ParseInto(existing,
-                       &TestProfilerOptions::period_s_,
-                       type_parser.Parse(suffix));
-    } else if (StartsWith(option, "duration:")) {
+             &ProfileSaverOptions::min_save_period_ms_,
+             type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "save-resolved-classes-delay-ms:")) {
       CmdlineType<unsigned int> type_parser;
-
       return ParseInto(existing,
-                       &TestProfilerOptions::duration_s_,
-                       type_parser.Parse(suffix));
-    } else if (StartsWith(option, "interval:")) {
+             &ProfileSaverOptions::save_resolved_classes_delay_ms_,
+             type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "startup-method-samples:")) {
       CmdlineType<unsigned int> type_parser;
-
       return ParseInto(existing,
-                       &TestProfilerOptions::interval_us_,
-                       type_parser.Parse(suffix));
-    } else if (StartsWith(option, "backoff:")) {
-      CmdlineType<double> type_parser;
-
-      return ParseIntoRangeCheck(existing,
-                                 &TestProfilerOptions::backoff_coefficient_,
-                                 type_parser.Parse(suffix),
-                                 1.0,
-                                 10.0);
-
-    } else if (option == "start-immediately") {
-      existing.start_immediately_ = true;
-      return Result::SuccessNoValue();
-    } else if (StartsWith(option, "top-k-threshold:")) {
-      CmdlineType<double> type_parser;
-
-      return ParseIntoRangeCheck(existing,
-                                 &TestProfilerOptions::top_k_threshold_,
-                                 type_parser.Parse(suffix),
-                                 0.0,
-                                 100.0);
-    } else if (StartsWith(option, "top-k-change-threshold:")) {
-      CmdlineType<double> type_parser;
-
-      return ParseIntoRangeCheck(existing,
-                                 &TestProfilerOptions::top_k_change_threshold_,
-                                 type_parser.Parse(suffix),
-                                 0.0,
-                                 100.0);
-    } else if (option == "type:method") {
-      existing.profile_type_ = kProfilerMethod;
-      return Result::SuccessNoValue();
-    } else if (option == "type:stack") {
-      existing.profile_type_ = kProfilerBoundedStack;
-      return Result::SuccessNoValue();
-    } else if (StartsWith(option, "max-stack-depth:")) {
+             &ProfileSaverOptions::startup_method_samples_,
+             type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "min-methods-to-save:")) {
       CmdlineType<unsigned int> type_parser;
-
       return ParseInto(existing,
-                       &TestProfilerOptions::max_stack_depth_,
-                       type_parser.Parse(suffix));
+             &ProfileSaverOptions::min_methods_to_save_,
+             type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "min-classes-to-save:")) {
+      CmdlineType<unsigned int> type_parser;
+      return ParseInto(existing,
+             &ProfileSaverOptions::min_classes_to_save_,
+             type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "min-notification-before-wake:")) {
+      CmdlineType<unsigned int> type_parser;
+      return ParseInto(existing,
+             &ProfileSaverOptions::min_notification_before_wake_,
+             type_parser.Parse(suffix));
+    }
+    if (StartsWith(option, "max-notification-before-wake:")) {
+      CmdlineType<unsigned int> type_parser;
+      return ParseInto(existing,
+             &ProfileSaverOptions::max_notification_before_wake_,
+             type_parser.Parse(suffix));
     } else {
       return Result::Failure(std::string("Invalid suboption '") + option + "'");
     }
   }
 
-  static const char* Name() { return "TestProfilerOptions"; }
+  static const char* Name() { return "ProfileSaverOptions"; }
   static constexpr bool kCanParseBlankless = true;
 };
 
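For readers skimming the cmdline_types.h hunk above: the new parser routes each `-Xps-<key>:<value>` suffix to a field of `ProfileSaverOptions`, with `-Xjitsaveprofilinginfo` as the bare enable flag. The following is a minimal standalone sketch of that suffix-dispatch pattern, using hypothetical names and plain `std::string` handling rather than ART's `CmdlineType`/`CmdlineParseResult` machinery:

```cpp
#include <cstdint>
#include <iostream>
#include <string>

// Hypothetical stand-in for a few fields of art::ProfileSaverOptions.
struct PsOptionsSketch {
  bool enabled = false;
  uint32_t min_save_period_ms = 0;
  uint32_t min_methods_to_save = 0;
};

static bool StartsWith(const std::string& s, const std::string& prefix) {
  return s.compare(0, prefix.size(), prefix) == 0;
}

// Mirrors the ParseAndAppend flow in spirit only: "-Xjitsaveprofilinginfo" flips the
// enabled flag, every other recognized option is "-Xps-<key>:<unsigned value>".
bool ParsePsOption(const std::string& option, PsOptionsSketch* out) {
  if (option == "-Xjitsaveprofilinginfo") {
    out->enabled = true;
    return true;
  }
  static const std::string kPrefix = "-Xps-";
  if (!StartsWith(option, kPrefix)) {
    return false;
  }
  const std::string suboption = option.substr(kPrefix.size());
  auto parse_uint = [&suboption](const std::string& key, uint32_t* field) {
    if (!StartsWith(suboption, key)) {
      return false;
    }
    *field = static_cast<uint32_t>(std::stoul(suboption.substr(key.size())));
    return true;
  };
  return parse_uint("min-save-period-ms:", &out->min_save_period_ms) ||
         parse_uint("min-methods-to-save:", &out->min_methods_to_save);
}

int main() {
  PsOptionsSketch opts;
  ParsePsOption("-Xjitsaveprofilinginfo", &opts);
  ParsePsOption("-Xps-min-save-period-ms:1", &opts);
  ParsePsOption("-Xps-min-methods-to-save:4", &opts);
  std::cout << opts.enabled << " " << opts.min_save_period_ms << " "
            << opts.min_methods_to_save << std::endl;  // Prints: 1 1 4
  return 0;
}
```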
diff --git a/compiler/Android.mk b/compiler/Android.mk
index e9c22d2..4ec7d72 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -113,8 +113,11 @@
 
 LIBART_COMPILER_SRC_FILES_mips := \
 	jni/quick/mips/calling_convention_mips.cc \
+	linker/mips/relative_patcher_mips.cc \
 	optimizing/code_generator_mips.cc \
+	optimizing/dex_cache_array_fixups_mips.cc \
 	optimizing/intrinsics_mips.cc \
+	optimizing/pc_relative_fixups_mips.cc \
 	utils/mips/assembler_mips.cc \
 	utils/mips/managed_register_mips.cc \
 
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 9479ff3..2a81804 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -174,6 +174,7 @@
     kCall,
     kCallRelative,     // NOTE: Actual patching is instruction_set-dependent.
     kType,
+    kTypeRelative,     // NOTE: Actual patching is instruction_set-dependent.
     kString,
     kStringRelative,   // NOTE: Actual patching is instruction_set-dependent.
     kDexCacheArray,    // NOTE: Actual patching is instruction_set-dependent.
@@ -215,6 +216,16 @@
     return patch;
   }
 
+  static LinkerPatch RelativeTypePatch(size_t literal_offset,
+                                       const DexFile* target_dex_file,
+                                       uint32_t pc_insn_offset,
+                                       uint32_t target_type_idx) {
+    LinkerPatch patch(literal_offset, Type::kTypeRelative, target_dex_file);
+    patch.type_idx_ = target_type_idx;
+    patch.pc_insn_offset_ = pc_insn_offset;
+    return patch;
+  }
+
   static LinkerPatch StringPatch(size_t literal_offset,
                                  const DexFile* target_dex_file,
                                  uint32_t target_string_idx) {
@@ -258,6 +269,7 @@
   bool IsPcRelative() const {
     switch (GetType()) {
       case Type::kCallRelative:
+      case Type::kTypeRelative:
       case Type::kStringRelative:
       case Type::kDexCacheArray:
         return true;
@@ -274,12 +286,12 @@
   }
 
   const DexFile* TargetTypeDexFile() const {
-    DCHECK(patch_type_ == Type::kType);
+    DCHECK(patch_type_ == Type::kType || patch_type_ == Type::kTypeRelative);
     return target_dex_file_;
   }
 
   uint32_t TargetTypeIndex() const {
-    DCHECK(patch_type_ == Type::kType);
+    DCHECK(patch_type_ == Type::kType || patch_type_ == Type::kTypeRelative);
     return type_idx_;
   }
 
@@ -304,7 +316,9 @@
   }
 
   uint32_t PcInsnOffset() const {
-    DCHECK(patch_type_ == Type::kStringRelative || patch_type_ == Type::kDexCacheArray);
+    DCHECK(patch_type_ == Type::kTypeRelative ||
+           patch_type_ == Type::kStringRelative ||
+           patch_type_ == Type::kDexCacheArray);
     return pc_insn_offset_;
   }
 
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 131be37..474530a 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -47,7 +47,6 @@
 #include "driver/compiler_options.h"
 #include "jni_internal.h"
 #include "object_lock.h"
-#include "profiler.h"
 #include "runtime.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap.h"
@@ -395,7 +394,7 @@
       dump_passes_(dump_passes),
       timings_logger_(timer),
       compiler_context_(nullptr),
-      support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
+      support_boot_image_fixup_(instruction_set != kMips64),
       dex_files_for_oat_file_(nullptr),
       compiled_method_storage_(swap_fd),
       profile_compilation_info_(profile_compilation_info),
@@ -2522,28 +2521,11 @@
                                                               true);
     }
     // Create the conflict tables.
-    FillIMTAndConflictTables(klass);
-    return true;
-  }
-
- private:
-  void FillIMTAndConflictTables(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (!klass->ShouldHaveImt()) {
-      return;
-    }
-    if (visited_classes_.find(klass) != visited_classes_.end()) {
-      return;
-    }
-    if (klass->HasSuperClass()) {
-      FillIMTAndConflictTables(klass->GetSuperClass());
-    }
-    if (!klass->IsTemp()) {
+    if (!klass->IsTemp() && klass->ShouldHaveEmbeddedImtAndVTable()) {
       Runtime::Current()->GetClassLinker()->FillIMTAndConflictTables(klass);
     }
-    visited_classes_.insert(klass);
+    return true;
   }
-
-  std::set<mirror::Class*> visited_classes_;
 };
 
 void CompilerDriver::InitializeClasses(jobject class_loader,
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 26ab281..7f2e193 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -780,9 +780,9 @@
                               EF_MIPS_PIC       |
                               EF_MIPS_CPIC      |
                               EF_MIPS_ABI_O32   |
-                              features->AsMipsInstructionSetFeatures()->IsR6()
-                                  ? EF_MIPS_ARCH_32R6
-                                  : EF_MIPS_ARCH_32R2);
+                              (features->AsMipsInstructionSetFeatures()->IsR6()
+                                   ? EF_MIPS_ARCH_32R6
+                                   : EF_MIPS_ARCH_32R2));
         break;
       }
       case kMips64: {
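The elf_builder.h hunk above is a pure operator-precedence fix: `|` binds tighter than `?:`, so without the added parentheses the conditional swallowed everything OR'd before it and the other flag bits were dropped. A standalone illustration with toy constants (not the real EF_MIPS_* values):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  constexpr uint32_t kBase = 0x1000;  // stands in for the OR'd EF_MIPS_* flags
  constexpr uint32_t kR6 = 0x0001;    // stands in for EF_MIPS_ARCH_32R6
  constexpr uint32_t kR2 = 0x0002;    // stands in for EF_MIPS_ARCH_32R2
  bool is_r6 = false;
  // Without parentheses, this parses as (kBase | is_r6) ? kR6 : kR2, so the whole
  // expression collapses to one of the arch constants and kBase is lost.
  uint32_t wrong = kBase | is_r6 ? kR6 : kR2;
  // With parentheses, the intended "base flags plus one arch bit" is computed.
  uint32_t right = kBase | (is_r6 ? kR6 : kR2);
  std::printf("wrong=0x%x right=0x%x\n", wrong, right);  // wrong=0x1 right=0x1002
  return 0;
}
```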
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 063eb11..da10568 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1232,10 +1232,9 @@
       }
       // Assign offsets for all runtime methods in the IMT since these may hold conflict tables
       // live.
-      if (as_klass->ShouldHaveImt()) {
-        ImTable* imt = as_klass->GetImt(target_ptr_size_);
-        for (size_t i = 0; i < ImTable::kSize; ++i) {
-          ArtMethod* imt_method = imt->Get(i, target_ptr_size_);
+      if (as_klass->ShouldHaveEmbeddedImtAndVTable()) {
+        for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+          ArtMethod* imt_method = as_klass->GetEmbeddedImTableEntry(i, target_ptr_size_);
           DCHECK(imt_method != nullptr);
           if (imt_method->IsRuntimeMethod() &&
               !IsInBootImage(imt_method) &&
@@ -1244,11 +1243,6 @@
           }
         }
       }
-
-      if (as_klass->ShouldHaveImt()) {
-        ImTable* imt = as_klass->GetImt(target_ptr_size_);
-        TryAssignImTableOffset(imt, oat_index);
-      }
     } else if (h_obj->IsObjectArray()) {
       // Walk elements of an object array.
       int32_t length = h_obj->AsObjectArray<mirror::Object>()->GetLength();
@@ -1275,23 +1269,6 @@
   return native_object_relocations_.find(ptr) != native_object_relocations_.end();
 }
 
-void ImageWriter::TryAssignImTableOffset(ImTable* imt, size_t oat_index) {
-  // No offset, or already assigned.
-  if (imt == nullptr || IsInBootImage(imt) || NativeRelocationAssigned(imt)) {
-    return;
-  }
-  // If the method is a conflict method we also want to assign the conflict table offset.
-  ImageInfo& image_info = GetImageInfo(oat_index);
-  const size_t size = ImTable::SizeInBytes(target_ptr_size_);
-  native_object_relocations_.emplace(
-      imt,
-      NativeObjectRelocation {
-          oat_index,
-          image_info.bin_slot_sizes_[kBinImTable],
-          kNativeObjectRelocationTypeIMTable});
-  image_info.bin_slot_sizes_[kBinImTable] += size;
-}
-
 void ImageWriter::TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index) {
   // No offset, or already assigned.
   if (table == nullptr || NativeRelocationAssigned(table)) {
@@ -1414,7 +1391,6 @@
           bin_offset = RoundUp(bin_offset, method_alignment);
           break;
         }
-        case kBinImTable:
         case kBinIMTConflictTable: {
           bin_offset = RoundUp(bin_offset, target_ptr_size_);
           break;
@@ -1485,10 +1461,6 @@
       bin_slot_offsets_[kBinArtMethodClean],
       bin_slot_sizes_[kBinArtMethodClean] + bin_slot_sizes_[kBinArtMethodDirty]);
 
-  // IMT section.
-  ImageSection* imt_section = &out_sections[ImageHeader::kSectionImTables];
-  *imt_section = ImageSection(bin_slot_offsets_[kBinImTable], bin_slot_sizes_[kBinImTable]);
-
   // Conflict tables section.
   ImageSection* imt_conflict_tables_section = &out_sections[ImageHeader::kSectionIMTConflictTables];
   *imt_conflict_tables_section = ImageSection(bin_slot_offsets_[kBinIMTConflictTable],
@@ -1613,13 +1585,6 @@
   ImageWriter* const image_writer_;
 };
 
-void ImageWriter::CopyAndFixupImTable(ImTable* orig, ImTable* copy) {
-  for (size_t i = 0; i < ImTable::kSize; ++i) {
-    ArtMethod* method = orig->Get(i, target_ptr_size_);
-    copy->Set(i, NativeLocationInImage(method), target_ptr_size_);
-  }
-}
-
 void ImageWriter::CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConflictTable* copy) {
   const size_t count = orig->NumEntries(target_ptr_size_);
   for (size_t i = 0; i < count; ++i) {
@@ -1677,12 +1642,6 @@
       case kNativeObjectRelocationTypeDexCacheArray:
         // Nothing to copy here, everything is done in FixupDexCache().
         break;
-      case kNativeObjectRelocationTypeIMTable: {
-        ImTable* orig_imt = reinterpret_cast<ImTable*>(pair.first);
-        ImTable* dest_imt = reinterpret_cast<ImTable*>(dest);
-        CopyAndFixupImTable(orig_imt, dest_imt);
-        break;
-      }
       case kNativeObjectRelocationTypeIMTConflictTable: {
         auto* orig_table = reinterpret_cast<ImtConflictTable*>(pair.first);
         CopyAndFixupImtConflictTable(
@@ -1891,25 +1850,13 @@
 }
 
 template <typename T>
-std::string PrettyPrint(T* ptr) SHARED_REQUIRES(Locks::mutator_lock_) {
-  std::ostringstream oss;
-  oss << ptr;
-  return oss.str();
-}
-
-template <>
-std::string PrettyPrint(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) {
-  return PrettyMethod(method);
-}
-
-template <typename T>
 T* ImageWriter::NativeLocationInImage(T* obj) {
   if (obj == nullptr || IsInBootImage(obj)) {
     return obj;
   } else {
     auto it = native_object_relocations_.find(obj);
-    CHECK(it != native_object_relocations_.end()) << obj << " " << PrettyPrint(obj)
-        << " spaces " << Runtime::Current()->GetHeap()->DumpSpaces();
+    CHECK(it != native_object_relocations_.end()) << obj << " spaces "
+        << Runtime::Current()->GetHeap()->DumpSpaces();
     const NativeObjectRelocation& relocation = it->second;
     ImageInfo& image_info = GetImageInfo(relocation.oat_index);
     return reinterpret_cast<T*>(image_info.image_begin_ + relocation.offset);
@@ -2263,8 +2210,6 @@
       return kBinDexCacheArray;
     case kNativeObjectRelocationTypeRuntimeMethod:
       return kBinRuntimeMethod;
-    case kNativeObjectRelocationTypeIMTable:
-      return kBinImTable;
     case kNativeObjectRelocationTypeIMTConflictTable:
       return kBinIMTConflictTable;
   }
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 1efdc22..51976c5 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -169,8 +169,6 @@
     // ArtMethods may be dirty if the class has native methods or a declaring class that isn't
     // initialized.
     kBinArtMethodDirty,
-    // IMT (clean)
-    kBinImTable,
     // Conflict tables (clean).
     kBinIMTConflictTable,
     // Runtime methods (always clean, do not have a length prefix array).
@@ -193,7 +191,6 @@
     kNativeObjectRelocationTypeArtMethodDirty,
     kNativeObjectRelocationTypeArtMethodArrayDirty,
     kNativeObjectRelocationTypeRuntimeMethod,
-    kNativeObjectRelocationTypeIMTable,
     kNativeObjectRelocationTypeIMTConflictTable,
     kNativeObjectRelocationTypeDexCacheArray,
   };
@@ -404,7 +401,6 @@
   void CopyAndFixupObject(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
   void CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy, const ImageInfo& image_info)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void CopyAndFixupImTable(ImTable* orig, ImTable* copy) SHARED_REQUIRES(Locks::mutator_lock_);
   void CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConflictTable* copy)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupClass(mirror::Class* orig, mirror::Class* copy)
@@ -437,8 +433,6 @@
                           size_t oat_index)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void TryAssignImTableOffset(ImTable* imt, size_t oat_index) SHARED_REQUIRES(Locks::mutator_lock_);
-
   // Assign the offset for an IMT conflict table. Does nothing if the table already has a native
   // relocation.
   void TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index)
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 7277107..fdd14be 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -211,11 +211,13 @@
     if ((insn & 0xfffffc00) == 0x91000000) {
       // ADD immediate, 64-bit with imm12 == 0 (unset).
       if (!kEmitCompilerReadBarrier) {
-        DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative) << patch.GetType();
+        DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
+               patch.GetType() == LinkerPatch::Type::kTypeRelative) << patch.GetType();
       } else {
         // With the read barrier (non-baker) enabled, it could be kDexCacheArray in the
         // HLoadString::LoadKind::kDexCachePcRelative case of VisitLoadString().
         DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
+               patch.GetType() == LinkerPatch::Type::kTypeRelative ||
                patch.GetType() == LinkerPatch::Type::kDexCacheArray) << patch.GetType();
       }
       shift = 0u;  // No shift for ADD.
diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc
new file mode 100644
index 0000000..7c0423b
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips.cc
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/mips/relative_patcher_mips.h"
+
+#include "compiled_method.h"
+
+namespace art {
+namespace linker {
+
+uint32_t MipsRelativePatcher::ReserveSpace(
+    uint32_t offset,
+    const CompiledMethod* compiled_method ATTRIBUTE_UNUSED,
+    MethodReference method_ref ATTRIBUTE_UNUSED) {
+  return offset;  // No space reserved; no limit on relative call distance.
+}
+
+uint32_t MipsRelativePatcher::ReserveSpaceEnd(uint32_t offset) {
+  return offset;  // No space reserved; no limit on relative call distance.
+}
+
+uint32_t MipsRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) {
+  return offset;  // No thunks added; no limit on relative call distance.
+}
+
+void MipsRelativePatcher::PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                                    uint32_t literal_offset ATTRIBUTE_UNUSED,
+                                    uint32_t patch_offset ATTRIBUTE_UNUSED,
+                                    uint32_t target_offset ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "PatchCall unimplemented on MIPS";
+}
+
+void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                                   const LinkerPatch& patch,
+                                                   uint32_t patch_offset,
+                                                   uint32_t target_offset) {
+  uint32_t anchor_literal_offset = patch.PcInsnOffset();
+  uint32_t literal_offset = patch.LiteralOffset();
+
+  // Basic sanity checks.
+  if (is_r6) {
+    DCHECK_GE(code->size(), 8u);
+    DCHECK_LE(literal_offset, code->size() - 8u);
+    DCHECK_EQ(literal_offset, anchor_literal_offset);
+    // AUIPC reg, offset_high
+    DCHECK_EQ((*code)[literal_offset + 0], 0x34);
+    DCHECK_EQ((*code)[literal_offset + 1], 0x12);
+    DCHECK_EQ(((*code)[literal_offset + 2] & 0x1F), 0x1E);
+    DCHECK_EQ(((*code)[literal_offset + 3] & 0xFC), 0xEC);
+    // ADDIU reg, reg, offset_low
+    DCHECK_EQ((*code)[literal_offset + 4], 0x78);
+    DCHECK_EQ((*code)[literal_offset + 5], 0x56);
+    DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x24);
+  } else {
+    DCHECK_GE(code->size(), 16u);
+    DCHECK_LE(literal_offset, code->size() - 12u);
+    DCHECK_GE(literal_offset, 4u);
+    DCHECK_EQ(literal_offset + 4u, anchor_literal_offset);
+    // NAL
+    DCHECK_EQ((*code)[literal_offset - 4], 0x00);
+    DCHECK_EQ((*code)[literal_offset - 3], 0x00);
+    DCHECK_EQ((*code)[literal_offset - 2], 0x10);
+    DCHECK_EQ((*code)[literal_offset - 1], 0x04);
+    // LUI reg, offset_high
+    DCHECK_EQ((*code)[literal_offset + 0], 0x34);
+    DCHECK_EQ((*code)[literal_offset + 1], 0x12);
+    DCHECK_EQ(((*code)[literal_offset + 2] & 0xE0), 0x00);
+    DCHECK_EQ((*code)[literal_offset + 3], 0x3C);
+    // ORI reg, reg, offset_low
+    DCHECK_EQ((*code)[literal_offset + 4], 0x78);
+    DCHECK_EQ((*code)[literal_offset + 5], 0x56);
+    DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x34);
+    // ADDU reg, reg, RA
+    DCHECK_EQ((*code)[literal_offset + 8], 0x21);
+    DCHECK_EQ(((*code)[literal_offset + 9] & 0x07), 0x00);
+    DCHECK_EQ(((*code)[literal_offset + 10] & 0x1F), 0x1F);
+    DCHECK_EQ(((*code)[literal_offset + 11] & 0xFC), 0x00);
+  }
+
+  // Apply patch.
+  uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset;
+  uint32_t diff = target_offset - anchor_offset + kDexCacheArrayLwOffset;
+  if (is_r6) {
+    diff += (diff & 0x8000) << 1;  // Account for sign extension in ADDIU.
+  }
+
+  // LUI reg, offset_high / AUIPC reg, offset_high
+  (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 16);
+  (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 24);
+  // ORI reg, reg, offset_low / ADDIU reg, reg, offset_low
+  (*code)[literal_offset + 4] = static_cast<uint8_t>(diff >> 0);
+  (*code)[literal_offset + 5] = static_cast<uint8_t>(diff >> 8);
+}
+
+}  // namespace linker
+}  // namespace art
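The byte-poking at the end of PatchPcRelativeReference is easiest to see in isolation. The sketch below (assuming the same little-endian layout and field offsets as the new patcher, but written outside of ART) shows how the 32-bit displacement is split across the two imm16 fields, including the R6 adjustment for ADDIU's sign extension:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Writes the high half of `diff` into the imm16 of the first instruction (LUI or AUIPC)
// and the low half into the imm16 of the second one (ORI or ADDIU), little-endian.
void PatchHiLo(std::vector<uint8_t>* code, size_t literal_offset, uint32_t diff, bool is_r6) {
  if (is_r6) {
    // ADDIU sign-extends its immediate; if bit 15 of the low half is set, the high half
    // must be incremented so that hi<<16 plus the sign-extended low half reproduces `diff`.
    diff += (diff & 0x8000u) << 1;
  }
  (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 16);
  (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 24);
  (*code)[literal_offset + 4] = static_cast<uint8_t>(diff >> 0);
  (*code)[literal_offset + 5] = static_cast<uint8_t>(diff >> 8);
}

int main() {
  std::vector<uint8_t> code(8, 0);
  PatchHiLo(&code, /*literal_offset=*/ 0, 0x1234ABCDu, /*is_r6=*/ true);
  // High imm16 becomes 0x1235, low imm16 stays 0xABCD:
  // 0x12350000 + sext(0xABCD) = 0x12350000 - 0x5433 = 0x1234ABCD.
  std::printf("hi=%02x%02x lo=%02x%02x\n", code[1], code[0], code[5], code[4]);
  return 0;
}
```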
diff --git a/compiler/linker/mips/relative_patcher_mips.h b/compiler/linker/mips/relative_patcher_mips.h
new file mode 100644
index 0000000..4ff2f2f
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_
+#define ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_
+
+#include "linker/relative_patcher.h"
+#include "arch/mips/instruction_set_features_mips.h"
+
+namespace art {
+namespace linker {
+
+class MipsRelativePatcher FINAL : public RelativePatcher {
+ public:
+  explicit MipsRelativePatcher(const MipsInstructionSetFeatures* features)
+      : is_r6(features->IsR6()) {}
+
+  uint32_t ReserveSpace(uint32_t offset,
+                        const CompiledMethod* compiled_method,
+                        MethodReference method_ref) OVERRIDE;
+  uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE;
+  uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE;
+  void PatchCall(std::vector<uint8_t>* code,
+                 uint32_t literal_offset,
+                 uint32_t patch_offset,
+                 uint32_t target_offset) OVERRIDE;
+  void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                const LinkerPatch& patch,
+                                uint32_t patch_offset,
+                                uint32_t target_offset) OVERRIDE;
+
+ private:
+  // We'll maximize the range of a single load instruction for dex cache array accesses
+  // by aligning offset -32768 with the offset of the first used element.
+  static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
+  bool is_r6;
+
+  DISALLOW_COPY_AND_ASSIGN(MipsRelativePatcher);
+};
+
+}  // namespace linker
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_
diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
new file mode 100644
index 0000000..0f1dcbc
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/relative_patcher_test.h"
+#include "linker/mips/relative_patcher_mips.h"
+
+namespace art {
+namespace linker {
+
+// We'll maximize the range of a single load instruction for dex cache array accesses
+// by aligning offset -32768 with the offset of the first used element.
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
+
+class Mips32r6RelativePatcherTest : public RelativePatcherTest {
+ public:
+  Mips32r6RelativePatcherTest() : RelativePatcherTest(kMips, "mips32r6") {}
+
+ protected:
+  uint32_t GetMethodOffset(uint32_t method_idx) {
+    auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
+    CHECK(result.first);
+    return result.second;
+  }
+};
+
+TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) {
+  dex_cache_arrays_begin_ = 0x12345678;
+  constexpr size_t kElementOffset = 0x1234;
+  static const uint8_t raw_code[] = {
+      0x34, 0x12, 0x5E, 0xEE,  // auipc s2, high(diff); placeholder = 0x1234
+      0x78, 0x56, 0x52, 0x26,  // addiu s2, s2, low(diff); placeholder = 0x5678
+  };
+  constexpr uint32_t literal_offset = 0;  // At auipc (where patching starts).
+  constexpr uint32_t anchor_offset = literal_offset;  // At auipc (where PC+0 points).
+  ArrayRef<const uint8_t> code(raw_code);
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(literal_offset, nullptr, anchor_offset, kElementOffset),
+  };
+  AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+  ASSERT_TRUE(result.first);
+  uint32_t diff = dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset) +
+      kDexCacheArrayLwOffset;
+  diff += (diff & 0x8000) << 1;  // Account for sign extension in addiu.
+  static const uint8_t expected_code[] = {
+      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE,
+      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26,
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+}  // namespace linker
+}  // namespace art
diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc
new file mode 100644
index 0000000..8391b53
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips_test.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/relative_patcher_test.h"
+#include "linker/mips/relative_patcher_mips.h"
+
+namespace art {
+namespace linker {
+
+// We'll maximize the range of a single load instruction for dex cache array accesses
+// by aligning offset -32768 with the offset of the first used element.
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
+
+class MipsRelativePatcherTest : public RelativePatcherTest {
+ public:
+  MipsRelativePatcherTest() : RelativePatcherTest(kMips, "mips32r2") {}
+
+ protected:
+  uint32_t GetMethodOffset(uint32_t method_idx) {
+    auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
+    CHECK(result.first);
+    return result.second;
+  }
+};
+
+TEST_F(MipsRelativePatcherTest, DexCacheReference) {
+  dex_cache_arrays_begin_ = 0x12345678;
+  constexpr size_t kElementOffset = 0x1234;
+  static const uint8_t raw_code[] = {
+      0x00, 0x00, 0x10, 0x04,  // nal
+      0x34, 0x12, 0x12, 0x3C,  // lui  s2, high(diff); placeholder = 0x1234
+      0x78, 0x56, 0x52, 0x36,  // ori  s2, s2, low(diff); placeholder = 0x5678
+      0x21, 0x90, 0x5F, 0x02,  // addu s2, s2, ra
+  };
+  constexpr uint32_t literal_offset = 4;  // At lui (where patching starts).
+  constexpr uint32_t anchor_offset = 8;  // At ori (where PC+0 points).
+  ArrayRef<const uint8_t> code(raw_code);
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(literal_offset, nullptr, anchor_offset, kElementOffset),
+  };
+  AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+  ASSERT_TRUE(result.first);
+  uint32_t diff = dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset) +
+      kDexCacheArrayLwOffset;
+  static const uint8_t expected_code[] = {
+      0x00, 0x00, 0x10, 0x04,
+      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x12, 0x3C,
+      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x36,
+      0x21, 0x90, 0x5F, 0x02,
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+}  // namespace linker
+}  // namespace art
diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc
index 3a22983..7765594 100644
--- a/compiler/linker/relative_patcher.cc
+++ b/compiler/linker/relative_patcher.cc
@@ -22,6 +22,9 @@
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "linker/arm64/relative_patcher_arm64.h"
 #endif
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "linker/mips/relative_patcher_mips.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_x86
 #include "linker/x86/relative_patcher_x86.h"
 #endif
@@ -95,6 +98,11 @@
       return std::unique_ptr<RelativePatcher>(
           new Arm64RelativePatcher(provider, features->AsArm64InstructionSetFeatures()));
 #endif
+#ifdef ART_ENABLE_CODEGEN_mips
+    case kMips:
+      return std::unique_ptr<RelativePatcher>(
+          new MipsRelativePatcher(features->AsMipsInstructionSetFeatures()));
+#endif
     default:
       return std::unique_ptr<RelativePatcher>(new RelativePatcherNone);
   }
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 6d1f944..672018b 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -444,7 +444,7 @@
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(20U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(132 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(133 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 4232002..cdc7df1 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -1066,6 +1066,14 @@
                                                                      target_offset);
                 break;
               }
+              case LinkerPatch::Type::kTypeRelative: {
+                uint32_t target_offset = GetTargetObjectOffset(GetTargetType(patch));
+                writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+                                                                     patch,
+                                                                     offset_ + literal_offset,
+                                                                     target_offset);
+                break;
+              }
               case LinkerPatch::Type::kCall: {
                 uint32_t target_offset = GetTargetOffset(patch);
                 PatchCodeAddress(&patched_code_, literal_offset, target_offset);
@@ -2187,6 +2195,7 @@
                                          oat_dex_file.dex_file_location_checksum_,
                                          /* oat_dex_file */ nullptr,
                                          verify,
+                                         verify,
                                          &error_msg));
     if (dex_files.back() == nullptr) {
       LOG(ERROR) << "Failed to open dex file from oat file. File: " << oat_dex_file.GetLocation()
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index e9fcfe2..1fc247f 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -847,7 +847,7 @@
       }
       // Try index range obtained by induction variable analysis.
       // Disables dynamic bce if OOB is certain.
-      if (InductionRangeFitsIn(&array_range, bounds_check, index, &try_dynamic_bce)) {
+      if (InductionRangeFitsIn(&array_range, bounds_check, &try_dynamic_bce)) {
         ReplaceInstruction(bounds_check, index);
         return;
       }
@@ -912,9 +912,9 @@
 
   static bool HasSameInputAtBackEdges(HPhi* phi) {
     DCHECK(phi->IsLoopHeaderPhi());
-    auto&& inputs = phi->GetInputs();
+    HConstInputsRef inputs = phi->GetInputs();
     // Start with input 1. Input 0 is from the incoming block.
-    HInstruction* input1 = inputs[1];
+    const HInstruction* input1 = inputs[1];
     DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
         *phi->GetBlock()->GetPredecessors()[1]));
     for (size_t i = 2; i < inputs.size(); ++i) {
@@ -1299,33 +1299,30 @@
    * parameter try_dynamic_bce is set to false if OOB is certain.
    */
   bool InductionRangeFitsIn(ValueRange* array_range,
-                            HInstruction* context,
-                            HInstruction* index,
+                            HBoundsCheck* context,
                             bool* try_dynamic_bce) {
     InductionVarRange::Value v1;
     InductionVarRange::Value v2;
     bool needs_finite_test = false;
-    if (induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test)) {
-      do {
-        if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
-            v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
-          DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
-          DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
-          ValueRange index_range(GetGraph()->GetArena(),
-                                 ValueBound(v1.instruction, v1.b_constant),
-                                 ValueBound(v2.instruction, v2.b_constant));
-          // If analysis reveals a certain OOB, disable dynamic BCE.
-          if (index_range.GetLower().LessThan(array_range->GetLower()) ||
-              index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
-            *try_dynamic_bce = false;
-            return false;
-          }
-          // Use analysis for static bce only if loop is finite.
-          if (!needs_finite_test && index_range.FitsIn(array_range)) {
-            return true;
-          }
+    HInstruction* index = context->InputAt(0);
+    HInstruction* hint = ValueBound::HuntForDeclaration(context->InputAt(1));
+    if (induction_range_.GetInductionRange(context, index, hint, &v1, &v2, &needs_finite_test)) {
+      if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
+          v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
+        DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
+        DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
+        ValueRange index_range(GetGraph()->GetArena(),
+                               ValueBound(v1.instruction, v1.b_constant),
+                               ValueBound(v2.instruction, v2.b_constant));
+        // If analysis reveals a certain OOB, disable dynamic BCE. Otherwise,
+        // use analysis for static bce only if loop is finite.
+        if (index_range.GetLower().LessThan(array_range->GetLower()) ||
+            index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
+          *try_dynamic_bce = false;
+        } else if (!needs_finite_test && index_range.FitsIn(array_range)) {
+          return true;
         }
-      } while (induction_range_.RefineOuter(&v1, &v2));
+      }
     }
     return false;
   }
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 6e851bf..4520f9b 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -111,7 +111,7 @@
         << " " << locations->Out();
   }
 
-  auto&& inputs = instruction->GetInputs();
+  HConstInputsRef inputs = instruction->GetInputs();
   for (size_t i = 0; i < inputs.size(); ++i) {
     DCHECK(CheckType(inputs[i]->GetType(), locations->InAt(i)))
       << inputs[i]->GetType() << " " << locations->InAt(i);
@@ -146,6 +146,13 @@
       : mirror::Array::LengthOffset().Uint32Value();
 }
 
+uint32_t CodeGenerator::GetArrayDataOffset(HArrayGet* array_get) {
+  DCHECK(array_get->GetType() == Primitive::kPrimChar || !array_get->IsStringCharAt());
+  return array_get->IsStringCharAt()
+      ? mirror::String::ValueOffset().Uint32Value()
+      : mirror::Array::DataOffset(Primitive::ComponentSize(array_get->GetType())).Uint32Value();
+}
+
 bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
   DCHECK_EQ((*block_order_)[current_block_index_], current);
   return GetNextBlockToEmit() == FirstNonEmptyBlock(next);
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 82a54d2..9364be3 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -345,6 +345,11 @@
   // accessing the String's `count` field in String intrinsics.
   static uint32_t GetArrayLengthOffset(HArrayLength* array_length);
 
+  // Helper that returns the offset of the array's data.
+  // Note: Besides the normal arrays, we also use the HArrayGet for
+  // accessing the String's `value` field in String intrinsics.
+  static uint32_t GetArrayDataOffset(HArrayGet* array_get);
+
   void EmitParallelMoves(Location from1,
                          Location to1,
                          Primitive::Type type1,
@@ -449,10 +454,15 @@
                              SlowPathCode* slow_path) = 0;
 
   // Check if the desired_string_load_kind is supported. If it is, return it,
-  // otherwise return a fall-back info that should be used instead.
+  // otherwise return a fall-back kind that should be used instead.
   virtual HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) = 0;
 
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) = 0;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -496,6 +506,20 @@
     LabelType label;
   };
 
+  // Type patch info used for recording locations of required linker patches and
+  // target types. The actual type address can be absolute or PC-relative.
+  // TODO: Consider merging with MethodPatchInfo and StringPatchInfo - all these
+  // classes contain the dex file, some index and the label.
+  template <typename LabelType>
+  struct TypePatchInfo {
+    TypePatchInfo(const DexFile& df, uint32_t index)
+        : dex_file(df), type_index(index), label() { }
+
+    const DexFile& dex_file;
+    uint32_t type_index;
+    LabelType label;
+  };
+
   CodeGenerator(HGraph* graph,
                 size_t number_of_core_registers,
                 size_t number_of_fpu_registers,
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index eca9e2c..5316d59 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -176,8 +176,11 @@
         locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
-    arm_codegen->InvokeRuntime(
-        QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    arm_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
@@ -427,7 +430,9 @@
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast())
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
+            instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
@@ -490,8 +495,12 @@
     Register reg_out = out_.AsRegister<Register>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
-    DCHECK(!instruction_->IsInvoke() ||
-           (instruction_->IsInvokeStaticOrDirect() &&
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
             instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
@@ -504,7 +513,7 @@
     // introduce a copy of it, `index`.
     Location index = index_;
     if (index_.IsValid()) {
-      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
       if (instruction_->IsArrayGet()) {
         // Compute the actual memory offset and store it in `index`.
         Register index_reg = index_.AsRegister<Register>();
@@ -552,7 +561,11 @@
             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
         __ AddConstant(index_reg, index_reg, offset_);
       } else {
-        DCHECK(instruction_->IsInvoke());
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
         DCHECK(instruction_->GetLocations()->Intrinsified());
         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
@@ -792,6 +805,9 @@
       boot_image_string_patches_(StringReferenceValueComparator(),
                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_type_patches_(TypeReferenceValueComparator(),
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_address_patches_(std::less<uint32_t>(),
                                   graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
@@ -1873,6 +1889,8 @@
   LocationSummary* locations = invoke->GetLocations();
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   Register hidden_reg = locations->GetTemp(1).AsRegister<Register>();
+  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+      invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
@@ -1898,14 +1916,10 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   __ MaybeUnpoisonHeapReference(temp);
-  __ LoadFromOffset(kLoadWord, temp, temp,
-        mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
-  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex(), kArmPointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
-  __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   uint32_t entry_point =
       ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value();
+  __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // LR = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
   // LR();
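
This restores the embedded-IMT dispatch: the interface method's IMT index is reduced modulo the fixed table size, so distinct interface methods may share a slot and conflicts are resolved through the IMT conflict stub. A standalone sketch of the slot selection; the table size is an assumed value for the demo, the real one comes from mirror::Class::kImtSize:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t kImtSize = 64u;  // Assumed table size for the demo only.
  uint32_t imt_index = 131u;      // E.g. the IMT index assigned to an interface method.
  std::printf("embedded IMT slot = %u\n", imt_index % kImtSize);  // 131 % 64 == 3
  return 0;
}
```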
@@ -4286,11 +4300,11 @@
   Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
   Location out_loc = locations->Out();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
 
   Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -4304,7 +4318,6 @@
     }
 
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -4318,7 +4331,6 @@
     }
 
     case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -4332,7 +4344,6 @@
     }
 
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -4346,7 +4357,6 @@
     }
 
     case Primitive::kPrimInt: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -4363,7 +4373,6 @@
       static_assert(
           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       // /* HeapReference<Object> */ out =
       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
@@ -4398,7 +4407,6 @@
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -4411,7 +4419,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       SRegister out = out_loc.AsFpuRegister<SRegister>();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -4424,7 +4431,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       SRegister out = out_loc.AsFpuRegisterPairLow<SRegister>();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -5077,13 +5083,71 @@
   __ Pop(static_cast<Register>(reg));
 }
 
+HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_class_load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadClass::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      // We disable pc-relative load when there is an irreducible loop, as the optimization
+      // is incompatible with it.
+      // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
+      // with irreducible loops.
+      if (GetGraph()->HasIrreducibleLoops()) {
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      }
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_class_load_kind;
+}
+
 void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(R0),
-      /* code_generator_supports_read_barrier */ true);
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Location::RegisterLocation(R0),
+        /* code_generator_supports_read_barrier */ true);
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
+      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
+      load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
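
GetSupportedLoadClassKind() mirrors the existing GetSupportedLoadStringKind() hook: the pass that picks a load kind proposes one and the backend either accepts it or demotes it. A hedged sketch of that contract with an illustrative caller, not the actual selection code:

```cpp
// Illustrative caller; `codegen` is a CodeGeneratorARM*. Grounded in the switch above:
// read barriers demote the boot-image kinds and irreducible loops demote
// kDexCachePcRelative, in both cases to kDexCacheViaMethod.
HLoadClass::LoadKind desired = HLoadClass::LoadKind::kDexCachePcRelative;
HLoadClass::LoadKind supported = codegen->GetSupportedLoadClassKind(desired);
DCHECK(supported == desired || supported == HLoadClass::LoadKind::kDexCacheViaMethod);
```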
 
 void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
@@ -5100,37 +5164,97 @@
 
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
 
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    GenerateGcRootFieldLoad(
-        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-  } else {
-    // /* GcRoot<mirror::Class>[] */ out =
-    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-    __ LoadFromOffset(kLoadWord,
-                      out,
-                      current_method,
-                      ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
-    // /* GcRoot<mirror::Class> */ out = out[type_index]
-    GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+  bool generate_null_check = false;
+  switch (cls->GetLoadKind()) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      Register current_method = locations->InAt(0).AsRegister<Register>();
+      GenerateGcRootFieldLoad(
+          cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ LoadLiteral(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
+                                                                    cls->GetTypeIndex()));
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      CodeGeneratorARM::PcRelativePatchInfo* labels =
+          codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+      __ BindTrackedLabel(&labels->movw_label);
+      __ movw(out, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->movt_label);
+      __ movt(out, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->add_pc_label);
+      __ add(out, out, ShifterOperand(PC));
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      // 16-bit LDR immediate has a 5-bit offset multiplied by the size and that gives
+      // a 128B range. To try and reduce the number of literals if we load multiple types,
+      // simply split the dex cache address to a 128B aligned base loaded from a literal
+      // and the remaining offset embedded in the load.
+      static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes.");
+      DCHECK_ALIGNED(cls->GetAddress(), 4u);
+      constexpr size_t offset_bits = /* encoded bits */ 5 + /* scale */ 2;
+      uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits);
+      uint32_t offset = address & MaxInt<uint32_t>(offset_bits);
+      __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address));
+      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      Register base_reg = locations->InAt(0).AsRegister<Register>();
+      HArmDexCacheArraysBase* base = cls->InputAt(0)->AsArmDexCacheArraysBase();
+      int32_t offset = cls->GetDexCacheElementOffset() - base->GetElementOffset();
+      // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      Register current_method = locations->InAt(0).AsRegister<Register>();
+      __ LoadFromOffset(kLoadWord,
+                        out,
+                        current_method,
+                        ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+      generate_null_check = !cls->IsInDexCache();
+    }
+  }
 
-    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
-      DCHECK(cls->CanCallRuntime());
-      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
-          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-      codegen_->AddSlowPath(slow_path);
-      if (!cls->IsInDexCache()) {
-        __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
-      }
-      if (cls->MustGenerateClinitCheck()) {
-        GenerateClassInitializationCheck(slow_path, out);
-      } else {
-        __ Bind(slow_path->GetExitLabel());
-      }
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
+        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
+    if (generate_null_check) {
+      __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+    }
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
     }
   }
 }
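
A worked example of the 128B split in the kDexCacheAddress case above. The mask value assumes MaxInt<uint32_t>(7) evaluates to 0x7F, which is what the 128B range stated in the comment implies; the address itself is made up:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t address = 0x12345678u;        // Example dex cache GC-root slot address.
  const uint32_t mask = (1u << (5 + 2)) - 1u;  // 0x7F: 5 encoded bits scaled by 4 bytes.
  uint32_t base_address = address & ~mask;     // 0x12345600, loaded from a deduplicated literal.
  uint32_t offset = address & mask;            // 0x78, folded into the 16-bit LDR.
  std::printf("base=0x%08x offset=0x%02x\n", base_address, offset);
  return 0;
}
```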
@@ -5262,6 +5386,7 @@
       uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits);
       uint32_t offset = address & MaxInt<uint32_t>(offset_bits);
       __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address));
+      // /* GcRoot<mirror::String> */ out = *(base_address + offset)
       GenerateGcRootFieldLoad(load, out_loc, out, offset);
       break;
     }
@@ -5269,6 +5394,7 @@
       Register base_reg = locations->InAt(0).AsRegister<Register>();
       HArmDexCacheArraysBase* base = load->InputAt(0)->AsArmDexCacheArraysBase();
       int32_t offset = load->GetDexCacheElementOffset() - base->GetElementOffset();
+      // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset)
       GenerateGcRootFieldLoad(load, out_loc, base_reg, offset);
       break;
     }
@@ -6085,8 +6211,9 @@
 
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Location no_index = Location::NoLocation();
+  ScaleFactor no_scale_factor = TIMES_1;
   GenerateReferenceLoadWithBakerReadBarrier(
-      instruction, ref, obj, offset, no_index, temp, needs_null_check);
+      instruction, ref, obj, offset, no_index, no_scale_factor, temp, needs_null_check);
 }
 
 void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6099,10 +6226,14 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  ScaleFactor scale_factor = TIMES_4;
   GenerateReferenceLoadWithBakerReadBarrier(
-      instruction, ref, obj, data_offset, index, temp, needs_null_check);
+      instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
 }
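
With the new `scale_factor` parameter, one helper covers all three reference-load shapes. A standalone model of the effective address in each case; the field and array data offsets below are illustrative values:

```cpp
#include <cstdint>
#include <cstdio>

// addr = obj + offset + (index << scale): field loads pass no index, ArrayGet of
// references passes scale TIMES_4 (i.e. 2), and the UnsafeGetObject intrinsics pass
// the raw byte offset as "index" with no scaling.
uint32_t EffectiveAddress(uint32_t obj, uint32_t offset, uint32_t index, uint32_t scale) {
  return obj + offset + (index << scale);
}

int main() {
  std::printf("field @12:     0x%x\n", EffectiveAddress(0x1000u, 12u, 0u, 0u));   // 0x100c
  std::printf("array elem 5:  0x%x\n", EffectiveAddress(0x1000u, 12u, 5u, 2u));   // 0x1020
  std::printf("unsafe off 16: 0x%x\n", EffectiveAddress(0x1000u, 0u, 16u, 0u));   // 0x1010
  return 0;
}
```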
 
 void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6110,6 +6241,7 @@
                                                                  Register obj,
                                                                  uint32_t offset,
                                                                  Location index,
+                                                                 ScaleFactor scale_factor,
                                                                  Location temp,
                                                                  bool needs_null_check) {
   DCHECK(kEmitCompilerReadBarrier);
@@ -6164,17 +6296,22 @@
 
   // The actual reference load.
   if (index.IsValid()) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    // /* HeapReference<Object> */ ref =
-    //     *(obj + offset + index * sizeof(HeapReference<Object>))
+    // Load types involving an "index": ArrayGet and
+    // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+    // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
     if (index.IsConstant()) {
       size_t computed_offset =
-          (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset;
+          (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
       __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
     } else {
-      __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+      // Handle the special case of the
+      // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics, which use
+      // a register pair as index ("long offset"), of which only the low
+      // part contains data.
+      Register index_reg = index.IsRegisterPair()
+          ? index.AsRegisterPairLow<Register>()
+          : index.AsRegister<Register>();
+      __ add(IP, obj, ShifterOperand(index_reg, LSL, scale_factor));
       __ LoadFromOffset(kLoadWord, ref_reg, IP, offset);
     }
   } else {
@@ -6452,6 +6589,11 @@
   return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
 }
 
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeTypePatch(
+    const DexFile& dex_file, uint32_t type_index) {
+  return NewPcRelativePatch(dex_file, type_index, &pc_relative_type_patches_);
+}
+
 CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch(
     const DexFile& dex_file, uint32_t element_offset) {
   return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
@@ -6470,6 +6612,13 @@
       [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
 }
 
+Literal* CodeGeneratorARM::DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
+                                                           uint32_t type_index) {
+  return boot_image_type_patches_.GetOrCreate(
+      TypeReference(&dex_file, type_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
 Literal* CodeGeneratorARM::DeduplicateBootImageAddressLiteral(uint32_t address) {
   bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
   Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
@@ -6489,6 +6638,8 @@
       /* MOVW+MOVT for each base */ 2u * pc_relative_dex_cache_patches_.size() +
       boot_image_string_patches_.size() +
       /* MOVW+MOVT for each base */ 2u * pc_relative_string_patches_.size() +
+      boot_image_type_patches_.size() +
+      /* MOVW+MOVT for each base */ 2u * pc_relative_type_patches_.size() +
       boot_image_address_patches_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
@@ -6564,6 +6715,35 @@
                                                                add_pc_offset,
                                                                string_index));
   }
+  for (const auto& entry : boot_image_type_patches_) {
+    const TypeReference& target_type = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = literal->GetLabel()->Position();
+    linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
+                                                     target_type.dex_file,
+                                                     target_type.type_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    uint32_t type_index = info.offset_or_index;
+    DCHECK(info.add_pc_label.IsBound());
+    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position());
+    // Add MOVW patch.
+    DCHECK(info.movw_label.IsBound());
+    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position());
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(movw_offset,
+                                                             &dex_file,
+                                                             add_pc_offset,
+                                                             type_index));
+    // Add MOVT patch.
+    DCHECK(info.movt_label.IsBound());
+    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position());
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(movt_offset,
+                                                             &dex_file,
+                                                             add_pc_offset,
+                                                             type_index));
+  }
   for (const auto& entry : boot_image_address_patches_) {
     DCHECK(GetCompilerOptions().GetIncludePatchInformation());
     Literal* literal = entry.second;
@@ -6779,11 +6959,8 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kArmPointerSize).SizeValue();
   } else {
-    __ LoadFromOffset(kLoadWord, locations->Out().AsRegister<Register>(),
-        locations->InAt(0).AsRegister<Register>(),
-        mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex(), kArmPointerSize));
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
   }
   __ LoadFromOffset(kLoadWord,
                     locations->Out().AsRegister<Register>(),
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 0020f7b..477c4f1 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -24,6 +24,7 @@
 #include "parallel_move_resolver.h"
 #include "utils/arm/assembler_thumb2.h"
 #include "utils/string_reference.h"
+#include "utils/type_reference.h"
 
 namespace art {
 namespace arm {
@@ -407,6 +408,11 @@
   HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
 
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -419,10 +425,10 @@
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
   // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
-  // and boot image strings. The only difference is the interpretation of the offset_or_index.
-  // The PC-relative address is loaded with three instructions, MOVW+MOVT
-  // to load the offset to base_reg and then ADD base_reg, PC. The offset is
-  // calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we
+  // and boot image strings/types. The only difference is the interpretation of the
+  // offset_or_index. The PC-relative address is loaded with three instructions,
+  // MOVW+MOVT to load the offset to base_reg and then ADD base_reg, PC. The offset
+  // is calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we
   // currently emit these 3 instructions together, instruction scheduling could
   // split this sequence apart, so we keep separate labels for each of them.
   struct PcRelativePatchInfo {
@@ -431,7 +437,7 @@
     PcRelativePatchInfo(PcRelativePatchInfo&& other) = default;
 
     const DexFile& target_dex_file;
-    // Either the dex cache array element offset or the string index.
+    // Either the dex cache array element offset or the string/type index.
     uint32_t offset_or_index;
     Label movw_label;
     Label movt_label;
@@ -439,9 +445,11 @@
   };
 
   PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index);
+  PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, uint32_t type_index);
   PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                        uint32_t element_offset);
   Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index);
+  Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index);
   Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
   Literal* DeduplicateDexCacheAddressLiteral(uint32_t address);
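
Given the PcRelativePatchInfo comment above (the offset is computed from the ADD's effective PC, which is the ADD's address plus 4 on Thumb2), the value the linker writes into the MOVW/MOVT pair can be modelled as below. A minimal sketch, not the actual relocation code:

```cpp
#include <cstdint>

// After `movw/movt out, #imm` followed by `add out, out, pc`, the register holds
// imm + (add_pc + 4), so the linker picks imm = target - (add_pc + 4).
constexpr uint32_t PcRelativeImmediate(uint32_t target_address, uint32_t add_insn_address) {
  return target_address - (add_insn_address + 4u);
}

static_assert(PcRelativeImmediate(0x30001000u, 0x20000010u) == 0x10000FECu,
              "example: 0x30001000 - (0x20000010 + 4)");
```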
 
@@ -464,6 +472,16 @@
                                              Location index,
                                              Location temp,
                                              bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 ScaleFactor scale_factor,
+                                                 Location temp,
+                                                 bool needs_null_check);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
@@ -519,16 +537,6 @@
   void GenerateExplicitNullCheck(HNullCheck* instruction);
 
  private:
-  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
-  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                 Location ref,
-                                                 Register obj,
-                                                 uint32_t offset,
-                                                 Location index,
-                                                 Location temp,
-                                                 bool needs_null_check);
-
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
   using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
@@ -536,6 +544,9 @@
   using BootStringToLiteralMap = ArenaSafeMap<StringReference,
                                               Literal*,
                                               StringReferenceValueComparator>;
+  using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
+                                            Literal*,
+                                            TypeReferenceValueComparator>;
 
   Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
   Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
@@ -568,6 +579,10 @@
   BootStringToLiteralMap boot_image_string_patches_;
   // PC-relative String patch info.
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
+  BootTypeToLiteralMap boot_image_type_patches_;
+  // PC-relative type patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // Deduplication map for patchable boot image addresses.
   Uint32ToLiteralMap boot_image_address_patches_;
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 5d3c8c5..fc2c2c3 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -237,8 +237,11 @@
     codegen->EmitParallelMoves(
         locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
         locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
-    arm64_codegen->InvokeRuntime(
-        QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    arm64_codegen->InvokeRuntime(entry_point_offset, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
@@ -595,7 +598,9 @@
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast())
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
+            instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
@@ -658,8 +663,12 @@
     Primitive::Type type = Primitive::kPrimNot;
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
-    DCHECK(!instruction_->IsInvoke() ||
-           (instruction_->IsInvokeStaticOrDirect() &&
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
             instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
@@ -677,7 +686,7 @@
     // introduce a copy of it, `index`.
     Location index = index_;
     if (index_.IsValid()) {
-      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
       if (instruction_->IsArrayGet()) {
         // Compute the actual memory offset and store it in `index`.
         Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
@@ -725,7 +734,11 @@
             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
         __ Add(index_reg, index_reg, Operand(offset_));
       } else {
-        DCHECK(instruction_->IsInvoke());
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
         DCHECK(instruction_->GetLocations()->Intrinsified());
         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
@@ -920,6 +933,9 @@
       boot_image_string_patches_(StringReferenceValueComparator(),
                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_type_patches_(TypeReferenceValueComparator(),
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_address_patches_(std::less<uint32_t>(),
                                   graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Save the link register (containing the return address) to mimic Quick.
@@ -2051,8 +2067,8 @@
   Register obj = InputRegisterAt(instruction, 0);
   LocationSummary* locations = instruction->GetLocations();
   Location index = locations->InAt(1);
-  uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
   Location out = locations->Out();
+  uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
 
   MacroAssembler* masm = GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
@@ -3490,6 +3506,8 @@
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   LocationSummary* locations = invoke->GetLocations();
   Register temp = XRegisterFrom(locations->GetTemp(0));
+  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+      invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   Offset class_offset = mirror::Object::ClassOffset();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
@@ -3519,10 +3537,6 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
-  __ Ldr(temp,
-      MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
-  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex(), kArm64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
   // lr = temp->GetEntryPoint();
@@ -3725,6 +3739,12 @@
   return NewPcRelativePatch(dex_file, string_index, adrp_label, &pc_relative_string_patches_);
 }
 
+vixl::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(const DexFile& dex_file,
+                                                        uint32_t type_index,
+                                                        vixl::Label* adrp_label) {
+  return NewPcRelativePatch(dex_file, type_index, adrp_label, &pc_relative_type_patches_);
+}
+
 vixl::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                                  uint32_t element_offset,
                                                                  vixl::Label* adrp_label) {
@@ -3751,6 +3771,13 @@
       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
 }
 
+vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral(
+    const DexFile& dex_file, uint32_t type_index) {
+  return boot_image_type_patches_.GetOrCreate(
+      TypeReference(&dex_file, type_index),
+      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
+}
+
 vixl::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(uint64_t address) {
   bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
   Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
@@ -3770,6 +3797,8 @@
       pc_relative_dex_cache_patches_.size() +
       boot_image_string_patches_.size() +
       pc_relative_string_patches_.size() +
+      boot_image_type_patches_.size() +
+      pc_relative_type_patches_.size() +
       boot_image_address_patches_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
@@ -3810,6 +3839,19 @@
                                                                info.pc_insn_label->location(),
                                                                info.offset_or_index));
   }
+  for (const auto& entry : boot_image_type_patches_) {
+    const TypeReference& target_type = entry.first;
+    vixl::Literal<uint32_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::TypePatch(literal->offset(),
+                                                     target_type.dex_file,
+                                                     target_type.type_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_type_patches_) {
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(info.label.location(),
+                                                             &info.target_dex_file,
+                                                             info.pc_insn_label->location(),
+                                                             info.offset_or_index));
+  }
   for (const auto& entry : boot_image_address_patches_) {
     DCHECK(GetCompilerOptions().GetIncludePatchInformation());
     vixl::Literal<uint32_t>* literal = entry.second;
@@ -3875,13 +3917,63 @@
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
+HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_class_load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadClass::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_class_load_kind;
+}
+
 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      LocationFrom(calling_convention.GetRegisterAt(0)),
-      LocationFrom(vixl::x0),
-      /* code_generator_supports_read_barrier */ true);
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        LocationFrom(calling_convention.GetRegisterAt(0)),
+        LocationFrom(vixl::x0),
+        /* code_generator_supports_read_barrier */ true);
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
+      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
@@ -3897,35 +3989,111 @@
 
   Location out_loc = cls->GetLocations()->Out();
   Register out = OutputRegister(cls);
-  Register current_method = InputRegisterAt(cls, 0);
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    GenerateGcRootFieldLoad(
-        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
-  } else {
-    MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
-    // /* GcRoot<mirror::Class>[] */ out =
-    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-    __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
-    // /* GcRoot<mirror::Class> */ out = out[type_index]
-    GenerateGcRootFieldLoad(
-        cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
 
-    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
-      DCHECK(cls->CanCallRuntime());
-      SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
-          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-      codegen_->AddSlowPath(slow_path);
-      if (!cls->IsInDexCache()) {
-        __ Cbz(out, slow_path->GetEntryLabel());
+  bool generate_null_check = false;
+  switch (cls->GetLoadKind()) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      Register current_method = InputRegisterAt(cls, 0);
+      GenerateGcRootFieldLoad(
+          cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!kEmitCompilerReadBarrier);
+      __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
+                                                            cls->GetTypeIndex()));
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      // Add ADRP with its PC-relative type patch.
+      const DexFile& dex_file = cls->GetDexFile();
+      uint32_t type_index = cls->GetTypeIndex();
+      vixl::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
+      {
+        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(adrp_label);
+        __ adrp(out.X(), /* offset placeholder */ 0);
       }
-      if (cls->MustGenerateClinitCheck()) {
-        GenerateClassInitializationCheck(slow_path, out);
-      } else {
-        __ Bind(slow_path->GetExitLabel());
+      // Add ADD with its PC-relative type patch.
+      vixl::Label* add_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label);
+      {
+        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(add_label);
+        __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0));
       }
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK(cls->GetAddress() != 0u && IsUint<32>(cls->GetAddress()));
+      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(cls->GetAddress()));
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      // LDR immediate has a 12-bit offset multiplied by the size and for 32-bit loads
+      // that gives a 16KiB range. To try and reduce the number of literals if we load
+      // multiple types, simply split the dex cache address to a 16KiB aligned base
+      // loaded from a literal and the remaining offset embedded in the load.
+      static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes.");
+      DCHECK_ALIGNED(cls->GetAddress(), 4u);
+      constexpr size_t offset_bits = /* encoded bits */ 12 + /* scale */ 2;
+      uint64_t base_address = cls->GetAddress() & ~MaxInt<uint64_t>(offset_bits);
+      uint32_t offset = cls->GetAddress() & MaxInt<uint64_t>(offset_bits);
+      __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address));
+      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
+      GenerateGcRootFieldLoad(cls, out_loc, out.X(), offset);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      // Add ADRP with its PC-relative DexCache access patch.
+      const DexFile& dex_file = cls->GetDexFile();
+      uint32_t element_offset = cls->GetDexCacheElementOffset();
+      vixl::Label* adrp_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
+      {
+        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(adrp_label);
+        __ adrp(out.X(), /* offset placeholder */ 0);
+      }
+      // Add LDR with its PC-relative DexCache access patch.
+      vixl::Label* ldr_label =
+          codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
+      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
+      GenerateGcRootFieldLoad(cls, out_loc, out.X(), /* offset placeholder */ 0, ldr_label);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      MemberOffset resolved_types_offset =
+          ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      Register current_method = InputRegisterAt(cls, 0);
+      __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      GenerateGcRootFieldLoad(
+          cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+  }
+
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
+    SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
+        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
+    if (generate_null_check) {
+      __ Cbz(out, slow_path->GetEntryLabel());
+    }
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
     }
   }
 }
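
The kDexCacheAddress case uses the same base/offset split as ARM, except that a 32-bit LDR with a 12-bit scaled immediate gives a 16KiB window. A standalone worked example with a made-up address, assuming MaxInt<uint64_t>(14) evaluates to 0x3FFF:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t address = 0x7f1234567cULL;         // Example dex cache GC-root slot address.
  const uint64_t mask = (1ull << (12 + 2)) - 1ull;  // 0x3FFF: 12 encoded bits scaled by 4 bytes.
  uint64_t base_address = address & ~mask;          // 0x7f12344000, loaded from a literal.
  uint32_t offset = static_cast<uint32_t>(address & mask);  // 0x167c, folded into the LDR.
  std::printf("base=0x%llx offset=0x%x\n", static_cast<unsigned long long>(base_address), offset);
  return 0;
}
```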
@@ -4046,6 +4214,7 @@
       uint64_t base_address = load->GetAddress() & ~MaxInt<uint64_t>(offset_bits);
       uint32_t offset = load->GetAddress() & MaxInt<uint64_t>(offset_bits);
       __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address));
+      // /* GcRoot<mirror::String> */ out = *(base_address + offset)
       GenerateGcRootFieldLoad(load, out_loc, out.X(), offset);
       break;
     }
@@ -4062,6 +4231,7 @@
       // Add LDR with its PC-relative DexCache access patch.
       vixl::Label* ldr_label =
           codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
+      // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
       GenerateGcRootFieldLoad(load, out_loc, out.X(), /* offset placeholder */ 0, ldr_label);
       break;
     }
@@ -4940,8 +5110,16 @@
 
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Location no_index = Location::NoLocation();
-  GenerateReferenceLoadWithBakerReadBarrier(
-      instruction, ref, obj, offset, no_index, temp, needs_null_check, use_load_acquire);
+  size_t no_scale_factor = 0U;
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            offset,
+                                            no_index,
+                                            no_scale_factor,
+                                            temp,
+                                            needs_null_check,
+                                            use_load_acquire);
 }
 
 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -4958,10 +5136,21 @@
   // never use Load-Acquire instructions on ARM64.
   const bool use_load_acquire = false;
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
-  GenerateReferenceLoadWithBakerReadBarrier(
-      instruction, ref, obj, data_offset, index, temp, needs_null_check, use_load_acquire);
+  size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            data_offset,
+                                            index,
+                                            scale_factor,
+                                            temp,
+                                            needs_null_check,
+                                            use_load_acquire);
 }
 
 void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -4969,15 +5158,16 @@
                                                                    vixl::Register obj,
                                                                    uint32_t offset,
                                                                    Location index,
+                                                                   size_t scale_factor,
                                                                    Register temp,
                                                                    bool needs_null_check,
                                                                    bool use_load_acquire) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
-  // If `index` is a valid location, then we are emitting an array
-  // load, so we shouldn't be using a Load Acquire instruction.
-  // In other words: `index.IsValid()` => `!use_load_acquire`.
-  DCHECK(!index.IsValid() || !use_load_acquire);
+  // If we are emitting an array load, we should not be using a
+  // Load Acquire instruction.  In other words:
+  // `instruction->IsArrayGet()` => `!use_load_acquire`.
+  DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
 
   MacroAssembler* masm = GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
@@ -5034,20 +5224,33 @@
 
   // The actual reference load.
   if (index.IsValid()) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    // /* HeapReference<Object> */ ref =
-    //     *(obj + offset + index * sizeof(HeapReference<Object>))
-    const size_t shift_amount = Primitive::ComponentSizeShift(type);
-    if (index.IsConstant()) {
-      uint32_t computed_offset = offset + (Int64ConstantFrom(index) << shift_amount);
-      Load(type, ref_reg, HeapOperand(obj, computed_offset));
+    // Load types involving an "index".
+    if (use_load_acquire) {
+      // UnsafeGetObjectVolatile intrinsic case.
+      // Register `index` is not an index in an object array, but an
+      // offset to an object reference field within object `obj`.
+      DCHECK(instruction->IsInvoke()) << instruction->DebugName();
+      DCHECK(instruction->GetLocations()->Intrinsified());
+      DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
+          << instruction->AsInvoke()->GetIntrinsic();
+      DCHECK_EQ(offset, 0U);
+      DCHECK_EQ(scale_factor, 0U);
+      DCHECK_EQ(needs_null_check, 0U);
+      // /* HeapReference<Object> */ ref = *(obj + index)
+      MemOperand field = HeapOperand(obj, XRegisterFrom(index));
+      LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
     } else {
-      temp2 = temps.AcquireW();
-      __ Add(temp2, obj, offset);
-      Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, shift_amount));
-      temps.Release(temp2);
+      // ArrayGet and UnsafeGetObject intrinsics cases.
+      // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+      if (index.IsConstant()) {
+        uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
+        Load(type, ref_reg, HeapOperand(obj, computed_offset));
+      } else {
+        temp2 = temps.AcquireW();
+        __ Add(temp2, obj, offset);
+        Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor));
+        temps.Release(temp2);
+      }
     }
   } else {
     // /* HeapReference<Object> */ ref = *(obj + offset)
@@ -5150,10 +5353,8 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kArm64PointerSize).SizeValue();
   } else {
-    __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
-        mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex(), kArm64PointerSize));
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
   }
   __ Ldr(XRegisterFrom(locations->Out()),
          MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
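
Note: for reference, a minimal standalone sketch of the two address computations the generalized reference load above selects between, assuming 32-bit heap references; ReferenceSlotAddress is a hypothetical helper for illustration, not an ART function.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Hypothetical helper mirroring `obj + offset + (index << scale_factor)`.
uint64_t ReferenceSlotAddress(uint64_t obj, uint32_t offset, uint64_t index, size_t scale_factor) {
  return obj + offset + (index << scale_factor);
}

int main() {
  // ArrayGet-style access: data offset plus index scaled by
  // sizeof(HeapReference<Object>) == 4, i.e. scale_factor == 2.
  std::printf("array slot: %#llx\n",
              static_cast<unsigned long long>(ReferenceSlotAddress(0x1000, 12, 3, 2)));
  // UnsafeGetObjectVolatile-style access: offset == 0, scale_factor == 0,
  // and `index` already holds the raw field offset (loaded with acquire).
  std::printf("field slot: %#llx\n",
              static_cast<unsigned long long>(ReferenceSlotAddress(0x1000, 0, 16, 0)));
  return 0;
}
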
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 422963e..d4bf695 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -26,6 +26,7 @@
 #include "parallel_move_resolver.h"
 #include "utils/arm64/assembler_arm64.h"
 #include "utils/string_reference.h"
+#include "utils/type_reference.h"
 #include "vixl/a64/disasm-a64.h"
 #include "vixl/a64/macro-assembler-a64.h"
 
@@ -460,6 +461,11 @@
   HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
 
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -482,6 +488,14 @@
                                         uint32_t string_index,
                                         vixl::Label* adrp_label = nullptr);
 
+  // Add a new PC-relative type patch for an instruction and return the label
+  // to be bound before the instruction. The instruction will be either the
+  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
+  // to the associated ADRP patch label).
+  vixl::Label* NewPcRelativeTypePatch(const DexFile& dex_file,
+                                      uint32_t type_index,
+                                      vixl::Label* adrp_label = nullptr);
+
   // Add a new PC-relative dex cache array patch for an instruction and return
   // the label to be bound before the instruction. The instruction will be
   // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
@@ -492,6 +506,8 @@
 
   vixl::Literal<uint32_t>* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
                                                              uint32_t string_index);
+  vixl::Literal<uint32_t>* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
+                                                           uint32_t type_index);
   vixl::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
   vixl::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address);
 
@@ -515,6 +531,17 @@
                                              Location index,
                                              vixl::Register temp,
                                              bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 vixl::Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 size_t scale_factor,
+                                                 vixl::Register temp,
+                                                 bool needs_null_check,
+                                                 bool use_load_acquire);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
@@ -570,17 +597,6 @@
   void GenerateExplicitNullCheck(HNullCheck* instruction);
 
  private:
-  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
-  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                 Location ref,
-                                                 vixl::Register obj,
-                                                 uint32_t offset,
-                                                 Location index,
-                                                 vixl::Register temp,
-                                                 bool needs_null_check,
-                                                 bool use_load_acquire);
-
   using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
   using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::Literal<uint32_t>*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference,
@@ -589,6 +605,9 @@
   using BootStringToLiteralMap = ArenaSafeMap<StringReference,
                                               vixl::Literal<uint32_t>*,
                                               StringReferenceValueComparator>;
+  using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
+                                            vixl::Literal<uint32_t>*,
+                                            TypeReferenceValueComparator>;
 
   vixl::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
   vixl::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
@@ -598,13 +617,14 @@
   vixl::Literal<uint64_t>* DeduplicateMethodCodeLiteral(MethodReference target_method);
 
   // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
-  // and boot image strings. The only difference is the interpretation of the offset_or_index.
+  // and boot image strings/types. The only difference is the interpretation of the
+  // offset_or_index.
   struct PcRelativePatchInfo {
     PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx)
         : target_dex_file(dex_file), offset_or_index(off_or_idx), label(), pc_insn_label() { }
 
     const DexFile& target_dex_file;
-    // Either the dex cache array element offset or the string index.
+    // Either the dex cache array element offset or the string/type index.
     uint32_t offset_or_index;
     vixl::Label label;
     vixl::Label* pc_insn_label;
@@ -646,6 +666,10 @@
   BootStringToLiteralMap boot_image_string_patches_;
   // PC-relative String patch info.
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
+  BootTypeToLiteralMap boot_image_type_patches_;
+  // PC-relative type patch info.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // Deduplication map for patchable boot image addresses.
   Uint32ToLiteralMap boot_image_address_patches_;
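
Note: a minimal sketch of the deduplication idea behind the Boot*ToLiteralMap members above, using std::map in place of ArenaSafeMap and a plain struct in place of vixl::Literal<uint32_t>; all names here are illustrative only.

#include <cstdint>
#include <map>
#include <utility>

struct Literal { uint32_t placeholder = 0u; };  // Stand-in for vixl::Literal<uint32_t>.

// Key: (dex file identity, type index), mirroring TypeReference.
using TypeKey = std::pair<const void*, uint32_t>;

std::map<TypeKey, Literal> boot_image_type_literals;

Literal* DeduplicateTypeLiteral(const void* dex_file, uint32_t type_index) {
  // operator[] default-constructs the placeholder on first use; repeated
  // (dex file, type index) requests return the same literal, so the linker
  // patches a single constant no matter how many loads reference the type.
  return &boot_image_type_literals[TypeKey(dex_file, type_index)];
}
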
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index d5bad28..37f1c35 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -39,6 +39,10 @@
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = A0;
 
+// We'll maximize the range of a single load instruction for dex cache array accesses
+// by aligning offset -32768 with the offset of the first used element.
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
+
 Location MipsReturnLocation(Primitive::Type return_type) {
   switch (return_type) {
     case Primitive::kPrimBoolean:
@@ -166,11 +170,15 @@
                                locations->InAt(1),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimInt);
-    mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    mips_codegen->InvokeRuntime(entry_point_offset,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this,
                                 IsDirectEntrypoint(kQuickThrowArrayBounds));
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
@@ -473,7 +481,12 @@
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
       assembler_(graph->GetArena(), &isa_features),
-      isa_features_(isa_features) {
+      isa_features_(isa_features),
+      method_patches_(MethodReferenceComparator(),
+                      graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      call_patches_(MethodReferenceComparator(),
+                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Save RA (containing the return address) to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(RA));
 }
@@ -944,6 +957,71 @@
   }
 }
 
+void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+  DCHECK(linker_patches->empty());
+  size_t size =
+      method_patches_.size() +
+      call_patches_.size() +
+      pc_relative_dex_cache_patches_.size();
+  linker_patches->reserve(size);
+  for (const auto& entry : method_patches_) {
+    const MethodReference& target_method = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
+                                                       target_method.dex_file,
+                                                       target_method.dex_method_index));
+  }
+  for (const auto& entry : call_patches_) {
+    const MethodReference& target_method = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::CodePatch(literal_offset,
+                                                     target_method.dex_file,
+                                                     target_method.dex_method_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t base_element_offset = info.offset_or_index;
+    DCHECK(info.high_label.IsBound());
+    uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+    DCHECK(info.pc_rel_label.IsBound());
+    uint32_t pc_rel_offset = __ GetLabelLocation(&info.pc_rel_label);
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(high_offset,
+                                                              &dex_file,
+                                                              pc_rel_offset,
+                                                              base_element_offset));
+  }
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch(
+    const DexFile& dex_file, uint32_t element_offset) {
+  return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch(
+    const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
+  patches->emplace_back(dex_file, offset_or_index);
+  return &patches->back();
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method,
+                                                     MethodToLiteralMap* map) {
+  return map->GetOrCreate(
+      target_method,
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateMethodAddressLiteral(MethodReference target_method) {
+  return DeduplicateMethodLiteral(target_method, &method_patches_);
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateMethodCodeLiteral(MethodReference target_method) {
+  return DeduplicateMethodLiteral(target_method, &call_patches_);
+}
+
 void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
   MipsLabel done;
   Register card = AT;
@@ -1635,11 +1713,11 @@
   LocationSummary* locations = instruction->GetLocations();
   Register obj = locations->InAt(0).AsRegister<Register>();
   Location index = locations->InAt(1);
-  Primitive::Type type = instruction->GetType();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
 
+  Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1653,7 +1731,6 @@
     }
 
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1667,7 +1744,6 @@
     }
 
     case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1682,7 +1758,6 @@
     }
 
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1699,7 +1774,6 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
       DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1714,7 +1788,6 @@
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       Register out = locations->Out().AsRegisterPairLow<Register>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1729,7 +1802,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       FRegister out = locations->Out().AsFpuRegister<FRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1744,7 +1816,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       FRegister out = locations->Out().AsFpuRegister<FRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -3701,6 +3772,8 @@
 void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+      invoke->GetImtIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize);
@@ -3717,10 +3790,6 @@
     __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ LoadFromOffset(kLoadWord, temp, temp,
-      mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
-  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex(), kMipsPointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -3746,12 +3815,38 @@
   // art::PrepareForRegisterAllocation.
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
+  HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
+  HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+
+  // kDirectAddressWithFixup and kCallDirectWithFixup need no extra input on R6 because
+  // R6 has PC-relative addressing.
+  bool has_extra_input = !isR6 &&
+      ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) ||
+       (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup));
+
+  if (invoke->HasPcRelativeDexCache()) {
+    // kDexCachePcRelative is mutually exclusive with
+    // kDirectAddressWithFixup/kCallDirectWithFixup.
+    CHECK(!has_extra_input);
+    has_extra_input = true;
+  }
+
   IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && has_extra_input) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
     return;
   }
 
   HandleInvoke(invoke);
+
+  // Add the extra input register if either the dex cache array base register
+  // or the PC-relative base register for accessing literals is needed.
+  if (has_extra_input) {
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen) {
@@ -3769,42 +3864,110 @@
   return HLoadString::LoadKind::kDexCacheViaMethod;
 }
 
+HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  DCHECK_NE(desired_class_load_kind, HLoadClass::LoadKind::kReferrersClass);
+  // TODO: Implement other kinds.
+  return HLoadClass::LoadKind::kDexCacheViaMethod;
+}
+
+Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                  Register temp) {
+  CHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return location.AsRegister<Register>();
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex());
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather than trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+    __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
+    return temp;
+  }
+  return location.AsRegister<Register>();
+}
+
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method ATTRIBUTE_UNUSED) {
-  switch (desired_dispatch_info.method_load_kind) {
+  HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
+  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+  bool fallback_load = true;
+  bool fallback_call = true;
+  switch (dispatch_info.method_load_kind) {
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement these types. For the moment, we fall back to kDexCacheViaMethod.
-      return HInvokeStaticOrDirect::DispatchInfo {
-        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-        0u,
-        0u
-      };
+      fallback_load = has_irreducible_loops;
+      break;
     default:
+      fallback_load = false;
       break;
   }
-  switch (desired_dispatch_info.code_ptr_location) {
+  switch (dispatch_info.code_ptr_location) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      fallback_call = has_irreducible_loops;
+      break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
-      // TODO: Implement these types. For the moment, we fall back to kCallArtMethod.
-      return HInvokeStaticOrDirect::DispatchInfo {
-        desired_dispatch_info.method_load_kind,
-        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-        desired_dispatch_info.method_load_data,
-        0u
-      };
+      // TODO: Implement this type.
+      break;
     default:
-      return desired_dispatch_info;
+      fallback_call = false;
+      break;
   }
+  if (fallback_load) {
+    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+    dispatch_info.method_load_data = 0;
+  }
+  if (fallback_call) {
+    dispatch_info.code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+    dispatch_info.direct_code_ptr = 0;
+  }
+  return dispatch_info;
 }
 
 void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // All registers are assumed to be correctly set up per the calling convention.
-
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
-  switch (invoke->GetMethodLoadKind()) {
+  HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
+  HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
+  bool isR6 = isa_features_.IsR6();
+  // kDirectAddressWithFixup and kCallDirectWithFixup have no extra input on R6 because
+  // R6 has PC-relative addressing.
+  bool has_extra_input = invoke->HasPcRelativeDexCache() ||
+      (!isR6 &&
+       ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) ||
+        (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup)));
+  Register base_reg = has_extra_input
+      ? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>())
+      : ZERO;
+
+  // For better instruction scheduling we load the direct code pointer before the method pointer.
+  switch (code_ptr_location) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // T9 = invoke->GetDirectCodePtr();
+      __ LoadConst32(T9, invoke->GetDirectCodePtr());
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      // T9 = code address from literal pool with link-time patch.
+      __ LoadLiteral(T9, base_reg, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod()));
+      break;
+    default:
+      break;
+  }
+
+  switch (method_load_kind) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
       // temp = thread->string_init_entrypoint
       __ LoadFromOffset(kLoadWord,
@@ -3819,11 +3982,18 @@
       __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement these types.
-      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
-      LOG(FATAL) << "Unsupported";
-      UNREACHABLE();
+      __ LoadLiteral(temp.AsRegister<Register>(),
+                     base_reg,
+                     DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      HMipsDexCacheArraysBase* base =
+          invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase();
+      int32_t offset =
+          invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register reg = temp.AsRegister<Register>();
@@ -3854,20 +4024,19 @@
     }
   }
 
-  switch (invoke->GetCodePtrLocation()) {
+  switch (code_ptr_location) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
-      __ Jalr(&frame_entry_label_, T9);
+      __ Bal(&frame_entry_label_);
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
-      // LR = invoke->GetDirectCodePtr();
-      __ LoadConst32(T9, invoke->GetDirectCodePtr());
-      // LR()
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      // T9 prepared above for better instruction scheduling.
+      // T9()
       __ Jalr(T9);
       __ Nop();
       break;
-    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
-      // TODO: Implement these types.
+      // TODO: Implement this type.
       // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
       LOG(FATAL) << "Unsupported";
       UNREACHABLE();
@@ -4726,7 +4895,6 @@
   Primitive::Type input_type = conversion->GetInputType();
   bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
   bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
-  bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
 
   DCHECK_NE(input_type, result_type);
 
@@ -4735,7 +4903,9 @@
     Register dst_low = locations->Out().AsRegisterPairLow<Register>();
     Register src = locations->InAt(0).AsRegister<Register>();
 
-    __ Move(dst_low, src);
+    if (dst_low != src) {
+      __ Move(dst_low, src);
+    }
     __ Sra(dst_high, src, 31);
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
     Register dst = locations->Out().AsRegister<Register>();
@@ -4764,7 +4934,9 @@
         }
         break;
       case Primitive::kPrimInt:
-        __ Move(dst, src);
+        if (dst != src) {
+          __ Move(dst, src);
+        }
         break;
 
       default:
@@ -4921,11 +5093,7 @@
         uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
         __ LoadConst32(TMP, High32Bits(min_val));
         __ Mtc1(ZERO, FTMP);
-        if (fpu_32bit) {
-          __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1));
-        } else {
-          __ Mthc1(TMP, FTMP);
-        }
+        __ MoveToFpuHigh(TMP, FTMP);
       }
 
       if (isR6) {
@@ -5139,6 +5307,57 @@
   }
 }
 
+void LocationsBuilderMIPS::VisitMipsComputeBaseMethodAddress(
+    HMipsComputeBaseMethodAddress* insn) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(insn, LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitMipsComputeBaseMethodAddress(
+    HMipsComputeBaseMethodAddress* insn) {
+  LocationSummary* locations = insn->GetLocations();
+  Register reg = locations->Out().AsRegister<Register>();
+
+  CHECK(!codegen_->GetInstructionSetFeatures().IsR6());
+
+  // Generate a dummy PC-relative call to obtain PC.
+  __ Nal();
+  // Grab the return address off RA.
+  __ Move(reg, RA);
+
+  // Remember this offset (the obtained PC value) for later use with the constant area.
+  __ BindPcRelBaseLabel();
+}
+
+void LocationsBuilderMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) {
+  Register reg = base->GetLocations()->Out().AsRegister<Register>();
+  CodeGeneratorMIPS::PcRelativePatchInfo* info =
+      codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
+
+  if (codegen_->GetInstructionSetFeatures().IsR6()) {
+    __ Bind(&info->high_label);
+    __ Bind(&info->pc_rel_label);
+    // Add a 32-bit offset to PC.
+    __ Auipc(reg, /* placeholder */ 0x1234);
+    __ Addiu(reg, reg, /* placeholder */ 0x5678);
+  } else {
+    // Generate a dummy PC-relative call to obtain PC.
+    __ Nal();
+    __ Bind(&info->high_label);
+    __ Lui(reg, /* placeholder */ 0x1234);
+    __ Bind(&info->pc_rel_label);
+    __ Ori(reg, reg, /* placeholder */ 0x5678);
+    // Add a 32-bit offset to PC.
+    __ Addu(reg, reg, RA);
+  }
+}
+
 void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   // The trampoline uses the same calling convention as dex calling conventions,
   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
@@ -5164,12 +5383,8 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kMipsPointerSize).SizeValue();
   } else {
-    __ LoadFromOffset(kLoadWord,
-                      locations->Out().AsRegister<Register>(),
-                      locations->InAt(0).AsRegister<Register>(),
-                      mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex(), kMipsPointerSize));
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value();
   }
   __ LoadFromOffset(kLoadWord,
                     locations->Out().AsRegister<Register>(),
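
Note: a small sketch of the lw-offset arithmetic used by the kDexCachePcRelative path above, assuming the patched base register points 0x8000 bytes past the first used dex cache array element, as the kDexCacheArrayLwOffset comment describes; LwOffset is a hypothetical helper.

#include <cassert>
#include <cstdint>

constexpr int32_t kDexCacheArrayLwOffset = 0x8000;  // Same constant as in the diff.

int32_t LwOffset(uint32_t dex_cache_array_offset, uint32_t base_element_offset) {
  // base register == address of first used element + 0x8000, so the final
  // immediate ranges from -32768 upward and fits lw's signed 16-bit field.
  int32_t offset = static_cast<int32_t>(dex_cache_array_offset) -
                   static_cast<int32_t>(base_element_offset) -
                   kDexCacheArrayLwOffset;
  assert(offset >= INT16_MIN && offset <= INT16_MAX);
  return offset;
}
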
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 8c0bae6..08f74c0 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -285,6 +285,9 @@
   MipsAssembler* GetAssembler() OVERRIDE { return &assembler_; }
   const MipsAssembler& GetAssembler() const OVERRIDE { return assembler_; }
 
+  // Emit linker patches.
+  void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+
   void MarkGCCard(Register object, Register value);
 
   // Register allocation.
@@ -349,6 +352,11 @@
   HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
 
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -367,7 +375,39 @@
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
 
+  // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
+  // and boot image strings. The only difference is the interpretation of the offset_or_index.
+  struct PcRelativePatchInfo {
+    PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx)
+        : target_dex_file(dex_file), offset_or_index(off_or_idx) { }
+    PcRelativePatchInfo(PcRelativePatchInfo&& other) = default;
+
+    const DexFile& target_dex_file;
+    // Either the dex cache array element offset or the string index.
+    uint32_t offset_or_index;
+    // Label for the instruction loading the most significant half of the offset that's added to PC
+    // to form the base address (the least significant half is loaded with the instruction that
+    // follows).
+    MipsLabel high_label;
+    // Label for the instruction corresponding to PC+0.
+    MipsLabel pc_rel_label;
+  };
+
+  PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
+                                                       uint32_t element_offset);
+
  private:
+  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
+  using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+
+  Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
+  Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
+  Literal* DeduplicateMethodCodeLiteral(MethodReference target_method);
+  PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
+                                          uint32_t offset_or_index,
+                                          ArenaDeque<PcRelativePatchInfo>* patches);
+
   // Labels for each block that will be compiled.
   MipsLabel* block_labels_;
   MipsLabel frame_entry_label_;
@@ -377,6 +417,12 @@
   MipsAssembler assembler_;
   const MipsInstructionSetFeatures& isa_features_;
 
+  // Method patch info: maps a MethodReference to a literal for the method address or code.
+  MethodToLiteralMap method_patches_;
+  MethodToLiteralMap call_patches_;
+  // PC-relative patch info for each HMipsDexCacheArraysBase.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS);
 };
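
Note: a hedged sketch of the embedded-IMT offset computation that the invoke-interface paths above switch back to; kImtSize, the table start offset, and the pointer size are illustrative assumptions, not the runtime's actual values.

#include <cstddef>
#include <cstdint>

constexpr size_t kImtSize = 64;               // Assumption for illustration only.
constexpr size_t kEmbeddedImTableOffset = 0;  // Assumption: table start within the Class object.

uint32_t EmbeddedImTableEntryOffsetSketch(uint32_t imt_index, size_t pointer_size) {
  // The interface method's IMT index is reduced modulo the fixed table size,
  // then scaled by the pointer size to yield a byte offset inside the class.
  return static_cast<uint32_t>(kEmbeddedImTableOffset + (imt_index % kImtSize) * pointer_size);
}
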
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 539abf1..2e78884 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -127,10 +127,14 @@
                                locations->InAt(1),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimInt);
-    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    mips64_codegen->InvokeRuntime(entry_point_offset,
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
@@ -918,13 +922,13 @@
 
   // TODO: review; anything else?
 
-  // TODO: remove once all the issues with register saving/restoring are sorted out.
-  for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-    blocked_core_registers_[kCoreCalleeSaves[i]] = true;
-  }
-
-  for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-    blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+  if (GetGraph()->IsDebuggable()) {
+    // Stubs do not save callee-save floating point registers. If the graph
+    // is debuggable, we need to deal with these registers differently. For
+    // now, just block them.
+    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+    }
   }
 }
 
@@ -1289,11 +1293,11 @@
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   Location index = locations->InAt(1);
-  Primitive::Type type = instruction->GetType();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
 
+  Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1307,7 +1311,6 @@
     }
 
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1321,7 +1324,6 @@
     }
 
     case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1336,7 +1338,6 @@
     }
 
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1353,7 +1354,6 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
       DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       LoadOperandType load_type = (type == Primitive::kPrimNot) ? kLoadUnsignedWord : kLoadWord;
       if (index.IsConstant()) {
@@ -1369,7 +1369,6 @@
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       GpuRegister out = locations->Out().AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1384,7 +1383,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -1399,7 +1397,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
@@ -2935,6 +2932,8 @@
 void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
   GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
+  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+      invoke->GetImtIndex() % mirror::Class::kImtSize, kMips64PointerSize).Uint32Value();
   Location receiver = invoke->GetLocations()->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize);
@@ -2951,10 +2950,6 @@
     __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ LoadFromOffset(kLoadDoubleword, temp, temp,
-      mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
-  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex(), kMips64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -2986,19 +2981,6 @@
   }
 
   HandleInvoke(invoke);
-
-  // While SetupBlockedRegisters() blocks registers S2-S8 due to their
-  // clobbering somewhere else, reduce further register pressure by avoiding
-  // allocation of a register for the current method pointer like on x86 baseline.
-  // TODO: remove this once all the issues with register saving/restoring are
-  // sorted out.
-  if (invoke->HasCurrentMethodInput()) {
-    LocationSummary* locations = invoke->GetLocations();
-    Location location = locations->InAt(invoke->GetSpecialInputIndex());
-    if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-      locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
-    }
-  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) {
@@ -3016,6 +2998,13 @@
   return HLoadString::LoadKind::kDexCacheViaMethod;
 }
 
+HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  DCHECK_NE(desired_class_load_kind, HLoadClass::LoadKind::kReferrersClass);
+  // TODO: Implement other kinds.
+  return HLoadClass::LoadKind::kDexCacheViaMethod;
+}
+
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method ATTRIBUTE_UNUSED) {
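
Note: the bounds-check slow paths changed above (and again in the x86 file below) all pick their runtime entrypoint the same way; a compact sketch of that selection, with stand-in names for the quick entrypoint enum.

enum class QuickEntrypoint { kThrowArrayBounds, kThrowStringBounds };  // Stand-in names.

QuickEntrypoint SelectBoundsCheckEntrypoint(bool is_string_char_at) {
  // String.charAt() failures go through pThrowStringBounds; ordinary array
  // accesses keep pThrowArrayBounds.
  return is_string_char_at ? QuickEntrypoint::kThrowStringBounds
                           : QuickEntrypoint::kThrowArrayBounds;
}
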
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 9785a2e..4b462cc 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -340,6 +340,11 @@
   HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
 
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
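
Note: a minimal sketch of the contract shared by the GetSupportedLoadClassKind overrides declared in these headers: return the desired kind when the back end can handle it, otherwise a safe fallback. The enum values are a reduced, illustrative subset.

enum class LoadKind {  // Reduced subset for illustration.
  kReferrersClass,
  kBootImageAddress,
  kDexCachePcRelative,
  kDexCacheViaMethod,
};

LoadKind GetSupportedLoadClassKindSketch(LoadKind desired, bool supports_pc_relative) {
  if (desired == LoadKind::kDexCachePcRelative && !supports_pc_relative) {
    // Mirrors the MIPS/MIPS64 stubs above, which currently fall back to
    // kDexCacheViaMethod for every kind other than kReferrersClass.
    return LoadKind::kDexCacheViaMethod;
  }
  return desired;
}
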
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a21c295..1261619 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -148,10 +148,14 @@
         locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
-    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    x86_codegen->InvokeRuntime(entry_point_offset,
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
@@ -444,7 +448,9 @@
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast())
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
+            instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
@@ -507,8 +513,12 @@
     Register reg_out = out_.AsRegister<Register>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
-    DCHECK(!instruction_->IsInvoke() ||
-           (instruction_->IsInvokeStaticOrDirect() &&
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
             instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
@@ -521,7 +531,7 @@
     // introduce a copy of it, `index`.
     Location index = index_;
     if (index_.IsValid()) {
-      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
       if (instruction_->IsArrayGet()) {
         // Compute the actual memory offset and store it in `index`.
         Register index_reg = index_.AsRegister<Register>();
@@ -569,7 +579,11 @@
             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
         __ AddImmediate(index_reg, Immediate(offset_));
       } else {
-        DCHECK(instruction_->IsInvoke());
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
         DCHECK(instruction_->GetLocations()->Intrinsified());
         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
@@ -804,6 +818,7 @@
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       constant_area_start_(-1),
       fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       method_address_offset_(-1) {
@@ -2012,6 +2027,8 @@
   LocationSummary* locations = invoke->GetLocations();
   Register temp = locations->GetTemp(0).AsRegister<Register>();
   XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+      invoke->GetImtIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
@@ -2038,12 +2055,7 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   __ MaybeUnpoisonHeapReference(temp);
-  // temp = temp->GetAddressOfIMT()
-  __ movl(temp,
-      Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
   // temp = temp->GetImtEntryAt(method_offset);
-  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex(), kX86PointerSize));
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
   __ call(Address(temp,
@@ -4063,12 +4075,8 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kX86PointerSize).SizeValue();
   } else {
-    __ movl(locations->InAt(0).AsRegister<Register>(),
-        Address(locations->InAt(0).AsRegister<Register>(),
-        mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
-    // temp = temp->GetImtEntryAt(method_offset);
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex(), kX86PointerSize));
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
   }
   __ movl(locations->Out().AsRegister<Register>(),
           Address(locations->InAt(0).AsRegister<Register>(), method_offset));
@@ -4452,6 +4460,11 @@
   __ Bind(&string_patches_.back().label);
 }
 
+void CodeGeneratorX86::RecordTypePatch(HLoadClass* load_class) {
+  type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex());
+  __ Bind(&type_patches_.back().label);
+}
+
 Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                          uint32_t element_offset) {
   // Add the patch entry and bind its label at the end of the instruction.
@@ -4466,7 +4479,8 @@
       relative_call_patches_.size() +
       pc_relative_dex_cache_patches_.size() +
       simple_patches_.size() +
-      string_patches_.size();
+      string_patches_.size() +
+      type_patches_.size();
   linker_patches->reserve(size);
   // The label points to the end of the "movl" insn but the literal offset for method
   // patch needs to point to the embedded constant which occupies the last 4 bytes.
@@ -4502,6 +4516,13 @@
                                                                  GetMethodAddressOffset(),
                                                                  info.string_index));
     }
+    for (const TypePatchInfo<Label>& info : type_patches_) {
+      uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+      linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset,
+                                                               &info.dex_file,
+                                                               GetMethodAddressOffset(),
+                                                               info.type_index));
+    }
   } else {
     for (const StringPatchInfo<Label>& info : string_patches_) {
       uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
@@ -4509,6 +4530,12 @@
                                                          &info.dex_file,
                                                          info.string_index));
     }
+    for (const TypePatchInfo<Label>& info : type_patches_) {
+      uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+      linker_patches->push_back(LinkerPatch::TypePatch(literal_offset,
+                                                       &info.dex_file,
+                                                       info.type_index));
+    }
   }
 }
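
Note: a short sketch of the label-to-literal-offset adjustment used when recording the x86 patches below, assuming the bound label sits right after an instruction whose 32-bit immediate occupies its last four bytes; the adjustment value here is an assumption taken from that comment, not from the header.

#include <cstdint>

// Assumed value: the immediate is the last 4 bytes of the patched instruction.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;

uint32_t LiteralOffset(uint32_t label_position_after_insn) {
  // The label is bound at the end of the "movl"/"leal", so stepping back by
  // the adjustment lands on the embedded constant the linker will rewrite.
  return label_position_after_insn - kLabelPositionToLiteralOffsetAdjustment;
}
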
 
@@ -5040,11 +5067,11 @@
   Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
   Location out_loc = locations->Out();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
 
   Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
@@ -5056,7 +5083,6 @@
     }
 
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
@@ -5068,7 +5094,6 @@
     }
 
     case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
@@ -5080,7 +5105,6 @@
     }
 
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
@@ -5092,7 +5116,6 @@
     }
 
     case Primitive::kPrimInt: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
@@ -5107,7 +5130,6 @@
       static_assert(
           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       // /* HeapReference<Object> */ out =
       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
@@ -5141,7 +5163,6 @@
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -5159,7 +5180,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
@@ -5171,7 +5191,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
@@ -5873,13 +5892,72 @@
   __ popl(static_cast<Register>(reg));
 }
 
+HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_class_load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadClass::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      FALLTHROUGH_INTENDED;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());  // Note: boot image is also non-JIT.
+      // We disable pc-relative load when there is an irreducible loop, as the optimization
+      // is incompatible with it.
+      // TODO: Create as many X86ComputeBaseMethodAddress instructions as needed for methods
+      // with irreducible loops.
+      if (GetGraph()->HasIrreducibleLoops()) {
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      }
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_class_load_kind;
+}
+
 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(EAX),
-      /* code_generator_supports_read_barrier */ true);
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Location::RegisterLocation(EAX),
+        /* code_generator_supports_read_barrier */ true);
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
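+  // These load kinds need an input register: the current ArtMethod* or the PC-relative base.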
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
+      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
+      load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
+      load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
@@ -5896,39 +5974,86 @@
 
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
 
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    GenerateGcRootFieldLoad(
-        cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  } else {
-    // /* GcRoot<mirror::Class>[] */ out =
-    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-    __ movl(out, Address(current_method,
-                         ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
-    // /* GcRoot<mirror::Class> */ out = out[type_index]
-    GenerateGcRootFieldLoad(
-        cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
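+  // The dex cache kinds may load a null GC root; the slow path then resolves the class.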
+  bool generate_null_check = false;
+  switch (cls->GetLoadKind()) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      Register current_method = locations->InAt(0).AsRegister<Register>();
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
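+      // Emit a placeholder immediate; the linker replaces it with the boot image class address.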
+      __ movl(out, Immediate(/* placeholder */ 0));
+      codegen_->RecordTypePatch(cls);
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      Register method_address = locations->InAt(0).AsRegister<Register>();
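+      // Compute the address from the method address base; the linker fills in the offset.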
+      __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
+      codegen_->RecordTypePatch(cls);
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      __ movl(out, Immediate(address));
+      codegen_->RecordSimplePatch();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      // /* GcRoot<mirror::Class> */ out = *address
+      GenerateGcRootFieldLoad(cls, out_loc, Address::Absolute(address));
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      Register base_reg = locations->InAt(0).AsRegister<Register>();
+      uint32_t offset = cls->GetDexCacheElementOffset();
+      Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), offset);
+      // /* GcRoot<mirror::Class> */ out = *(base + offset)  /* PC-relative */
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      Register current_method = locations->InAt(0).AsRegister<Register>();
+      __ movl(out, Address(current_method,
+                           ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+  }
 
-    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
-      DCHECK(cls->CanCallRuntime());
-      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
-          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-      codegen_->AddSlowPath(slow_path);
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
+        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
 
-      if (!cls->IsInDexCache()) {
-        __ testl(out, out);
-        __ j(kEqual, slow_path->GetEntryLabel());
-      }
+    if (generate_null_check) {
+      __ testl(out, out);
+      __ j(kEqual, slow_path->GetEntryLabel());
+    }
 
-      if (cls->MustGenerateClinitCheck()) {
-        GenerateClassInitializationCheck(slow_path, out);
-      } else {
-        __ Bind(slow_path->GetExitLabel());
-      }
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
     }
   }
 }
@@ -6045,6 +6170,7 @@
     case HLoadString::LoadKind::kDexCacheAddress: {
       DCHECK_NE(load->GetAddress(), 0u);
       uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
+      // /* GcRoot<mirror::String> */ out = *address
       GenerateGcRootFieldLoad(load, out_loc, Address::Absolute(address));
       break;
     }
@@ -6052,6 +6178,7 @@
       Register base_reg = locations->InAt(0).AsRegister<Register>();
       uint32_t offset = load->GetDexCacheElementOffset();
       Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
+      // /* GcRoot<mirror::String> */ out = *(base + offset)  /* PC-relative */
       GenerateGcRootFieldLoad(
           load, out_loc, Address(base_reg, CodeGeneratorX86::kDummy32BitOffset), fixup_label);
       break;
@@ -6853,6 +6980,9 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
   Address src = index.IsConstant() ?
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 98dc8ca..2a9fb80 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -391,6 +391,11 @@
   HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
 
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -405,6 +410,7 @@
 
   void RecordSimplePatch();
   void RecordStringPatch(HLoadString* load_string);
+  void RecordTypePatch(HLoadClass* load_class);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
@@ -485,6 +491,14 @@
                                              Location index,
                                              Location temp,
                                              bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 const Address& src,
+                                                 Location temp,
+                                                 bool needs_null_check);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
@@ -555,15 +569,6 @@
   static constexpr int32_t kDummy32BitOffset = 256;
 
  private:
-  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
-  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                 Location ref,
-                                                 Register obj,
-                                                 const Address& src,
-                                                 Location temp,
-                                                 bool needs_null_check);
-
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
   struct PcRelativeDexCacheAccessInfo {
@@ -594,6 +599,8 @@
   ArenaDeque<Label> simple_patches_;
   // String patch locations.
   ArenaDeque<StringPatchInfo<Label>> string_patches_;
+  // Type patch locations.
+  ArenaDeque<TypePatchInfo<Label>> type_patches_;
 
   // Offset to the start of the constant area in the assembled code.
   // Used for fixups to the constant area.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 135f0c4..5e30203 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -204,10 +204,14 @@
         locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
-    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+    uint32_t entry_point_offset = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? QUICK_ENTRY_POINT(pThrowStringBounds)
+        : QUICK_ENTRY_POINT(pThrowArrayBounds);
+    x86_64_codegen->InvokeRuntime(entry_point_offset,
                                   instruction_,
                                   instruction_->GetDexPc(),
                                   this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
@@ -465,7 +469,9 @@
            instruction_->IsLoadClass() ||
            instruction_->IsLoadString() ||
            instruction_->IsInstanceOf() ||
-           instruction_->IsCheckCast())
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
+            instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
 
@@ -528,8 +534,12 @@
     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
-    DCHECK(!instruction_->IsInvoke() ||
-           (instruction_->IsInvokeStaticOrDirect() &&
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) &&
             instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
@@ -542,7 +552,7 @@
     // introduce a copy of it, `index`.
     Location index = index_;
     if (index_.IsValid()) {
-      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
       if (instruction_->IsArrayGet()) {
         // Compute real offset and store it in index_.
         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
@@ -590,7 +600,11 @@
             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
       } else {
-        DCHECK(instruction_->IsInvoke());
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
         DCHECK(instruction_->GetLocations()->Intrinsified());
         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
@@ -894,6 +908,11 @@
   __ Bind(&string_patches_.back().label);
 }
 
+void CodeGeneratorX86_64::RecordTypePatch(HLoadClass* load_class) {
+  type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex());
+  __ Bind(&type_patches_.back().label);
+}
+
 Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                             uint32_t element_offset) {
   // Add a patch entry and return the label.
@@ -908,7 +927,8 @@
       relative_call_patches_.size() +
       pc_relative_dex_cache_patches_.size() +
       simple_patches_.size() +
-      string_patches_.size();
+      string_patches_.size() +
+      type_patches_.size();
   linker_patches->reserve(size);
   // The label points to the end of the "movl" insn but the literal offset for method
   // patch needs to point to the embedded constant which occupies the last 4 bytes.
@@ -944,6 +964,14 @@
                                                                info.label.Position(),
                                                                info.string_index));
   }
+  for (const TypePatchInfo<Label>& info : type_patches_) {
+    // These are always PC-relative, see GetSupportedLoadClassKind().
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset,
+                                                             &info.dex_file,
+                                                             info.label.Position(),
+                                                             info.type_index));
+  }
 }
 
 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -1023,6 +1051,7 @@
         pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
@@ -2228,6 +2257,8 @@
   LocationSummary* locations = invoke->GetLocations();
   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
+  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+      invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
   Location receiver = locations->InAt(0);
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
 
@@ -2253,12 +2284,6 @@
   // intact/accessible until the end of the marking phase (the
   // concurrent copying collector may not in the future).
   __ MaybeUnpoisonHeapReference(temp);
-  // temp = temp->GetAddressOfIMT()
-  __ movq(temp,
-      Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
-  // temp = temp->GetImtEntryAt(method_offset);
-  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-      invoke->GetImtIndex(), kX86_64PointerSize));
   // temp = temp->GetImtEntryAt(method_offset);
   __ movq(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
@@ -3982,11 +4007,8 @@
     method_offset = mirror::Class::EmbeddedVTableEntryOffset(
         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
   } else {
-    __ movq(locations->Out().AsRegister<CpuRegister>(),
-            Address(locations->InAt(0).AsRegister<CpuRegister>(),
-            mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
-    method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
-        instruction->GetIndex(), kX86_64PointerSize));
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
   }
   __ movq(locations->Out().AsRegister<CpuRegister>(),
           Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
@@ -4540,11 +4562,11 @@
   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location index = locations->InAt(1);
   Location out_loc = locations->Out();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
 
   Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
@@ -4556,7 +4578,6 @@
     }
 
     case Primitive::kPrimByte: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
@@ -4568,7 +4589,6 @@
     }
 
     case Primitive::kPrimShort: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
@@ -4580,7 +4600,6 @@
     }
 
     case Primitive::kPrimChar: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
@@ -4592,7 +4611,6 @@
     }
 
     case Primitive::kPrimInt: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
@@ -4607,7 +4625,6 @@
       static_assert(
           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       // /* HeapReference<Object> */ out =
       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
@@ -4641,7 +4658,6 @@
     }
 
     case Primitive::kPrimLong: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movq(out, Address(obj,
@@ -4653,7 +4669,6 @@
     }
 
     case Primitive::kPrimFloat: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
@@ -4665,7 +4680,6 @@
     }
 
     case Primitive::kPrimDouble: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
@@ -5317,13 +5331,64 @@
   // No need for memory fence, thanks to the x86-64 memory model.
 }
 
+HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
+    HLoadClass::LoadKind desired_class_load_kind) {
+  if (kEmitCompilerReadBarrier) {
+    switch (desired_class_load_kind) {
+      case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadClass::LoadKind::kBootImageAddress:
+        // TODO: Implement for read barrier.
+        return HLoadClass::LoadKind::kDexCacheViaMethod;
+      default:
+        break;
+    }
+  }
+  switch (desired_class_load_kind) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      break;
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      DCHECK(!GetCompilerOptions().GetCompilePic());
+      // We prefer the always-available RIP-relative address for the x86-64 boot image.
+      return HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().GetCompilePic());
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+      break;
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK(Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      DCHECK(!Runtime::Current()->UseJitCompilation());
+      break;
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      break;
+  }
+  return desired_class_load_kind;
+}
+
 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
-  InvokeRuntimeCallingConvention calling_convention;
-  CodeGenerator::CreateLoadClassLocationSummary(
-      cls,
-      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(RAX),
-      /* code_generator_supports_read_barrier */ true);
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Location::RegisterLocation(RAX),
+        /* code_generator_supports_read_barrier */ true);
+    return;
+  }
+
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
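+  // Only the kinds that read through the current ArtMethod* need it as a register input;
+  // the PC-relative kinds use RIP-relative addressing and need no extra input.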
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
+      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
@@ -5340,37 +5405,86 @@
 
   Location out_loc = locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
-  CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
 
-  if (cls->IsReferrersClass()) {
-    DCHECK(!cls->CanCallRuntime());
-    DCHECK(!cls->MustGenerateClinitCheck());
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    GenerateGcRootFieldLoad(
-        cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  } else {
-    // /* GcRoot<mirror::Class>[] */ out =
-    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
-    __ movq(out, Address(current_method,
-                         ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
-    // /* GcRoot<mirror::Class> */ out = out[type_index]
-    GenerateGcRootFieldLoad(
-        cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
-
-    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
-      DCHECK(cls->CanCallRuntime());
-      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
-          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-      codegen_->AddSlowPath(slow_path);
-      if (!cls->IsInDexCache()) {
-        __ testl(out, out);
-        __ j(kEqual, slow_path->GetEntryLabel());
-      }
-      if (cls->MustGenerateClinitCheck()) {
-        GenerateClassInitializationCheck(slow_path, out);
+  bool generate_null_check = false;
+  switch (cls->GetLoadKind()) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+      break;
+    }
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(!kEmitCompilerReadBarrier);
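+      // RIP-relative leal with a placeholder; the linker fills in the offset to the class.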
+      __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
+      codegen_->RecordTypePatch(cls);
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress: {
+      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_NE(cls->GetAddress(), 0u);
+      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+      __ movl(out, Immediate(address));  // Zero-extended.
+      codegen_->RecordSimplePatch();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheAddress: {
+      DCHECK_NE(cls->GetAddress(), 0u);
+      // /* GcRoot<mirror::Class> */ out = *address
+      if (IsUint<32>(cls->GetAddress())) {
+        Address address = Address::Absolute(cls->GetAddress(), /* no_rip */ true);
+        GenerateGcRootFieldLoad(cls, out_loc, address);
       } else {
-        __ Bind(slow_path->GetExitLabel());
+        // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
+        __ movq(out, Immediate(cls->GetAddress()));
+        GenerateGcRootFieldLoad(cls, out_loc, Address(out, 0));
       }
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      uint32_t offset = cls->GetDexCacheElementOffset();
+      Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), offset);
+      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
+                                          /* no_rip */ false);
+      // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
+      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
+      __ movq(out,
+              Address(current_method,
+                      ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      GenerateGcRootFieldLoad(
+          cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
+      UNREACHABLE();
+  }
+
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
+        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
+    if (generate_null_check) {
+      __ testl(out, out);
+      __ j(kEqual, slow_path->GetEntryLabel());
+    }
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
     }
   }
 }
@@ -5461,6 +5575,7 @@
     }
     case HLoadString::LoadKind::kDexCacheAddress: {
       DCHECK_NE(load->GetAddress(), 0u);
+      // /* GcRoot<mirror::String> */ out = *address
       if (IsUint<32>(load->GetAddress())) {
         Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true);
         GenerateGcRootFieldLoad(load, out_loc, address);
@@ -5476,6 +5591,7 @@
       Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
                                           /* no_rip */ false);
+      // /* GcRoot<mirror::String> */ out = *address  /* PC-relative */
       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
       break;
     }
@@ -6317,6 +6433,9 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
   Address src = index.IsConstant() ?
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 7cf1245..d7cfd37 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -388,6 +388,11 @@
   HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
 
+  // Check if the desired_class_load_kind is supported. If it is, return it,
+  // otherwise return a fall-back kind that should be used instead.
+  HLoadClass::LoadKind GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+
   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
@@ -400,6 +405,7 @@
 
   void RecordSimplePatch();
   void RecordStringPatch(HLoadString* load_string);
+  void RecordTypePatch(HLoadClass* load_class);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
@@ -427,6 +433,14 @@
                                              Location index,
                                              Location temp,
                                              bool needs_null_check);
+  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 CpuRegister obj,
+                                                 const Address& src,
+                                                 Location temp,
+                                                 bool needs_null_check);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
@@ -529,15 +543,6 @@
   static constexpr int32_t kDummy32BitOffset = 256;
 
  private:
-  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
-  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
-                                                 Location ref,
-                                                 CpuRegister obj,
-                                                 const Address& src,
-                                                 Location temp,
-                                                 bool needs_null_check);
-
   struct PcRelativeDexCacheAccessInfo {
     PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
         : target_dex_file(dex_file), element_offset(element_off), label() { }
@@ -569,6 +574,8 @@
   ArenaDeque<Label> simple_patches_;
   // String patch locations.
   ArenaDeque<StringPatchInfo<Label>> string_patches_;
+  // Type patch locations.
+  ArenaDeque<TypePatchInfo<Label>> type_patches_;
 
   // Fixups for jump tables need to be handled specially.
   ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
index e9072b9..14c318e 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -44,8 +44,23 @@
   }
 
  private:
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    // If this is a load with PC-relative access to the dex cache types array,
+    // we need to add the dex cache arrays base as the special input.
+    if (load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCachePcRelative) {
+      // Initialize base for target dex file if needed.
+      const DexFile& dex_file = load_class->GetDexFile();
+      HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kArmPointerSize, &dex_file);
+      base->UpdateElementOffset(layout.TypeOffset(load_class->GetTypeIndex()));
+      // Add the special argument base to the load.
+      load_class->AddSpecialInput(base);
+    }
+  }
+
   void VisitLoadString(HLoadString* load_string) OVERRIDE {
-    // If this is a load with PC-relative access to the dex cache methods array,
+    // If this is a load with PC-relative access to the dex cache strings array,
     // we need to add the dex cache arrays base as the special input.
     if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) {
       // Initialize base for target dex file if needed.
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc
new file mode 100644
index 0000000..0f42d9c
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_cache_array_fixups_mips.h"
+
+#include "base/arena_containers.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace art {
+namespace mips {
+
+/**
+ * Finds instructions that need the dex cache arrays base as an input.
+ */
+class DexCacheArrayFixupsVisitor : public HGraphVisitor {
+ public:
+  explicit DexCacheArrayFixupsVisitor(HGraph* graph)
+      : HGraphVisitor(graph),
+        dex_cache_array_bases_(std::less<const DexFile*>(),
+                               // Attribute memory use to code generator.
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
+
+  void MoveBasesIfNeeded() {
+    for (const auto& entry : dex_cache_array_bases_) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      HMipsDexCacheArraysBase* base = entry.second;
+      base->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
+ private:
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    // If this is an invoke with PC-relative access to the dex cache methods array,
+    // we need to add the dex cache arrays base as the special input.
+    if (invoke->HasPcRelativeDexCache()) {
+      // Initialize base for target method dex file if needed.
+      MethodReference target_method = invoke->GetTargetMethod();
+      HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kMipsPointerSize, target_method.dex_file);
+      base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index));
+      // Add the special argument base to the method.
+      DCHECK(!invoke->HasCurrentMethodInput());
+      invoke->AddSpecialInput(base);
+    }
+  }
+
+  HMipsDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
+    return dex_cache_array_bases_.GetOrCreate(
+        &dex_file,
+        [this, &dex_file]() {
+          HMipsDexCacheArraysBase* base =
+              new (GetGraph()->GetArena()) HMipsDexCacheArraysBase(dex_file);
+          HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+          // Insert the base at the start of the entry block, move it to a better
+          // position later in MoveBasesIfNeeded().
+          entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
+          return base;
+        });
+  }
+
+  using DexCacheArraysBaseMap =
+      ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>;
+  DexCacheArraysBaseMap dex_cache_array_bases_;
+};
+
+void DexCacheArrayFixups::Run() {
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not run this optimization, as irreducible loops do not work with an instruction
+    // that can be live-in at the irreducible loop header.
+    return;
+  }
+  DexCacheArrayFixupsVisitor visitor(graph_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBasesIfNeeded();
+}
+
+}  // namespace mips
+}  // namespace art
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h
new file mode 100644
index 0000000..c8def28
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
+#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+namespace mips {
+
+class DexCacheArrayFixups : public HOptimization {
+ public:
+  DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "dex_cache_array_fixups_mips", stats) {}
+
+  void Run() OVERRIDE;
+};
+
+}  // namespace mips
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 2bd2403..c8cba20 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -362,7 +362,7 @@
                             instruction->GetId()));
     }
     size_t use_index = use.GetIndex();
-    auto&& user_inputs = user->GetInputs();
+    HConstInputsRef user_inputs = user->GetInputs();
     if ((use_index >= user_inputs.size()) || (user_inputs[use_index] != instruction)) {
       AddError(StringPrintf("User %s:%d of instruction %s:%d has a wrong "
                             "UseListNode index.",
@@ -490,7 +490,7 @@
   VisitInstruction(invoke);
 
   if (invoke->IsStaticWithExplicitClinitCheck()) {
-    HInstruction* last_input = invoke->GetInputs().back();
+    const HInstruction* last_input = invoke->GetInputs().back();
     if (last_input == nullptr) {
       AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
                             "has a null pointer as last input.",
@@ -664,17 +664,19 @@
   }
 }
 
-static bool IsSameSizeConstant(HInstruction* insn1, HInstruction* insn2) {
+static bool IsSameSizeConstant(const HInstruction* insn1, const HInstruction* insn2) {
   return insn1->IsConstant()
       && insn2->IsConstant()
       && Primitive::Is64BitType(insn1->GetType()) == Primitive::Is64BitType(insn2->GetType());
 }
 
-static bool IsConstantEquivalent(HInstruction* insn1, HInstruction* insn2, BitVector* visited) {
+static bool IsConstantEquivalent(const HInstruction* insn1,
+                                 const HInstruction* insn2,
+                                 BitVector* visited) {
   if (insn1->IsPhi() &&
       insn1->AsPhi()->IsVRegEquivalentOf(insn2)) {
-    auto&& insn1_inputs = insn1->GetInputs();
-    auto&& insn2_inputs = insn2->GetInputs();
+    HConstInputsRef insn1_inputs = insn1->GetInputs();
+    HConstInputsRef insn2_inputs = insn2->GetInputs();
     if (insn1_inputs.size() != insn2_inputs.size()) {
       return false;
     }
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 3084a4f..9d67373 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -372,6 +372,10 @@
   }
 
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    StartAttributeStream("load_kind") << load_class->GetLoadKind();
+    const char* descriptor = load_class->GetDexFile().GetTypeDescriptor(
+        load_class->GetDexFile().GetTypeId(load_class->GetTypeIndex()));
+    StartAttributeStream("class_name") << PrettyDescriptor(descriptor);
     StartAttributeStream("gen_clinit_check") << std::boolalpha
         << load_class->MustGenerateClinitCheck() << std::noboolalpha;
     StartAttributeStream("needs_access_check") << std::boolalpha
@@ -399,6 +403,16 @@
         << array_length->IsStringLength() << std::noboolalpha;
   }
 
+  void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
+    StartAttributeStream("is_string_char_at") << std::boolalpha
+        << bounds_check->IsStringCharAt() << std::noboolalpha;
+  }
+
+  void VisitArrayGet(HArrayGet* array_get) OVERRIDE {
+    StartAttributeStream("is_string_char_at") << std::boolalpha
+        << array_get->IsStringCharAt() << std::noboolalpha;
+  }
+
   void VisitArraySet(HArraySet* array_set) OVERRIDE {
     StartAttributeStream("value_can_be_null") << std::boolalpha
         << array_set->GetValueCanBeNull() << std::noboolalpha;
@@ -497,7 +511,7 @@
 
   void PrintInstruction(HInstruction* instruction) {
     output_ << instruction->DebugName();
-    auto&& inputs = instruction->GetInputs();
+    HConstInputsRef inputs = instruction->GetInputs();
     if (!inputs.empty()) {
       StringList input_list;
       for (const HInstruction* input : inputs) {
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 0a5cf80..129c2a9 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -341,7 +341,7 @@
                                                                          HInstruction* phi,
                                                                          size_t input_index) {
   // Match all phi inputs from input_index onwards exactly.
-  auto&& inputs = phi->GetInputs();
+  HInputsRef inputs = phi->GetInputs();
   DCHECK_LT(input_index, inputs.size());
   InductionInfo* a = LookupInfo(loop, inputs[input_index]);
   for (size_t i = input_index + 1; i < inputs.size(); i++) {
@@ -464,7 +464,7 @@
 HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhi(HInstruction* phi,
                                                                       size_t input_index) {
   // Match all phi inputs from input_index onwards exactly.
-  auto&& inputs = phi->GetInputs();
+  HInputsRef inputs = phi->GetInputs();
   DCHECK_LT(input_index, inputs.size());
   auto ita = cycle_.find(inputs[input_index]);
   if (ita != cycle_.end()) {
@@ -670,7 +670,7 @@
   //     an unsigned entity, for example, as in the following loop that uses the full range:
   //     for (int i = INT_MIN; i < INT_MAX; i++) // TC = UINT_MAX
   // (2) The TC is only valid if the loop is taken, otherwise TC = 0, as in:
-  //     for (int i = 12; i < U; i++) // TC = 0 when U < 12
+  //     for (int i = 12; i < U; i++) // TC = 0 when U <= 12
   //     If this cannot be determined at compile-time, the TC is only valid within the
   //     loop-body proper, not the loop-header unless enforced with an explicit taken-test.
   // (3) The TC is only valid if the loop is finite, otherwise TC has no value, as in:
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index f1965f0..7c74816 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -132,7 +132,7 @@
                                  InductionInfo* a,
                                  InductionInfo* b,
                                  Primitive::Type type) {
-    DCHECK(a != nullptr);
+    DCHECK(a != nullptr && b != nullptr);
     return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, type);
   }
 
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index bc920d9..5e587e0 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -73,9 +73,12 @@
   return v;
 }
 
-/**
- * Corrects a value for type to account for arithmetic wrap-around in lower precision.
- */
+/** Helper method to test for a constant value. */
+static bool IsConstantValue(InductionVarRange::Value v) {
+  return v.is_known && v.a_constant == 0;
+}
+
+/** Corrects a value for type to account for arithmetic wrap-around in lower precision. */
 static InductionVarRange::Value CorrectForType(InductionVarRange::Value v, Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimShort:
@@ -85,26 +88,15 @@
       // TODO: maybe some room for improvement, like allowing widening conversions
       const int32_t min = Primitive::MinValueOfIntegralType(type);
       const int32_t max = Primitive::MaxValueOfIntegralType(type);
-      return (v.is_known && v.a_constant == 0 && min <= v.b_constant && v.b_constant <= max)
+      return (IsConstantValue(v) && min <= v.b_constant && v.b_constant <= max)
           ? v
           : InductionVarRange::Value();
     }
     default:
-      // At int or higher.
       return v;
   }
 }
 
-/** Helper method to test for a constant value. */
-static bool IsConstantValue(InductionVarRange::Value v) {
-  return v.is_known && v.a_constant == 0;
-}
-
-/** Helper method to test for same constant value. */
-static bool IsSameConstantValue(InductionVarRange::Value v1, InductionVarRange::Value v2) {
-  return IsConstantValue(v1) && IsConstantValue(v2) && v1.b_constant == v2.b_constant;
-}
-
 /** Helper method to insert an instruction. */
 static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
   DCHECK(block != nullptr);
@@ -119,22 +111,22 @@
 //
 
 InductionVarRange::InductionVarRange(HInductionVarAnalysis* induction_analysis)
-    : induction_analysis_(induction_analysis) {
+    : induction_analysis_(induction_analysis),
+      chase_hint_(nullptr) {
   DCHECK(induction_analysis != nullptr);
 }
 
 bool InductionVarRange::GetInductionRange(HInstruction* context,
                                           HInstruction* instruction,
+                                          HInstruction* chase_hint,
                                           /*out*/Value* min_val,
                                           /*out*/Value* max_val,
                                           /*out*/bool* needs_finite_test) {
-  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
-  if (loop == nullptr) {
-    return false;  // no loop
-  }
-  HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction);
-  if (info == nullptr) {
-    return false;  // no induction information
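+  // Look up the enclosing loop and its induction and trip-count information in one step.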
+  HLoopInformation* loop = nullptr;
+  HInductionVarAnalysis::InductionInfo* info = nullptr;
+  HInductionVarAnalysis::InductionInfo* trip = nullptr;
+  if (!HasInductionInfo(context, instruction, &loop, &info, &trip)) {
+    return false;
   }
   // Type int or lower (this is not too restrictive since intended clients, like
   // bounds check elimination, will have truncated higher precision induction
@@ -148,45 +140,15 @@
     default:
       return false;
   }
-  // Set up loop information.
-  HBasicBlock* header = loop->GetHeader();
-  bool in_body = context->GetBlock() != header;
-  HInductionVarAnalysis::InductionInfo* trip =
-      induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
   // Find range.
+  chase_hint_ = chase_hint;
+  bool in_body = context->GetBlock() != loop->GetHeader();
   *min_val = GetVal(info, trip, in_body, /* is_min */ true);
   *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false));
   *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
   return true;
 }
 
-bool InductionVarRange::RefineOuter(/*in-out*/ Value* min_val,
-                                    /*in-out*/ Value* max_val) const {
-  if (min_val->instruction != nullptr || max_val->instruction != nullptr) {
-    Value v1_min = RefineOuter(*min_val, /* is_min */ true);
-    Value v2_max = RefineOuter(*max_val, /* is_min */ false);
-    // The refined range is safe if both sides refine the same instruction. Otherwise, since two
-    // different ranges are combined, the new refined range is safe to pass back to the client if
-    // the extremes of the computed ranges ensure no arithmetic wrap-around anomalies occur.
-    if (min_val->instruction != max_val->instruction) {
-      Value v1_max = RefineOuter(*min_val, /* is_min */ false);
-      Value v2_min = RefineOuter(*max_val, /* is_min */ true);
-      if (!IsConstantValue(v1_max) ||
-          !IsConstantValue(v2_min) ||
-          v1_max.b_constant > v2_min.b_constant) {
-        return false;
-      }
-    }
-    // Did something change?
-    if (v1_min.instruction != min_val->instruction || v2_max.instruction != max_val->instruction) {
-      *min_val = v1_min;
-      *max_val = v2_max;
-      return true;
-    }
-  }
-  return false;
-}
-
 bool InductionVarRange::CanGenerateCode(HInstruction* context,
                                         HInstruction* instruction,
                                         /*out*/bool* needs_finite_test,
@@ -226,7 +188,7 @@
 
 bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info,
                                    ConstantRequest request,
-                                   /*out*/ int64_t *value) const {
+                                   /*out*/ int64_t* value) const {
   if (info != nullptr) {
     // A direct 32-bit or 64-bit constant fetch. This immediately satisfies
     // any of the three requests (kExact, kAtMost, and KAtLeast).
@@ -236,27 +198,16 @@
         return true;
       }
     }
-    // Try range analysis while traversing outward on loops.
-    bool in_body = true;  // no known trip count
-    Value v_min = GetVal(info, nullptr, in_body, /* is_min */ true);
-    Value v_max = GetVal(info, nullptr, in_body, /* is_min */ false);
-    do {
-      // Make sure *both* extremes are known to avoid arithmetic wrap-around anomalies.
-      if (IsConstantValue(v_min) && IsConstantValue(v_max) && v_min.b_constant <= v_max.b_constant) {
-        if ((request == kExact && v_min.b_constant == v_max.b_constant) || request == kAtMost) {
-          *value = v_max.b_constant;
-          return true;
-        } else if (request == kAtLeast) {
-          *value = v_min.b_constant;
-          return true;
-        }
-      }
-    } while (RefineOuter(&v_min, &v_max));
-    // Exploit array length + c >= c, with c <= 0 to avoid arithmetic wrap-around anomalies
-    // (e.g. array length == maxint and c == 1 would yield minint).
-    if (request == kAtLeast) {
-      if (v_min.a_constant == 1 && v_min.b_constant <= 0 && v_min.instruction->IsArrayLength()) {
-        *value = v_min.b_constant;
+    // Try range analysis on the invariant, but only on a proper range to avoid
+    // wrap-around anomalies.
+    Value min_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ true);
+    Value max_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ false);
+    if (IsConstantValue(min_val) &&
+        IsConstantValue(max_val) && min_val.b_constant <= max_val.b_constant) {
+      if ((request == kExact && min_val.b_constant == max_val.b_constant) || request == kAtMost) {
+        *value = max_val.b_constant;
+        return true;
+      } else if (request == kAtLeast) {
+        *value = min_val.b_constant;
         return true;
       }
     }
@@ -264,6 +215,51 @@
   return false;
 }
 
+bool InductionVarRange::HasInductionInfo(
+    HInstruction* context,
+    HInstruction* instruction,
+    /*out*/ HLoopInformation** loop,
+    /*out*/ HInductionVarAnalysis::InductionInfo** info,
+    /*out*/ HInductionVarAnalysis::InductionInfo** trip) const {
+  HLoopInformation* l = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
+  if (l != nullptr) {
+    HInductionVarAnalysis::InductionInfo* i = induction_analysis_->LookupInfo(l, instruction);
+    if (i != nullptr) {
+      *loop = l;
+      *info = i;
+      *trip = induction_analysis_->LookupInfo(l, l->GetHeader()->GetLastInstruction());
+      return true;
+    }
+  }
+  return false;
+}
+
+bool InductionVarRange::IsWellBehavedTripCount(HInductionVarAnalysis::InductionInfo* trip) const {
+  if (trip != nullptr) {
+    // Both bounds that define a trip-count are well-behaved if they either are not defined
+    // in any loop, or are contained in a proper interval. This allows finding the min/max
+    // of an expression by chasing outward.
+    InductionVarRange range(induction_analysis_);
+    HInductionVarAnalysis::InductionInfo* lower = trip->op_b->op_a;
+    HInductionVarAnalysis::InductionInfo* upper = trip->op_b->op_b;
+    int64_t not_used = 0;
+    return (!HasFetchInLoop(lower) || range.IsConstant(lower, kAtLeast, &not_used)) &&
+           (!HasFetchInLoop(upper) || range.IsConstant(upper, kAtLeast, &not_used));
+  }
+  return true;
+}
+
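+// Returns true if the induction information fetches a value that is defined inside a loop.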
+bool InductionVarRange::HasFetchInLoop(HInductionVarAnalysis::InductionInfo* info) const {
+  if (info != nullptr) {
+    if (info->induction_class == HInductionVarAnalysis::kInvariant &&
+        info->operation == HInductionVarAnalysis::kFetch) {
+      return info->fetch->GetBlock()->GetLoopInformation() != nullptr;
+    }
+    return HasFetchInLoop(info->op_a) || HasFetchInLoop(info->op_b);
+  }
+  return false;
+}
+
 bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const {
   if (info != nullptr) {
     if (info->induction_class == HInductionVarAnalysis::kLinear) {
@@ -299,13 +295,13 @@
                                                       HInductionVarAnalysis::InductionInfo* trip,
                                                       bool in_body,
                                                       bool is_min) const {
-  // Detect common situation where an offset inside the trip count cancels out during range
+  // Detect common situation where an offset inside the trip-count cancels out during range
   // analysis (finding max a * (TC - 1) + OFFSET for a == 1 and TC = UPPER - OFFSET or finding
   // min a * (TC - 1) + OFFSET for a == -1 and TC = OFFSET - UPPER) to avoid losing information
   // with intermediate results that only incorporate single instructions.
   if (trip != nullptr) {
     HInductionVarAnalysis::InductionInfo* trip_expr = trip->op_a;
-    if (trip_expr->operation == HInductionVarAnalysis::kSub) {
+    if (trip_expr->type == info->type && trip_expr->operation == HInductionVarAnalysis::kSub) {
       int64_t stride_value = 0;
       if (IsConstant(info->op_a, kExact, &stride_value)) {
         if (!is_min && stride_value == 1) {
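A worked instance of the cancellation described in the comment above, as a standalone C++ sketch with arbitrary example values: for a == 1 and TC = UPPER - OFFSET, the maximum a * (TC - 1) + OFFSET collapses to UPPER - 1, so the offset drops out exactly.

#include <cstdint>
#include <iostream>

int main() {
  int32_t UPPER = 1000, OFFSET = 7, a = 1;  // example values; only the algebra matters
  int32_t TC = UPPER - OFFSET;              // trip-count of the form UPPER - OFFSET
  int32_t max_val = a * (TC - 1) + OFFSET;  // the bound being computed
  std::cout << max_val << "\n";             // prints 999 == UPPER - 1
  return 0;
}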
@@ -349,12 +345,25 @@
                                                      HInductionVarAnalysis::InductionInfo* trip,
                                                      bool in_body,
                                                      bool is_min) const {
-  // Detect constants and chase the fetch a bit deeper into the HIR tree, so that it becomes
-  // more likely range analysis will compare the same instructions as terminal nodes.
+  // Stop chasing the instruction at a constant or at the hint.
   int64_t value;
-  if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value))  {
+  if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
     return Value(static_cast<int32_t>(value));
-  } else if (instruction->IsAdd()) {
+  } else if (instruction == chase_hint_) {
+    return Value(instruction, 1, 0);
+  }
+  // Special cases when encountering a single instruction that denotes the trip-count in the
+  // loop-body: the min is 1 and, when chasing constants, the max of a safe trip-count is max int.
+  if (in_body && trip != nullptr && instruction == trip->op_a->fetch) {
+    if (is_min) {
+      return Value(1);
+    } else if (chase_hint_ == nullptr && !IsUnsafeTripCount(trip)) {
+      return Value(std::numeric_limits<int32_t>::max());
+    }
+  }
+  // Chase the instruction a bit deeper into the HIR tree, so that it becomes more likely
+  // range analysis will compare the same instructions as terminal nodes.
+  if (instruction->IsAdd()) {
     if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
       return AddValue(Value(static_cast<int32_t>(value)),
                       GetFetch(instruction->InputAt(1), trip, in_body, is_min));
@@ -362,19 +371,35 @@
       return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min),
                       Value(static_cast<int32_t>(value)));
     }
-  } else if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) {
-    return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min);
+  } else if (instruction->IsArrayLength()) {
+    // Return extreme values when chasing constants. Otherwise, chase deeper.
+    if (chase_hint_ == nullptr) {
+      return is_min ? Value(0) : Value(std::numeric_limits<int32_t>::max());
+    } else if (instruction->InputAt(0)->IsNewArray()) {
+      return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min);
+    }
   } else if (instruction->IsTypeConversion()) {
     // Since analysis is 32-bit (or narrower) we allow a widening along the path.
     if (instruction->AsTypeConversion()->GetInputType() == Primitive::kPrimInt &&
         instruction->AsTypeConversion()->GetResultType() == Primitive::kPrimLong) {
       return GetFetch(instruction->InputAt(0), trip, in_body, is_min);
     }
-  } else if (is_min) {
-    // Special case for finding minimum: minimum of trip-count in loop-body is 1.
-    if (trip != nullptr && in_body && instruction == trip->op_a->fetch) {
-      return Value(1);
-    }
+  }
+  // Chase an invariant fetch that is defined by an outer loop if the trip-count used
+  // so far is well-behaved in both bounds and the next trip-count is safe.
+  // Example:
+  //   for (int i = 0; i <= 100; i++)  // safe
+  //     for (int j = 0; j <= i; j++)  // well-behaved
+  //       j is in range [0, i  ] (if i is chase hint)
+  //         or in range [0, 100] (otherwise)
+  HLoopInformation* next_loop = nullptr;
+  HInductionVarAnalysis::InductionInfo* next_info = nullptr;
+  HInductionVarAnalysis::InductionInfo* next_trip = nullptr;
+  bool next_in_body = true;  // inner loop is always in body of outer loop
+  if (HasInductionInfo(instruction, instruction, &next_loop, &next_info, &next_trip) &&
+      IsWellBehavedTripCount(trip) &&
+      !IsUnsafeTripCount(next_trip)) {
+    return GetVal(next_info, next_trip, next_in_body, is_min);
   }
   return Value(instruction, 1, 0);
 }
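The nested-loop example from the comment above, run concretely as a standalone sketch: the outer trip-count is safe and the inner one is well-behaved, so chasing i outward bounds j by [0, 100]; with i supplied as the chase hint the analysis would stop at [0, i] instead.

#include <algorithm>
#include <iostream>

int main() {
  int j_min = 0;
  int j_max = 0;
  for (int i = 0; i <= 100; i++) {  // safe outer trip-count
    for (int j = 0; j <= i; j++) {  // well-behaved inner trip-count
      j_min = std::min(j_min, j);
      j_max = std::max(j_max, j);
    }
  }
  std::cout << "[" << j_min << ", " << j_max << "]\n";  // prints [0, 100]
  return 0;
}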
@@ -421,9 +446,8 @@
             break;
         }
         break;
-      case HInductionVarAnalysis::kLinear: {
+      case HInductionVarAnalysis::kLinear:
         return CorrectForType(GetLinear(info, trip, in_body, is_min), info->type);
-      }
       case HInductionVarAnalysis::kWrapAround:
       case HInductionVarAnalysis::kPeriodic:
         return MergeVal(GetVal(info->op_a, trip, in_body, is_min),
@@ -438,20 +462,18 @@
                                                    HInductionVarAnalysis::InductionInfo* trip,
                                                    bool in_body,
                                                    bool is_min) const {
+  // Constant times range.
+  int64_t value = 0;
+  if (IsConstant(info1, kExact, &value)) {
+    return MulRangeAndConstant(value, info2, trip, in_body, is_min);
+  } else if (IsConstant(info2, kExact, &value)) {
+    return MulRangeAndConstant(value, info1, trip, in_body, is_min);
+  }
+  // Interval ranges.
   Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true);
   Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false);
   Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true);
   Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false);
-  // Try to refine first operand.
-  if (!IsConstantValue(v1_min) && !IsConstantValue(v1_max)) {
-    RefineOuter(&v1_min, &v1_max);
-  }
-  // Constant times range.
-  if (IsSameConstantValue(v1_min, v1_max)) {
-    return MulRangeAndConstant(v2_min, v2_max, v1_min, is_min);
-  } else if (IsSameConstantValue(v2_min, v2_max)) {
-    return MulRangeAndConstant(v1_min, v1_max, v2_min, is_min);
-  }
   // Positive range vs. positive or negative range.
   if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) {
     if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) {
@@ -476,14 +498,16 @@
                                                    HInductionVarAnalysis::InductionInfo* trip,
                                                    bool in_body,
                                                    bool is_min) const {
+  // Range divided by constant.
+  int64_t value = 0;
+  if (IsConstant(info2, kExact, &value)) {
+    return DivRangeAndConstant(value, info1, trip, in_body, is_min);
+  }
+  // Interval ranges.
   Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true);
   Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false);
   Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true);
   Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false);
-  // Range divided by constant.
-  if (IsSameConstantValue(v2_min, v2_max)) {
-    return DivRangeAndConstant(v1_min, v1_max, v2_min, is_min);
-  }
   // Positive range vs. positive or negative range.
   if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) {
     if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) {
@@ -503,18 +527,30 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::MulRangeAndConstant(Value v_min,
-                                                                Value v_max,
-                                                                Value c,
-                                                                bool is_min) const {
-  return is_min == (c.b_constant >= 0) ? MulValue(v_min, c) : MulValue(v_max, c);
+InductionVarRange::Value InductionVarRange::MulRangeAndConstant(
+    int64_t value,
+    HInductionVarAnalysis::InductionInfo* info,
+    HInductionVarAnalysis::InductionInfo* trip,
+    bool in_body,
+    bool is_min) const {
+  if (CanLongValueFitIntoInt(value)) {
+    Value c(static_cast<int32_t>(value));
+    return MulValue(GetVal(info, trip, in_body, is_min == value >= 0), c);
+  }
+  return Value();
 }
 
-InductionVarRange::Value InductionVarRange::DivRangeAndConstant(Value v_min,
-                                                                Value v_max,
-                                                                Value c,
-                                                                bool is_min) const {
-  return is_min == (c.b_constant >= 0) ? DivValue(v_min, c) : DivValue(v_max, c);
+InductionVarRange::Value InductionVarRange::DivRangeAndConstant(
+    int64_t value,
+    HInductionVarAnalysis::InductionInfo* info,
+    HInductionVarAnalysis::InductionInfo* trip,
+    bool in_body,
+    bool is_min) const {
+  if (CanLongValueFitIntoInt(value)) {
+    Value c(static_cast<int32_t>(value));
+    return DivValue(GetVal(info, trip, in_body, is_min == value >= 0), c);
+  }
+  return Value();
 }
 
 InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) const {
@@ -580,28 +616,6 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::RefineOuter(Value v, bool is_min) const {
-  if (v.instruction == nullptr) {
-    return v;  // nothing to refine
-  }
-  HLoopInformation* loop =
-      v.instruction->GetBlock()->GetLoopInformation();  // closest enveloping loop
-  if (loop == nullptr) {
-    return v;  // no loop
-  }
-  HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, v.instruction);
-  if (info == nullptr) {
-    return v;  // no induction information
-  }
-  // Set up loop information.
-  HBasicBlock* header = loop->GetHeader();
-  bool in_body = true;  // inner always in more outer
-  HInductionVarAnalysis::InductionInfo* trip =
-      induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
-  // Try to refine "a x instruction + b" with outer loop range information on instruction.
-  return AddValue(MulValue(Value(v.a_constant), GetVal(info, trip, in_body, is_min)), Value(v.b_constant));
-}
-
 bool InductionVarRange::GenerateCode(HInstruction* context,
                                      HInstruction* instruction,
                                      HGraph* graph,
@@ -611,27 +625,18 @@
                                      /*out*/HInstruction** taken_test,
                                      /*out*/bool* needs_finite_test,
                                      /*out*/bool* needs_taken_test) const {
-  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
-  if (loop == nullptr) {
-    return false;  // no loop
-  }
-  HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction);
-  if (info == nullptr) {
-    return false;  // no induction information
-  }
-  // Set up loop information.
-  HBasicBlock* header = loop->GetHeader();
-  bool in_body = context->GetBlock() != header;
-  HInductionVarAnalysis::InductionInfo* trip =
-      induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
-  if (trip == nullptr) {
-    return false;  // codegen relies on trip count
+  HLoopInformation* loop = nullptr;
+  HInductionVarAnalysis::InductionInfo* info = nullptr;
+  HInductionVarAnalysis::InductionInfo* trip = nullptr;
+  if (!HasInductionInfo(context, instruction, &loop, &info, &trip) || trip == nullptr) {
+    return false;  // codegen needs all information, including the trip-count
   }
   // Determine what tests are needed. A finite test is needed if the evaluation code uses the
   // trip-count and the loop may be unsafe (because in such cases, the index could "overshoot"
   // the computed range). A taken test is needed for any unknown trip-count, even if evaluation
   // code does not use the trip-count explicitly (since there could be an implicit relation
   // between e.g. an invariant subscript and a not-taken condition).
+  bool in_body = context->GetBlock() != loop->GetHeader();
   *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
   *needs_taken_test = IsBodyTripCount(trip);
   // Code generation for taken test: generate the code when requested or otherwise analyze
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 0af4156..00aaa16 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -57,21 +57,19 @@
   explicit InductionVarRange(HInductionVarAnalysis* induction);
 
   /**
-   * Given a context denoted by the first instruction, returns a possibly conservative
-   * lower and upper bound on the instruction's value in the output parameters min_val
-   * and max_val, respectively. The need_finite_test flag denotes if an additional finite-test
-   * is needed to protect the range evaluation inside its loop. Returns false on failure.
+   * Given a context denoted by the first instruction, returns a possibly conservative lower
+   * and upper bound on the instruction's value in the output parameters min_val and max_val,
+   * respectively. The need_finite_test flag denotes if an additional finite-test is needed
+   * to protect the range evaluation inside its loop. The parameter chase_hint defines an
+   * instruction at which chasing may stop. Returns false on failure.
    */
   bool GetInductionRange(HInstruction* context,
                          HInstruction* instruction,
+                         HInstruction* chase_hint,
                          /*out*/ Value* min_val,
                          /*out*/ Value* max_val,
                          /*out*/ bool* needs_finite_test);
 
-  /** Refines the values with induction of next outer loop. Returns true on change. */
-  bool RefineOuter(/*in-out*/ Value* min_val,
-                   /*in-out*/ Value* max_val) const;
-
   /**
    * Returns true if range analysis is able to generate code for the lower and upper
    * bound expressions on the instruction in the given context. The need_finite_test
@@ -132,11 +130,20 @@
    */
   bool IsConstant(HInductionVarAnalysis::InductionInfo* info,
                   ConstantRequest request,
-                  /*out*/ int64_t *value) const;
+                  /*out*/ int64_t* value) const;
 
+  /** Returns whether induction information can be obtained. */
+  bool HasInductionInfo(HInstruction* context,
+                        HInstruction* instruction,
+                        /*out*/ HLoopInformation** loop,
+                        /*out*/ HInductionVarAnalysis::InductionInfo** info,
+                        /*out*/ HInductionVarAnalysis::InductionInfo** trip) const;
+
+  bool HasFetchInLoop(HInductionVarAnalysis::InductionInfo* info) const;
   bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const;
   bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
   bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
+  bool IsWellBehavedTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
 
   Value GetLinear(HInductionVarAnalysis::InductionInfo* info,
                   HInductionVarAnalysis::InductionInfo* trip,
@@ -161,8 +168,16 @@
                bool in_body,
                bool is_min) const;
 
-  Value MulRangeAndConstant(Value v1, Value v2, Value c, bool is_min) const;
-  Value DivRangeAndConstant(Value v1, Value v2, Value c, bool is_min) const;
+  Value MulRangeAndConstant(int64_t value,
+                            HInductionVarAnalysis::InductionInfo* info,
+                            HInductionVarAnalysis::InductionInfo* trip,
+                            bool in_body,
+                            bool is_min) const;
+  Value DivRangeAndConstant(int64_t value,
+                            HInductionVarAnalysis::InductionInfo* info,
+                            HInductionVarAnalysis::InductionInfo* trip,
+                            bool in_body,
+                            bool is_min) const;
 
   Value AddValue(Value v1, Value v2) const;
   Value SubValue(Value v1, Value v2) const;
@@ -171,12 +186,6 @@
   Value MergeVal(Value v1, Value v2, bool is_min) const;
 
   /**
-   * Returns refined value using induction of next outer loop or the input value if no
-   * further refinement is possible.
-   */
-  Value RefineOuter(Value val, bool is_min) const;
-
-  /**
    * Generates code for lower/upper/taken-test in the HIR. Returns true on success.
    * With values nullptr, the method can be used to determine if code generation
    * would be successful without generating actual code yet.
@@ -200,7 +209,10 @@
                     bool is_min) const;
 
   /** Results of prior induction variable analysis. */
-  HInductionVarAnalysis *induction_analysis_;
+  HInductionVarAnalysis* induction_analysis_;
+
+  /** Instruction at which chasing may stop. */
+  HInstruction* chase_hint_;
 
   friend class HInductionVarAnalysis;
   friend class InductionVarRangeTest;
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index dc04dc2..4ea170f 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -66,6 +66,8 @@
     entry_block_->AddInstruction(x_);
     y_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
     entry_block_->AddInstruction(y_);
+    // Set arbitrary range analysis hint while testing private methods.
+    SetHint(x_);
   }
 
   /** Constructs loop with given upper bound. */
@@ -111,6 +113,11 @@
     iva_->Run();
   }
 
+  /** Sets hint. */
+  void SetHint(HInstruction* hint) {
+    range_.chase_hint_ = hint;
+  }
+
   /** Constructs an invariant. */
   HInductionVarAnalysis::InductionInfo* CreateInvariant(char opc,
                                                         HInductionVarAnalysis::InductionInfo* a,
@@ -122,6 +129,7 @@
       case 'n': op = HInductionVarAnalysis::kNeg; break;
       case '*': op = HInductionVarAnalysis::kMul; break;
       case '/': op = HInductionVarAnalysis::kDiv; break;
+      case '<': op = HInductionVarAnalysis::kLT;  break;
       default:  op = HInductionVarAnalysis::kNop; break;
     }
     return iva_->CreateInvariantOp(op, a, b);
@@ -137,22 +145,21 @@
     return CreateFetch(graph_->GetIntConstant(c));
   }
 
-  /** Constructs a trip-count. */
+  /** Constructs a constant trip-count. */
   HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc, bool in_loop, bool safe) {
-    Primitive::Type type = Primitive::kPrimInt;
+    HInductionVarAnalysis::InductionOp op = HInductionVarAnalysis::kTripCountInBodyUnsafe;
     if (in_loop && safe) {
-      return iva_->CreateTripCount(
-          HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr, type);
+      op = HInductionVarAnalysis::kTripCountInLoop;
     } else if (in_loop) {
-      return iva_->CreateTripCount(
-          HInductionVarAnalysis::kTripCountInLoopUnsafe, CreateConst(tc), nullptr, type);
+      op = HInductionVarAnalysis::kTripCountInLoopUnsafe;
     } else if (safe) {
-      return iva_->CreateTripCount(
-          HInductionVarAnalysis::kTripCountInBody, CreateConst(tc), nullptr, type);
-    } else {
-      return iva_->CreateTripCount(
-          HInductionVarAnalysis::kTripCountInBodyUnsafe, CreateConst(tc), nullptr, type);
+      op = HInductionVarAnalysis::kTripCountInBody;
     }
+    // Return TC with taken-test 0 < TC.
+    return iva_->CreateTripCount(op,
+                                 CreateConst(tc),
+                                 CreateInvariant('<', CreateConst(0), CreateConst(tc)),
+                                 Primitive::kPrimInt);
   }
 
   /** Constructs a linear a * i + b induction. */
@@ -197,13 +204,13 @@
   }
 
   Value GetMin(HInductionVarAnalysis::InductionInfo* info,
-               HInductionVarAnalysis::InductionInfo* induc) {
-    return range_.GetVal(info, induc, /* in_body */ true, /* is_min */ true);
+               HInductionVarAnalysis::InductionInfo* trip) {
+    return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ true);
   }
 
   Value GetMax(HInductionVarAnalysis::InductionInfo* info,
-               HInductionVarAnalysis::InductionInfo* induc) {
-    return range_.GetVal(info, induc, /* in_body */ true, /* is_min */ false);
+               HInductionVarAnalysis::InductionInfo* trip) {
+    return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ false);
   }
 
   Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
@@ -558,6 +565,31 @@
   ExpectEqual(Value(), MaxValue(Value(55), Value(y_, 1, -50)));
 }
 
+TEST_F(InductionVarRangeTest, ArrayLengthAndHints) {
+  HInstruction* new_array = new (&allocator_)
+      HNewArray(x_,
+                graph_->GetCurrentMethod(),
+                0, Primitive::kPrimInt,
+                graph_->GetDexFile(),
+                kQuickAllocArray);
+  entry_block_->AddInstruction(new_array);
+  HInstruction* array_length = new (&allocator_) HArrayLength(new_array, 0);
+  entry_block_->AddInstruction(array_length);
+  // With null hint: yields extreme constants.
+  const int32_t max_value = std::numeric_limits<int32_t>::max();
+  SetHint(nullptr);
+  ExpectEqual(Value(0), GetMin(CreateFetch(array_length), nullptr));
+  ExpectEqual(Value(max_value), GetMax(CreateFetch(array_length), nullptr));
+  // With explicit hint: yields the length instruction.
+  SetHint(array_length);
+  ExpectEqual(Value(array_length, 1, 0), GetMin(CreateFetch(array_length), nullptr));
+  ExpectEqual(Value(array_length, 1, 0), GetMax(CreateFetch(array_length), nullptr));
+  // With any other non-null hint: chases beyond the length instruction.
+  SetHint(x_);
+  ExpectEqual(Value(x_, 1, 0), GetMin(CreateFetch(array_length), nullptr));
+  ExpectEqual(Value(x_, 1, 0), GetMax(CreateFetch(array_length), nullptr));
+}
+
 //
 // Tests on public methods.
 //
@@ -570,23 +602,20 @@
   bool needs_finite_test = true;
 
   // In context of header: known.
-  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(999), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
-  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 }
 
 TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
@@ -597,23 +626,20 @@
   bool needs_finite_test = true;
 
   // In context of header: known.
-  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
-  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(999), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 }
 
 TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
@@ -625,23 +651,20 @@
   bool needs_taken_test = true;
 
   // In context of header: upper unknown.
-  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(x_, 1, -1), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
-  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(x_, 1, 0), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
@@ -695,23 +718,20 @@
   bool needs_taken_test = true;
 
   // In context of header: lower unknown.
-  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(x_, 1, 1), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
-  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(x_, 1, 0), v1);
   ExpectEqual(Value(999), v2);
-  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index d5e80b4..c67b2d5 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -656,8 +656,8 @@
     }
     ArtMethod* new_method = nullptr;
     if (invoke_instruction->IsInvokeInterface()) {
-      new_method = ic.GetTypeAt(i)->GetImt(pointer_size)->Get(
-          method_index, pointer_size);
+      new_method = ic.GetTypeAt(i)->GetEmbeddedImTableEntry(
+          method_index % mirror::Class::kImtSize, pointer_size);
       if (new_method->IsRuntimeMethod()) {
         // Bail out as soon as we see a conflict trampoline in one of the target's
+        // interface tables.
@@ -756,7 +756,15 @@
     invoke_instruction->ReplaceWith(return_replacement);
   }
   invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
-  FixUpReturnReferenceType(invoke_instruction, method, return_replacement, do_rtp);
+  FixUpReturnReferenceType(method, return_replacement);
+  if (do_rtp && ReturnTypeMoreSpecific(invoke_instruction, return_replacement)) {
+    // Actual return value has a more specific type than the method's declared
+    // return type. Run RTP again on the outer graph to propagate it.
+    ReferenceTypePropagation(graph_,
+                             outer_compilation_unit_.GetDexCache(),
+                             handles_,
+                             /* is_first_run */ false).Run();
+  }
   return true;
 }
 
@@ -1159,6 +1167,15 @@
     }
   }
 
+  // We have replaced formal arguments with actual arguments. If actual types
+  // are more specific than the declared ones, run RTP again on the inner graph.
+  if (ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) {
+    ReferenceTypePropagation(callee_graph,
+                             dex_compilation_unit.GetDexCache(),
+                             handles_,
+                             /* is_first_run */ false).Run();
+  }
+
   size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
   size_t number_of_inlined_instructions =
       RunOptimizations(callee_graph, code_item, dex_compilation_unit);
@@ -1332,13 +1349,87 @@
   return number_of_inlined_instructions;
 }
 
-void HInliner::FixUpReturnReferenceType(HInvoke* invoke_instruction,
-                                        ArtMethod* resolved_method,
-                                        HInstruction* return_replacement,
-                                        bool do_rtp) {
+static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti,
+                                      bool declared_can_be_null,
+                                      HInstruction* actual_obj)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (declared_can_be_null && !actual_obj->CanBeNull()) {
+    return true;
+  }
+
+  ReferenceTypeInfo actual_rti = actual_obj->GetReferenceTypeInfo();
+  return (actual_rti.IsExact() && !declared_rti.IsExact()) ||
+         declared_rti.IsStrictSupertypeOf(actual_rti);
+}
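What "refinement" means in the test above, sketched with a hypothetical stand-in hierarchy (not ART classes): the actual type refines the declared one when it is a strict subtype, is exact where the declaration is not, or is known non-null where the declaration allows null.

#include <iostream>
#include <type_traits>

struct Declared {};           // the type in the method signature
struct Actual : Declared {};  // a strict subtype observed at the call site

template <typename DeclaredT, typename ActualT>
bool IsStrictSupertypeOf() {
  return std::is_base_of<DeclaredT, ActualT>::value &&
         !std::is_same<DeclaredT, ActualT>::value;
}

int main() {
  std::cout << IsStrictSupertypeOf<Declared, Actual>() << "\n";    // 1: refinement
  std::cout << IsStrictSupertypeOf<Declared, Declared>() << "\n";  // 0: no refinement
  return 0;
}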
+
+ReferenceTypeInfo HInliner::GetClassRTI(mirror::Class* klass) {
+  return ReferenceTypePropagation::IsAdmissible(klass)
+      ? ReferenceTypeInfo::Create(handles_->NewHandle(klass))
+      : graph_->GetInexactObjectRti();
+}
+
+bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method) {
+  // If this is an instance call, test whether the type of the `this` argument
+  // is more specific than the class which declares the method.
+  if (!resolved_method->IsStatic()) {
+    if (IsReferenceTypeRefinement(GetClassRTI(resolved_method->GetDeclaringClass()),
+                                  /* declared_can_be_null */ false,
+                                  invoke_instruction->InputAt(0u))) {
+      return true;
+    }
+  }
+
+  size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+
+  // Iterate over the list of parameter types and test whether any of the
+  // actual inputs has a more specific reference type than the type declared in
+  // the signature.
+  const DexFile::TypeList* param_list = resolved_method->GetParameterTypeList();
+  for (size_t param_idx = 0,
+              input_idx = resolved_method->IsStatic() ? 0 : 1,
+              e = (param_list == nullptr ? 0 : param_list->Size());
+       param_idx < e;
+       ++param_idx, ++input_idx) {
+    HInstruction* input = invoke_instruction->InputAt(input_idx);
+    if (input->GetType() == Primitive::kPrimNot) {
+      mirror::Class* param_cls = resolved_method->GetDexCacheResolvedType(
+          param_list->GetTypeItem(param_idx).type_idx_,
+          pointer_size);
+      if (IsReferenceTypeRefinement(GetClassRTI(param_cls),
+                                    /* declared_can_be_null */ true,
+                                    input)) {
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction,
+                                      HInstruction* return_replacement) {
   // Check the integrity of reference types and run another type propagation if needed.
   if (return_replacement != nullptr) {
     if (return_replacement->GetType() == Primitive::kPrimNot) {
+      // Test if the return type is a refinement of the declared return type.
+      if (IsReferenceTypeRefinement(invoke_instruction->GetReferenceTypeInfo(),
+                                    /* declared_can_be_null */ true,
+                                    return_replacement)) {
+        return true;
+      }
+    } else if (return_replacement->IsInstanceOf()) {
+      // Inlining InstanceOf into an If may put a tighter bound on reference types.
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void HInliner::FixUpReturnReferenceType(ArtMethod* resolved_method,
+                                        HInstruction* return_replacement) {
+  if (return_replacement != nullptr) {
+    if (return_replacement->GetType() == Primitive::kPrimNot) {
       if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
         // Make sure that we have a valid type for the return. We may get an invalid one when
         // we inline invokes with multiple branches and create a Phi for the result.
@@ -1347,36 +1438,7 @@
         DCHECK(return_replacement->IsPhi());
         size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
         mirror::Class* cls = resolved_method->GetReturnType(false /* resolve */, pointer_size);
-        if (cls != nullptr && !cls->IsErroneous()) {
-          ReferenceTypeInfo::TypeHandle return_handle = handles_->NewHandle(cls);
-          return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
-              return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
-        } else {
-          // Return inexact object type on failures.
-          return_replacement->SetReferenceTypeInfo(graph_->GetInexactObjectRti());
-        }
-      }
-
-      if (do_rtp) {
-        // If the return type is a refinement of the declared type run the type propagation again.
-        ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
-        ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
-        if (invoke_rti.IsStrictSupertypeOf(return_rti)
-            || (return_rti.IsExact() && !invoke_rti.IsExact())
-            || !return_replacement->CanBeNull()) {
-          ReferenceTypePropagation(graph_,
-                                   outer_compilation_unit_.GetDexCache(),
-                                   handles_,
-                                   /* is_first_run */ false).Run();
-        }
-      }
-    } else if (return_replacement->IsInstanceOf()) {
-      if (do_rtp) {
-        // Inlining InstanceOf into an If may put a tighter bound on reference types.
-        ReferenceTypePropagation(graph_,
-                                 outer_compilation_unit_.GetDexCache(),
-                                 handles_,
-                                 /* is_first_run */ false).Run();
+        return_replacement->SetReferenceTypeInfo(GetClassRTI(cls));
       }
     }
   }
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 7cf1424..02d3a5f 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -124,10 +124,18 @@
                                            uint32_t dex_pc) const
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void FixUpReturnReferenceType(HInvoke* invoke_instruction,
-                                ArtMethod* resolved_method,
-                                HInstruction* return_replacement,
-                                bool do_rtp)
+  void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Creates an instance of ReferenceTypeInfo from `klass` if `klass` is
+  // admissible (see ReferenceTypePropagation::IsAdmissible for details).
+  // Otherwise returns inexact Object RTI.
+  ReferenceTypeInfo GetClassRTI(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool ReturnTypeMoreSpecific(HInvoke* invoke_instruction, HInstruction* return_replacement)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Add a type guard on the given `receiver`. This will add to the graph:
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 1c67bcc..b412529 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -16,7 +16,6 @@
 
 #include "instruction_builder.h"
 
-#include "art_method-inl.h"
 #include "bytecode_utils.h"
 #include "class_linker.h"
 #include "driver/compiler_options.h"
@@ -891,7 +890,7 @@
                                            return_type,
                                            dex_pc,
                                            method_idx,
-                                           resolved_method->GetImtIndex());
+                                           resolved_method->GetDexMethodIndex());
   }
 
   return HandleInvoke(invoke,
@@ -934,7 +933,7 @@
       IsOutermostCompilingClass(type_index),
       dex_pc,
       needs_access_check,
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, type_index));
+      /* is_in_dex_cache */ false);
 
   AppendInstruction(load_class);
   HInstruction* cls = load_class;
@@ -1025,7 +1024,7 @@
         is_outer_class,
         dex_pc,
         /*needs_access_check*/ false,
-        compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, storage_index));
+        /* is_in_dex_cache */ false);
     AppendInstruction(load_class);
     clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
     AppendInstruction(clinit_check);
@@ -1377,15 +1376,13 @@
     }
   }
 
-  bool is_in_cache =
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, storage_index);
   HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
                                                  storage_index,
                                                  outer_dex_file,
                                                  is_outer_class,
                                                  dex_pc,
                                                  /*needs_access_check*/ false,
-                                                 is_in_cache);
+                                                 /* is_in_dex_cache */ false);
   AppendInstruction(constant);
 
   HInstruction* cls = constant;
@@ -1654,7 +1651,7 @@
       IsOutermostCompilingClass(type_index),
       dex_pc,
       !can_access,
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_cache, type_index));
+      /* is_in_dex_cache */ false);
   AppendInstruction(cls);
 
   TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class);
@@ -2622,8 +2619,6 @@
       Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
       bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
           dex_compilation_unit_->GetDexMethodIndex(), dex_cache, type_index);
-      bool is_in_dex_cache =
-          compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_cache, type_index);
       AppendInstruction(new (arena_) HLoadClass(
           graph_->GetCurrentMethod(),
           type_index,
@@ -2631,7 +2626,7 @@
           IsOutermostCompilingClass(type_index),
           dex_pc,
           !can_access,
-          is_in_dex_cache));
+          /* is_in_dex_cache */ false));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
       break;
     }
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index eb1d156..e0410dc 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -54,6 +54,9 @@
   // De Morgan's laws:
   // ~a & ~b = ~(a | b)  and  ~a | ~b = ~(a & b)
   bool TryDeMorganNegationFactoring(HBinaryOperation* op);
+  bool TryHandleAssociativeAndCommutativeOperation(HBinaryOperation* instruction);
+  bool TrySubtractionChainSimplification(HBinaryOperation* instruction);
+
   void VisitShift(HBinaryOperation* shift);
 
   void VisitEqual(HEqual* equal) OVERRIDE;
@@ -101,6 +104,7 @@
   void SimplifyCompare(HInvoke* invoke, bool is_signum, Primitive::Type type);
   void SimplifyIsNaN(HInvoke* invoke);
   void SimplifyFP2Int(HInvoke* invoke);
+  void SimplifyStringCharAt(HInvoke* invoke);
   void SimplifyStringIsEmptyOrLength(HInvoke* invoke);
   void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
 
@@ -235,22 +239,40 @@
 
 void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
   DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
-  HConstant* input_cst = instruction->GetConstantRight();
-  HInstruction* input_other = instruction->GetLeastConstantLeft();
+  HInstruction* shift_amount = instruction->GetRight();
+  HInstruction* value = instruction->GetLeft();
 
-  if (input_cst != nullptr) {
-    int64_t cst = Int64FromConstant(input_cst);
-    int64_t mask = (input_other->GetType() == Primitive::kPrimLong)
-        ? kMaxLongShiftDistance
-        : kMaxIntShiftDistance;
-    if ((cst & mask) == 0) {
+  int64_t implicit_mask = (value->GetType() == Primitive::kPrimLong)
+      ? kMaxLongShiftDistance
+      : kMaxIntShiftDistance;
+
+  if (shift_amount->IsConstant()) {
+    int64_t cst = Int64FromConstant(shift_amount->AsConstant());
+    if ((cst & implicit_mask) == 0) {
       // Replace code looking like
-      //    SHL dst, src, 0
+      //    SHL dst, value, 0
       // with
-      //    src
-      instruction->ReplaceWith(input_other);
+      //    value
+      instruction->ReplaceWith(value);
       instruction->GetBlock()->RemoveInstruction(instruction);
       RecordSimplification();
+      return;
+    }
+  }
+
+  // Shift operations implicitly mask the shift amount according to the type width. Get rid of
+  // unnecessary explicit masking operations on the shift amount.
+  // Replace code looking like
+  //    AND masked_shift, shift, <superset of implicit mask>
+  //    SHL dst, value, masked_shift
+  // with
+  //    SHL dst, value, shift
+  if (shift_amount->IsAnd()) {
+    HAnd* and_insn = shift_amount->AsAnd();
+    HConstant* mask = and_insn->GetConstantRight();
+    if ((mask != nullptr) && ((Int64FromConstant(mask) & implicit_mask) == implicit_mask)) {
+      instruction->ReplaceInput(and_insn->GetLeastConstantLeft(), 1);
+      RecordSimplification();
     }
   }
 }
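A standalone sketch of the masking argument (the low-5-bit mask of dex int shifts is emulated explicitly here, since C++ shifts make no such guarantee): an AND whose constant covers the implicit mask cannot change the shifted result, so dropping it is safe.

#include <cstdint>
#include <iostream>

// Int shifts in dex semantics use only the low 5 bits of the shift amount.
int32_t ShlInt(int32_t value, int32_t shift) {
  return static_cast<int32_t>(static_cast<uint32_t>(value) << (shift & 31));
}

int main() {
  bool redundant = true;
  for (int32_t shift = 0; shift < 1024; ++shift) {
    int32_t masked = shift & 0xFF;  // the explicit AND the pattern removes; 0xFF covers 31
    redundant = redundant && (ShlInt(1, masked) == ShlInt(1, shift));
  }
  std::cout << (redundant ? "explicit mask is redundant" : "mismatch") << "\n";
  return 0;
}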
@@ -944,7 +966,18 @@
     return;
   }
 
-  TryReplaceWithRotate(instruction);
+  if (TryReplaceWithRotate(instruction)) {
+    return;
+  }
+
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
+
+  if ((instruction->GetLeft()->IsSub() || instruction->GetRight()->IsSub()) &&
+      TrySubtractionChainSimplification(instruction)) {
+    return;
+  }
 }
 
 void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) {
@@ -1006,7 +1039,13 @@
     return;
   }
 
-  TryDeMorganNegationFactoring(instruction);
+  if (TryDeMorganNegationFactoring(instruction)) {
+    return;
+  }
+
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitGreaterThan(HGreaterThan* condition) {
@@ -1223,6 +1262,7 @@
       instruction->ReplaceWith(input_cst);
       instruction->GetBlock()->RemoveInstruction(instruction);
       RecordSimplification();
+      return;
     } else if (IsPowerOfTwo(factor)) {
       // Replace code looking like
       //    MUL dst, src, pow_of_2
@@ -1232,6 +1272,7 @@
       HShl* shl = new (allocator) HShl(type, input_other, shift);
       block->ReplaceAndRemoveInstructionWith(instruction, shl);
       RecordSimplification();
+      return;
     } else if (IsPowerOfTwo(factor - 1)) {
       // Transform code looking like
       //    MUL dst, src, (2^n + 1)
@@ -1246,6 +1287,7 @@
       block->InsertInstructionBefore(shl, instruction);
       block->ReplaceAndRemoveInstructionWith(instruction, add);
       RecordSimplification();
+      return;
     } else if (IsPowerOfTwo(factor + 1)) {
       // Transform code looking like
       //    MUL dst, src, (2^n - 1)
@@ -1260,8 +1302,13 @@
       block->InsertInstructionBefore(shl, instruction);
       block->ReplaceAndRemoveInstructionWith(instruction, sub);
       RecordSimplification();
+      return;
     }
   }
+
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
 }
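The three strength reductions applied by this visitor, checked as plain arithmetic identities in a standalone sketch (arbitrary operand, n == 4):

#include <cstdint>
#include <iostream>

int main() {
  int32_t src = 12345;
  int n = 4;  // 2^n == 16
  std::cout << (src * 16 == (src << n)) << "\n";        // MUL by 2^n       -> SHL
  std::cout << (src * 17 == (src << n) + src) << "\n";  // MUL by (2^n + 1) -> SHL + ADD
  std::cout << (src * 15 == (src << n) - src) << "\n";  // MUL by (2^n - 1) -> SHL + SUB
  return 0;
}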
 
 void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) {
@@ -1361,7 +1408,13 @@
 
   if (TryDeMorganNegationFactoring(instruction)) return;
 
-  TryReplaceWithRotate(instruction);
+  if (TryReplaceWithRotate(instruction)) {
+    return;
+  }
+
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitShl(HShl* instruction) {
@@ -1452,6 +1505,11 @@
     instruction->GetBlock()->RemoveInstruction(instruction);
     RecordSimplification();
     left->GetBlock()->RemoveInstruction(left);
+    return;
+  }
+
+  if (TrySubtractionChainSimplification(instruction)) {
+    return;
   }
 }
 
@@ -1505,7 +1563,13 @@
     return;
   }
 
-  TryReplaceWithRotate(instruction);
+  if (TryReplaceWithRotate(instruction)) {
+    return;
+  }
+
+  // TryHandleAssociativeAndCommutativeOperation() does not remove its input,
+  // so no need to return.
+  TryHandleAssociativeAndCommutativeOperation(instruction);
 }
 
 void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) {
@@ -1685,13 +1749,32 @@
   invoke->ReplaceWithExceptInReplacementAtIndex(select, 0);  // false at index 0
 }
 
+void InstructionSimplifierVisitor::SimplifyStringCharAt(HInvoke* invoke) {
+  HInstruction* str = invoke->InputAt(0);
+  HInstruction* index = invoke->InputAt(1);
+  uint32_t dex_pc = invoke->GetDexPc();
+  ArenaAllocator* arena = GetGraph()->GetArena();
+  // We treat String as an array to allow DCE and BCE to seamlessly work on strings,
+  // so create the HArrayLength, HBoundsCheck and HArrayGet.
+  HArrayLength* length = new (arena) HArrayLength(str, dex_pc, /* is_string_length */ true);
+  invoke->GetBlock()->InsertInstructionBefore(length, invoke);
+  HBoundsCheck* bounds_check =
+      new (arena) HBoundsCheck(index, length, dex_pc, invoke->GetDexMethodIndex());
+  invoke->GetBlock()->InsertInstructionBefore(bounds_check, invoke);
+  HArrayGet* array_get =
+      new (arena) HArrayGet(str, index, Primitive::kPrimChar, dex_pc, /* is_string_char_at */ true);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, array_get);
+  bounds_check->CopyEnvironmentFrom(invoke->GetEnvironment());
+  GetGraph()->SetHasBoundsChecks(true);
+}
+
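A rough standalone sketch of what the lowering above expresses in the HIR, with std::string standing in for the managed String: charAt becomes a length read, a bounds check, and an element load, which is what lets BCE and DCE treat the string like a char array.

#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>

char CharAt(const std::string& str, std::size_t index) {
  std::size_t length = str.size();  // HArrayLength with is_string_length
  if (index >= length) {            // HBoundsCheck
    throw std::out_of_range("string index out of bounds");
  }
  return str[index];                // HArrayGet with is_string_char_at
}

int main() {
  std::cout << CharAt("art", 1) << "\n";  // prints r
  return 0;
}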
 void InstructionSimplifierVisitor::SimplifyStringIsEmptyOrLength(HInvoke* invoke) {
   HInstruction* str = invoke->InputAt(0);
   uint32_t dex_pc = invoke->GetDexPc();
   // We treat String as an array to allow DCE and BCE to seamlessly work on strings,
   // so create the HArrayLength.
-  HArrayLength* length = new (GetGraph()->GetArena()) HArrayLength(str, dex_pc);
-  length->MarkAsStringLength();
+  HArrayLength* length =
+      new (GetGraph()->GetArena()) HArrayLength(str, dex_pc, /* is_string_length */ true);
   HInstruction* replacement;
   if (invoke->GetIntrinsic() == Intrinsics::kStringIsEmpty) {
     // For String.isEmpty(), create the `HEqual` representing the `length == 0`.
@@ -1752,6 +1835,9 @@
     case Intrinsics::kDoubleDoubleToLongBits:
       SimplifyFP2Int(instruction);
       break;
+    case Intrinsics::kStringCharAt:
+      SimplifyStringCharAt(instruction);
+      break;
     case Intrinsics::kStringIsEmpty:
     case Intrinsics::kStringLength:
       SimplifyStringIsEmptyOrLength(instruction);
@@ -1782,4 +1868,150 @@
   }
 }
 
+// Replace code looking like
+//    OP y, x, const1
+//    OP z, y, const2
+// with
+//    OP z, x, const3
+// where OP is both an associative and a commutative operation.
+bool InstructionSimplifierVisitor::TryHandleAssociativeAndCommutativeOperation(
+    HBinaryOperation* instruction) {
+  DCHECK(instruction->IsCommutative());
+
+  if (!Primitive::IsIntegralType(instruction->GetType())) {
+    return false;
+  }
+
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  // Variable names as described above.
+  HConstant* const2;
+  HBinaryOperation* y;
+
+  if (instruction->InstructionTypeEquals(left) && right->IsConstant()) {
+    const2 = right->AsConstant();
+    y = left->AsBinaryOperation();
+  } else if (left->IsConstant() && instruction->InstructionTypeEquals(right)) {
+    const2 = left->AsConstant();
+    y = right->AsBinaryOperation();
+  } else {
+    // The node does not match the pattern.
+    return false;
+  }
+
+  // If `y` has more than one use, we do not perform the optimization
+  // because it might increase code size (e.g. if the new constant is
+  // no longer encodable as an immediate operand in the target ISA).
+  if (!y->HasOnlyOneNonEnvironmentUse()) {
+    return false;
+  }
+
+  // GetConstantRight() can return both left and right constants
+  // for commutative operations.
+  HConstant* const1 = y->GetConstantRight();
+  if (const1 == nullptr) {
+    return false;
+  }
+
+  instruction->ReplaceInput(const1, 0);
+  instruction->ReplaceInput(const2, 1);
+  HConstant* const3 = instruction->TryStaticEvaluation();
+  DCHECK(const3 != nullptr);
+  instruction->ReplaceInput(y->GetLeastConstantLeft(), 0);
+  instruction->ReplaceInput(const3, 1);
+  RecordSimplification();
+  return true;
+}
+
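The rewrite above restated as a standalone arithmetic sketch for ADD on int (the same identity is what makes the transformation valid for any associative and commutative integral OP):

#include <cstdint>
#include <iostream>

int main() {
  int32_t x = 42;
  int32_t const1 = 7, const2 = 100;
  int32_t y = x + const1;            // OP y, x, const1
  int32_t z = y + const2;            // OP z, y, const2
  int32_t const3 = const1 + const2;  // folded statically
  std::cout << (z == x + const3) << "\n";  // prints 1: OP z, x, const3
  return 0;
}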
+static HBinaryOperation* AsAddOrSub(HInstruction* binop) {
+  return (binop->IsAdd() || binop->IsSub()) ? binop->AsBinaryOperation() : nullptr;
+}
+
+// Helper function that performs addition statically, considering the result type.
+static int64_t ComputeAddition(Primitive::Type type, int64_t x, int64_t y) {
+  // Use the Compute() method for consistency with TryStaticEvaluation().
+  if (type == Primitive::kPrimInt) {
+    return HAdd::Compute<int32_t>(x, y);
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimLong);
+    return HAdd::Compute<int64_t>(x, y);
+  }
+}
+
+// Helper function that handles the child classes of HConstant
+// and returns an integer with the appropriate sign.
+static int64_t GetValue(HConstant* constant, bool is_negated) {
+  int64_t ret = Int64FromConstant(constant);
+  return is_negated ? -ret : ret;
+}
+
+// Replace code looking like
+//    OP1 y, x, const1
+//    OP2 z, y, const2
+// with
+//    OP3 z, x, const3
+// where OPx is either ADD or SUB, and at least one of OP{1,2} is SUB.
+bool InstructionSimplifierVisitor::TrySubtractionChainSimplification(
+    HBinaryOperation* instruction) {
+  DCHECK(instruction->IsAdd() || instruction->IsSub()) << instruction->DebugName();
+
+  Primitive::Type type = instruction->GetType();
+  if (!Primitive::IsIntegralType(type)) {
+    return false;
+  }
+
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  // Variable names as described above.
+  HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstant();
+  if (const2 == nullptr) {
+    return false;
+  }
+
+  HBinaryOperation* y = (AsAddOrSub(left) != nullptr)
+      ? left->AsBinaryOperation()
+      : AsAddOrSub(right);
+  // If y has more than one use, we do not perform the optimization because
+  // it might increase code size (e.g. if the new constant is no longer
+  // encodable as an immediate operand in the target ISA).
+  if ((y == nullptr) || !y->HasOnlyOneNonEnvironmentUse()) {
+    return false;
+  }
+
+  left = y->GetLeft();
+  HConstant* const1 = left->IsConstant() ? left->AsConstant() : y->GetRight()->AsConstant();
+  if (const1 == nullptr) {
+    return false;
+  }
+
+  HInstruction* x = (const1 == left) ? y->GetRight() : left;
+  // If both inputs are constants, let the constant folding pass deal with it.
+  if (x->IsConstant()) {
+    return false;
+  }
+
+  bool is_const2_negated = (const2 == right) && instruction->IsSub();
+  int64_t const2_val = GetValue(const2, is_const2_negated);
+  bool is_y_negated = (y == right) && instruction->IsSub();
+  right = y->GetRight();
+  bool is_const1_negated = is_y_negated ^ ((const1 == right) && y->IsSub());
+  int64_t const1_val = GetValue(const1, is_const1_negated);
+  bool is_x_negated = is_y_negated ^ ((x == right) && y->IsSub());
+  int64_t const3_val = ComputeAddition(type, const1_val, const2_val);
+  HBasicBlock* block = instruction->GetBlock();
+  HConstant* const3 = block->GetGraph()->GetConstant(type, const3_val);
+  ArenaAllocator* arena = instruction->GetArena();
+  HInstruction* z;
+
+  if (is_x_negated) {
+    z = new (arena) HSub(type, const3, x, instruction->GetDexPc());
+  } else {
+    z = new (arena) HAdd(type, x, const3, instruction->GetDexPc());
+  }
+
+  block->ReplaceAndRemoveInstructionWith(instruction, z);
+  RecordSimplification();
+  return true;
+}
+
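Two instances of the ADD/SUB chain rewrite above, as a standalone sketch covering both a plain and a negated x:

#include <cstdint>
#include <iostream>

int main() {
  int32_t x = 1000, const1 = 5, const2 = 3;
  int32_t y = x - const1;                                        // OP1 y, x, const1
  // z = y - const2  becomes  z = x - (const1 + const2)
  std::cout << ((y - const2) == x - (const1 + const2)) << "\n";  // prints 1
  // z = const2 - y  becomes  z = (const2 + const1) - x  (x negated)
  std::cout << ((const2 - y) == (const2 + const1) - x) << "\n";  // prints 1
  return 0;
}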
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index e4a711e..983d31d 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -19,6 +19,7 @@
 #include "common_arm64.h"
 #include "instruction_simplifier_shared.h"
 #include "mirror/array-inl.h"
+#include "mirror/string.h"
 
 namespace art {
 namespace arm64 {
@@ -30,7 +31,7 @@
 void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access,
                                                                      HInstruction* array,
                                                                      HInstruction* index,
-                                                                     int access_size) {
+                                                                     size_t data_offset) {
   if (kEmitCompilerReadBarrier) {
     // The read barrier instrumentation does not support the
     // HArm64IntermediateAddress instruction yet.
@@ -55,8 +56,7 @@
   // Proceed to extract the base address computation.
   ArenaAllocator* arena = GetGraph()->GetArena();
 
-  HIntConstant* offset =
-      GetGraph()->GetIntConstant(mirror::Array::DataOffset(access_size).Uint32Value());
+  HIntConstant* offset = GetGraph()->GetIntConstant(data_offset);
   HArm64IntermediateAddress* address =
       new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc);
   address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
@@ -189,17 +189,20 @@
 }
 
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
+  size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
   TryExtractArrayAccessAddress(instruction,
                                instruction->GetArray(),
                                instruction->GetIndex(),
-                               Primitive::ComponentSize(instruction->GetType()));
+                               data_offset);
 }
 
 void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) {
+  size_t access_size = Primitive::ComponentSize(instruction->GetComponentType());
+  size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value();
   TryExtractArrayAccessAddress(instruction,
                                instruction->GetArray(),
                                instruction->GetIndex(),
-                               Primitive::ComponentSize(instruction->GetComponentType()));
+                               data_offset);
 }
 
 void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index da26998..4735f85 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -38,7 +38,7 @@
   void TryExtractArrayAccessAddress(HInstruction* access,
                                     HInstruction* array,
                                     HInstruction* index,
-                                    int access_size);
+                                    size_t data_offset);
   bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
   bool TryMergeIntoShifterOperand(HInstruction* use,
                                   HInstruction* bitfield_op,
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 83a5127..3429a8f 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -239,6 +239,7 @@
 UNREACHABLE_INTRINSIC(Arch, LongCompare)            \
 UNREACHABLE_INTRINSIC(Arch, IntegerSignum)          \
 UNREACHABLE_INTRINSIC(Arch, LongSignum)             \
+UNREACHABLE_INTRINSIC(Arch, StringCharAt)           \
 UNREACHABLE_INTRINSIC(Arch, StringIsEmpty)          \
 UNREACHABLE_INTRINSIC(Arch, StringLength)           \
 UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence)        \
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 7d1c2eb..f43f8ed 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -47,19 +47,6 @@
   if (res == nullptr) {
     return false;
   }
-  if (kEmitCompilerReadBarrier && res->CanCall()) {
-    // Generating an intrinsic for this HInvoke may produce an
-    // IntrinsicSlowPathARM slow path.  Currently this approach
-    // does not work when using read barriers, as the emitted
-    // calling sequence will make use of another slow path
-    // (ReadBarrierForRootSlowPathARM for HInvokeStaticOrDirect,
-    // ReadBarrierSlowPathARM for HInvokeVirtual).  So we bail
-    // out in this case.
-    //
-    // TODO: Find a way to have intrinsics work with read barriers.
-    invoke->SetLocations(nullptr);
-    return false;
-  }
   return res->Intrinsified();
 }
 
@@ -524,8 +511,8 @@
       if (kEmitCompilerReadBarrier) {
         if (kUseBakerReadBarrier) {
           Location temp = locations->GetTemp(0);
-          codegen->GenerateArrayLoadWithBakerReadBarrier(
-              invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
           if (is_volatile) {
             __ dmb(ISH);
           }
@@ -581,10 +568,11 @@
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(),
+                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // We need a temporary register for the read barrier marking slow
-    // path in InstructionCodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+    // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -919,9 +907,10 @@
   // The UnsafeCASObject intrinsic is missing a read barrier, and
   // therefore sometimes does not work as expected (b/25883050).
   // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented (see TODO in GenCAS below).
+  // implemented (see TODO in GenCAS).
   //
-  // TODO(rpl): Fix this issue and re-enable this intrinsic with read barriers.
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
   if (kEmitCompilerReadBarrier) {
     return;
   }
@@ -932,58 +921,18 @@
   GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
 }
 
-void IntrinsicLocationsBuilderARM::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-
-  locations->AddTemp(Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) {
-  ArmAssembler* assembler = GetAssembler();
-  LocationSummary* locations = invoke->GetLocations();
-
-  // Location of reference to data array
-  const MemberOffset value_offset = mirror::String::ValueOffset();
-  // Location of count
-  const MemberOffset count_offset = mirror::String::CountOffset();
-
-  Register obj = locations->InAt(0).AsRegister<Register>();  // String object pointer.
-  Register idx = locations->InAt(1).AsRegister<Register>();  // Index of character.
-  Register out = locations->Out().AsRegister<Register>();    // Result character.
-
-  Register temp = locations->GetTemp(0).AsRegister<Register>();
-  Register array_temp = locations->GetTemp(1).AsRegister<Register>();
-
-  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
-  //       the cost.
-  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
-  //       we will not optimize the code for constants (which would save a register).
-
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  __ ldr(temp, Address(obj, count_offset.Int32Value()));          // temp = str.length.
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ cmp(idx, ShifterOperand(temp));
-  __ b(slow_path->GetEntryLabel(), CS);
-
-  __ add(array_temp, obj, ShifterOperand(value_offset.Int32Value()));  // array_temp := str.value.
-
-  // Load the value.
-  __ ldrh(out, Address(array_temp, idx, LSL, 1));                 // out := array_temp[idx].
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) {
   // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -1384,6 +1333,12 @@
 }
 
 void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
+  // TODO(rpl): Implement read barriers in the SystemArrayCopy
+  // intrinsic and re-enable it (b/29516905).
+  if (kEmitCompilerReadBarrier) {
+    return;
+  }
+
   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
   LocationSummary* locations = invoke->GetLocations();
   if (locations == nullptr) {
@@ -1468,11 +1423,11 @@
   }
 }
 
-// TODO: Implement read barriers in the SystemArrayCopy intrinsic.
-// Note that this code path is not used (yet) because we do not
-// intrinsify methods that can go into the IntrinsicSlowPathARM
-// slow path.
 void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
+  // TODO(rpl): Implement read barriers in the SystemArrayCopy
+  // intrinsic and re-enable it (b/29516905).
+  DCHECK(!kEmitCompilerReadBarrier);
+
   ArmAssembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index c8d6ddc..1685cf9 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -149,19 +149,6 @@
   if (res == nullptr) {
     return false;
   }
-  if (kEmitCompilerReadBarrier && res->CanCall()) {
-    // Generating an intrinsic for this HInvoke may produce an
-    // IntrinsicSlowPathARM64 slow path.  Currently this approach
-    // does not work when using read barriers, as the emitted
-    // calling sequence will make use of another slow path
-    // (ReadBarrierForRootSlowPathARM64 for HInvokeStaticOrDirect,
-    // ReadBarrierSlowPathARM64 for HInvokeVirtual).  So we bail
-    // out in this case.
-    //
-    // TODO: Find a way to have intrinsics work with read barriers.
-    invoke->SetLocations(nullptr);
-    return false;
-  }
   return res->Intrinsified();
 }
 
@@ -791,8 +778,15 @@
     // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
     UseScratchRegisterScope temps(masm);
     Register temp = temps.AcquireW();
-    codegen->GenerateArrayLoadWithBakerReadBarrier(
-        invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+    codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+                                                       trg_loc,
+                                                       base,
+                                                       /* offset */ 0U,
+                                                       /* index */ offset_loc,
+                                                       /* scale_factor */ 0U,
+                                                       temp,
+                                                       /* needs_null_check */ false,
+                                                       is_volatile);
   } else {
     // Other cases.
     MemOperand mem_op(base.X(), offset);
@@ -821,7 +815,8 @@
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(),
+                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
@@ -1102,9 +1097,10 @@
   // The UnsafeCASObject intrinsic is missing a read barrier, and
   // therefore sometimes does not work as expected (b/25883050).
   // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented (see TODO in GenCAS below).
+  // implemented (see TODO in GenCAS).
   //
-  // TODO(rpl): Fix this issue and re-enable this intrinsic with read barriers.
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
   if (kEmitCompilerReadBarrier) {
     return;
   }
@@ -1119,59 +1115,18 @@
   GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
 }
 
-void IntrinsicLocationsBuilderARM64::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  // In case we need to go in the slow path, we can't have the output be the same
-  // as the input: the current liveness analysis considers the input to be live
-  // at the point of the call.
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitStringCharAt(HInvoke* invoke) {
-  vixl::MacroAssembler* masm = GetVIXLAssembler();
-  LocationSummary* locations = invoke->GetLocations();
-
-  // Location of reference to data array
-  const MemberOffset value_offset = mirror::String::ValueOffset();
-  // Location of count
-  const MemberOffset count_offset = mirror::String::CountOffset();
-
-  Register obj = WRegisterFrom(locations->InAt(0));  // String object pointer.
-  Register idx = WRegisterFrom(locations->InAt(1));  // Index of character.
-  Register out = WRegisterFrom(locations->Out());    // Result character.
-
-  UseScratchRegisterScope temps(masm);
-  Register temp = temps.AcquireW();
-  Register array_temp = temps.AcquireW();            // We can trade this for worse scheduling.
-
-  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
-  //       the cost.
-  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
-  //       we will not optimize the code for constants (which would save a register).
-
-  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  __ Ldr(temp, HeapOperand(obj, count_offset));          // temp = str.length.
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ Cmp(idx, temp);
-  __ B(hs, slow_path->GetEntryLabel());
-
-  __ Add(array_temp, obj, Operand(value_offset.Int32Value()));  // array_temp := str.value.
-
-  // Load the value.
-  __ Ldrh(out, MemOperand(array_temp.X(), idx, UXTW, 1));  // out := array_temp[idx].
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             invoke->InputAt(1)->CanBeNull()
@@ -2062,6 +2017,12 @@
 // We want to use two temporary registers in order to reduce the register pressure in arm64.
 // So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary.
 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // TODO(rpl): Implement read barriers in the SystemArrayCopy
+  // intrinsic and re-enable it (b/29516905).
+  if (kEmitCompilerReadBarrier) {
+    return;
+  }
+
   // Check to see if we have known failures that will cause us to have to bail out
   // to the runtime, and just generate the runtime call directly.
   HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
@@ -2114,6 +2075,10 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // TODO(rpl): Implement read barriers in the SystemArrayCopy
+  // intrinsic and re-enable it (b/29516905).
+  DCHECK(!kEmitCompilerReadBarrier);
+
   vixl::MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 140f56a..d4f44d6 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1872,54 +1872,6 @@
   GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
 }
 
-// char java.lang.String.charAt(int index)
-void IntrinsicLocationsBuilderMIPS::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  // The inputs will be considered live at the last instruction and restored. This would overwrite
-  // the output with kNoOutputOverlap.
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = invoke->GetLocations();
-  MipsAssembler* assembler = GetAssembler();
-
-  // Location of reference to data array
-  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count
-  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
-
-  Register obj = locations->InAt(0).AsRegister<Register>();
-  Register idx = locations->InAt(1).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
-
-  // TODO: Maybe we can support range check elimination. Overall,
-  //       though, I think it's not worth the cost.
-  // TODO: For simplicity, the index parameter is requested in a
-  //       register, so different from Quick we will not optimize the
-  //       code for constants (which would save a register).
-
-  SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  // Load the string size
-  __ Lw(TMP, obj, count_offset);
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  // Revert to slow path if idx is too large, or negative
-  __ Bgeu(idx, TMP, slow_path->GetEntryLabel());
-
-  // out = obj[2*idx].
-  __ Sll(TMP, idx, 1);                  // idx * 2
-  __ Addu(TMP, TMP, obj);               // Address of char at location idx
-  __ Lhu(out, TMP, value_offset);       // Load char at location idx
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 // int java.lang.String.compareTo(String anotherString)
 void IntrinsicLocationsBuilderMIPS::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 6c4e64e..9243f4c 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1371,52 +1371,6 @@
   GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
 }
 
-// char java.lang.String.charAt(int index)
-void IntrinsicLocationsBuilderMIPS64::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = invoke->GetLocations();
-  Mips64Assembler* assembler = GetAssembler();
-
-  // Location of reference to data array
-  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count
-  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
-
-  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
-  GpuRegister idx = locations->InAt(1).AsRegister<GpuRegister>();
-  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-
-  // TODO: Maybe we can support range check elimination. Overall,
-  //       though, I think it's not worth the cost.
-  // TODO: For simplicity, the index parameter is requested in a
-  //       register, so different from Quick we will not optimize the
-  //       code for constants (which would save a register).
-
-  SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  // Load the string size
-  __ Lw(TMP, obj, count_offset);
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  // Revert to slow path if idx is too large, or negative
-  __ Bgeuc(idx, TMP, slow_path->GetEntryLabel());
-
-  // out = obj[2*idx].
-  __ Sll(TMP, idx, 1);                  // idx * 2
-  __ Daddu(TMP, TMP, obj);              // Address of char at location idx
-  __ Lhu(out, TMP, value_offset);       // Load char at location idx
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 // int java.lang.String.compareTo(String anotherString)
 void IntrinsicLocationsBuilderMIPS64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 05377f9..031cd13 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -60,19 +60,6 @@
   if (res == nullptr) {
     return false;
   }
-  if (kEmitCompilerReadBarrier && res->CanCall()) {
-    // Generating an intrinsic for this HInvoke may produce an
-    // IntrinsicSlowPathX86 slow path.  Currently this approach
-    // does not work when using read barriers, as the emitted
-    // calling sequence will make use of another slow path
-    // (ReadBarrierForRootSlowPathX86 for HInvokeStaticOrDirect,
-    // ReadBarrierSlowPathX86 for HInvokeVirtual).  So we bail
-    // out in this case.
-    //
-    // TODO: Find a way to have intrinsics work with read barriers.
-    invoke->SetLocations(nullptr);
-    return false;
-  }
   return res->Intrinsified();
 }
 
@@ -1030,48 +1017,6 @@
   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
 }
 
-void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
-  // The inputs plus one temp.
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = invoke->GetLocations();
-
-  // Location of reference to data array.
-  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count.
-  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
-
-  Register obj = locations->InAt(0).AsRegister<Register>();
-  Register idx = locations->InAt(1).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
-
-  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
-  //       the cost.
-  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
-  //       we will not optimize the code for constants (which would save a register).
-
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  X86Assembler* assembler = GetAssembler();
-
-  __ cmpl(idx, Address(obj, count_offset));
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ j(kAboveEqual, slow_path->GetEntryLabel());
-
-  // out = out[2*idx].
-  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
   // We need at least two of the positions or length to be an integer constant,
   // or else we won't have enough free registers.
@@ -1864,8 +1809,9 @@
       if (kEmitCompilerReadBarrier) {
         if (kUseBakerReadBarrier) {
           Location temp = locations->GetTemp(0);
-          codegen->GenerateArrayLoadWithBakerReadBarrier(
-              invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+          Address src(base, offset, ScaleFactor::TIMES_1, 0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, output_loc, base, src, temp, /* needs_null_check */ false);
         } else {
           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
           codegen->GenerateReadBarrierSlow(
@@ -1920,16 +1866,17 @@
     if (is_volatile) {
       // Need to use XMM to read volatile.
       locations->AddTemp(Location::RequiresFpuRegister());
-      locations->SetOut(Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
     } else {
       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
     }
   } else {
-    locations->SetOut(Location::RequiresRegister());
+    locations->SetOut(Location::RequiresRegister(),
+                      can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // We need a temporary register for the read barrier marking slow
-    // path in InstructionCodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
+    // path in InstructionCodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -2151,9 +2098,9 @@
   // The UnsafeCASObject intrinsic is missing a read barrier, and
   // therefore sometimes does not work as expected (b/25883050).
   // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented.
+  // implemented (see TODO in GenCAS).
   //
-  // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
   // this intrinsic.
   if (kEmitCompilerReadBarrier) {
     return;
@@ -2278,6 +2225,15 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCAS(Primitive::kPrimNot, invoke, codegen_);
 }
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 67c2f3a..c5b44d4 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -54,19 +54,6 @@
   if (res == nullptr) {
     return false;
   }
-  if (kEmitCompilerReadBarrier && res->CanCall()) {
-    // Generating an intrinsic for this HInvoke may produce an
-    // IntrinsicSlowPathX86_64 slow path.  Currently this approach
-    // does not work when using read barriers, as the emitted
-    // calling sequence will make use of another slow path
-    // (ReadBarrierForRootSlowPathX86_64 for HInvokeStaticOrDirect,
-    // ReadBarrierSlowPathX86_64 for HInvokeVirtual).  So we bail
-    // out in this case.
-    //
-    // TODO: Find a way to have intrinsics work with read barriers.
-    invoke->SetLocations(nullptr);
-    return false;
-  }
   return res->Intrinsified();
 }
 
@@ -891,49 +878,6 @@
   GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
 }
 
-void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
-  // The inputs plus one temp.
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
-  locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
-  LocationSummary* locations = invoke->GetLocations();
-
-  // Location of reference to data array.
-  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
-  // Location of count.
-  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
-
-  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
-  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-
-  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
-  //       the cost.
-  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
-  //       we will not optimize the code for constants (which would save a register).
-
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  X86_64Assembler* assembler = GetAssembler();
-
-  __ cmpl(idx, Address(obj, count_offset));
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ j(kAboveEqual, slow_path->GetEntryLabel());
-
-  // out = out[2*idx].
-  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
   // Check to see if we have known failures that will cause us to have to bail out
   // to the runtime, and just generate the runtime call directly.
@@ -1122,14 +1066,20 @@
 
 
 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // TODO(rpl): Implement read barriers in the SystemArrayCopy
+  // intrinsic and re-enable it (b/29516905).
+  if (kEmitCompilerReadBarrier) {
+    return;
+  }
+
   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
 }
 
-// TODO: Implement read barriers in the SystemArrayCopy intrinsic.
-// Note that this code path is not used (yet) because we do not
-// intrinsify methods that can go into the IntrinsicSlowPathX86_64
-// slow path.
 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // TODO(rpl): Implement read barriers in the SystemArrayCopy
+  // intrinsic and re-enable it (b/29516905).
+  DCHECK(!kEmitCompilerReadBarrier);
+
   X86_64Assembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
@@ -1953,8 +1903,9 @@
       if (kEmitCompilerReadBarrier) {
         if (kUseBakerReadBarrier) {
           Location temp = locations->GetTemp(0);
-          codegen->GenerateArrayLoadWithBakerReadBarrier(
-              invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+          Address src(base, offset, ScaleFactor::TIMES_1, 0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, output_loc, base, src, temp, /* needs_null_check */ false);
         } else {
           __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
           codegen->GenerateReadBarrierSlow(
@@ -1991,10 +1942,11 @@
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(),
+                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // We need a temporary register for the read barrier marking slow
-    // path in InstructionCodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+    // path in InstructionCodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -2178,9 +2130,9 @@
   // The UnsafeCASObject intrinsic is missing a read barrier, and
   // therefore sometimes does not work as expected (b/25883050).
   // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented.
+  // implemented (see TODO in GenCAS).
   //
-  // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
   // this intrinsic.
   if (kEmitCompilerReadBarrier) {
     return;
@@ -2296,6 +2248,15 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS).
+  //
+  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
+  // this intrinsic.
+  DCHECK(!kEmitCompilerReadBarrier);
+
   GenCAS(Primitive::kPrimNot, invoke, codegen_);
 }
 
@@ -2484,7 +2445,7 @@
                       : CTZ(static_cast<uint32_t>(value));
     }
     if (is_long) {
-      codegen->Load64BitValue(out, 1L << value);
+      codegen->Load64BitValue(out, 1ULL << value);
     } else {
       codegen->Load32BitValue(out, 1 << value);
     }
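
The last x86-64 hunk's switch from 1L to 1ULL matters because long is not guaranteed to be 64 bits wide and because shifting a set bit into the sign position of a signed type is not well defined; a tiny self-contained illustration:

#include <cstdint>
#include <iostream>

int main() {
  int shift = 63;
  // 1ULL is at least 64 bits wide, so this shift is fully defined and yields
  // the intended single-bit mask.
  uint64_t mask = 1ULL << shift;
  std::cout << std::hex << mask << "\n";  // 8000000000000000
  // By contrast, `1L << 63` relies on `long` being 64-bit (it is only 32-bit
  // on some hosts) and shifts a bit into the sign position of a signed type,
  // which is undefined or implementation-defined depending on the standard.
  return 0;
}
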
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 8a75a90..7347686 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -65,6 +65,16 @@
         is_singleton_and_not_returned_ = false;
         return;
       }
+      if ((user->IsUnresolvedInstanceFieldGet() && (reference_ == user->InputAt(0))) ||
+          (user->IsUnresolvedInstanceFieldSet() && (reference_ == user->InputAt(0)))) {
+        // The field is accessed in an unresolved way. We mark the object as a non-singleton
+        // to disable load/store optimizations on it.
+        // Note that we could optimize this case and still perform some optimizations until
+        // we hit the unresolved access, but disabling is the simplest.
+        is_singleton_ = false;
+        is_singleton_and_not_returned_ = false;
+        return;
+      }
       if (user->IsReturn()) {
         is_singleton_and_not_returned_ = false;
       }
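
The new check conservatively drops singleton status as soon as a reference flows into an unresolved instance-field access, because those accesses call runtime entrypoints that can touch the object in ways the pass cannot model. A reduced sketch of the same idea, with placeholder types rather than the real ReferenceInfo:

// Hypothetical, simplified model of the check added above; the real pass
// walks the HInstruction use list.
enum class UseKind { kResolvedAccess, kUnresolvedAccess, kReturn };

struct ReferenceInfo {
  bool is_singleton = true;
  bool is_singleton_and_not_returned = true;

  void VisitUse(UseKind use) {
    if (use == UseKind::kUnresolvedAccess) {
      // Unresolved accesses may read or write the object through the runtime,
      // so stop treating the reference as a singleton entirely.
      is_singleton = false;
      is_singleton_and_not_returned = false;
      return;
    }
    if (use == UseKind::kReturn) {
      is_singleton_and_not_returned = false;
    }
  }
};

int main() {
  ReferenceInfo info;
  info.VisitUse(UseKind::kResolvedAccess);
  info.VisitUse(UseKind::kUnresolvedAccess);
  return info.is_singleton ? 1 : 0;  // 0: load/store elimination backs off
}
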
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 63bbc2c..3f27c91 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -38,7 +38,13 @@
 class Location : public ValueObject {
  public:
   enum OutputOverlap {
+    // The liveness of the output overlaps the liveness of one or more
+    // inputs; the register allocator cannot reuse an input's location
+    // for the output's location.
     kOutputOverlap,
+    // The liveness of the output does not overlap the liveness of any
+    // input; the register allocator is allowed to reuse an input's
+    // location for the output's location.
     kNoOutputOverlap
   };
 
@@ -494,6 +500,10 @@
     return inputs_.size();
   }
 
+  // Set the output location.  Argument `overlaps` tells whether the
+  // output overlaps any of the inputs (if so, it cannot share the
+  // same register as one of the inputs); it is set to
+  // `Location::kOutputOverlap` by default for safety.
   void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) {
     DCHECK(output_.IsInvalid());
     output_overlaps_ = overlaps;
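
These comments pin down the contract behind the kOutputOverlap switches in the intrinsics above: kNoOutputOverlap lets the register allocator hand the output an input's register, which is only safe when no input stays live past the point where the output is defined; a slow-path call keeps the inputs live, hence the can_call checks. A small sketch of how the flag could be chosen and consumed, with hypothetical helper names:

#include <iostream>

// Simplified mirror of Location::OutputOverlap semantics.
enum class OutputOverlap { kOutputOverlap, kNoOutputOverlap };

// The allocator may reuse an input's register for the output only when the
// output's liveness does not overlap the inputs' liveness.
bool CanReuseInputRegister(OutputOverlap overlaps) {
  return overlaps == OutputOverlap::kNoOutputOverlap;
}

// Typical builder decision from the hunks above (names are placeholders):
OutputOverlap ChooseOverlapForUnsafeGet(bool can_call_slow_path) {
  // A slow-path call keeps the inputs live past the output definition,
  // so register reuse would clobber them.
  return can_call_slow_path ? OutputOverlap::kOutputOverlap
                            : OutputOverlap::kNoOutputOverlap;
}

int main() {
  std::cout << CanReuseInputRegister(ChooseOverlapForUnsafeGet(true)) << "\n";   // 0
  std::cout << CanReuseInputRegister(ChooseOverlapForUnsafeGet(false)) << "\n";  // 1
}
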
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 4b4e549..d557f42 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -754,7 +754,7 @@
 }
 
 static void UpdateInputsUsers(HInstruction* instruction) {
-  auto&& inputs = instruction->GetInputs();
+  HInputsRef inputs = instruction->GetInputs();
   for (size_t i = 0; i < inputs.size(); ++i) {
     inputs[i]->AddUseAt(instruction, i);
   }
@@ -1312,8 +1312,8 @@
   DCHECK_EQ(GetKind(), other->GetKind());
   if (!InstructionDataEquals(other)) return false;
   if (GetType() != other->GetType()) return false;
-  auto&& inputs = GetInputs();
-  auto&& other_inputs = other->GetInputs();
+  HConstInputsRef inputs = GetInputs();
+  HConstInputsRef other_inputs = other->GetInputs();
   if (inputs.size() != other_inputs.size()) return false;
   for (size_t i = 0; i != inputs.size(); ++i) {
     if (inputs[i] != other_inputs[i]) return false;
@@ -2430,8 +2430,69 @@
   }
 }
 
+bool HLoadClass::InstructionDataEquals(const HInstruction* other) const {
+  const HLoadClass* other_load_class = other->AsLoadClass();
+  // TODO: To allow GVN for HLoadClass from different dex files, we should compare the type
+  // names rather than type indexes. However, we shall also have to re-think the hash code.
+  if (type_index_ != other_load_class->type_index_ ||
+      GetPackedFields() != other_load_class->GetPackedFields()) {
+    return false;
+  }
+  LoadKind load_kind = GetLoadKind();
+  if (HasAddress(load_kind)) {
+    return GetAddress() == other_load_class->GetAddress();
+  } else if (HasTypeReference(load_kind)) {
+    return IsSameDexFile(GetDexFile(), other_load_class->GetDexFile());
+  } else {
+    DCHECK(HasDexCacheReference(load_kind)) << load_kind;
+    // If the type indexes and dex files are the same, dex cache element offsets
+    // must also be the same, so we don't need to compare them.
+    return IsSameDexFile(GetDexFile(), other_load_class->GetDexFile());
+  }
+}
+
+void HLoadClass::SetLoadKindInternal(LoadKind load_kind) {
+  // Once sharpened, the load kind should not be changed again.
+  // Also, kReferrersClass should never be overwritten.
+  DCHECK_EQ(GetLoadKind(), LoadKind::kDexCacheViaMethod);
+  SetPackedField<LoadKindField>(load_kind);
+
+  if (load_kind != LoadKind::kDexCacheViaMethod) {
+    RemoveAsUserOfInput(0u);
+    SetRawInputAt(0u, nullptr);
+  }
+  if (!NeedsEnvironment()) {
+    RemoveEnvironment();
+    SetSideEffects(SideEffects::None());
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) {
+  switch (rhs) {
+    case HLoadClass::LoadKind::kReferrersClass:
+      return os << "ReferrersClass";
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+      return os << "BootImageLinkTimeAddress";
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+      return os << "BootImageLinkTimePcRelative";
+    case HLoadClass::LoadKind::kBootImageAddress:
+      return os << "BootImageAddress";
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      return os << "DexCacheAddress";
+    case HLoadClass::LoadKind::kDexCachePcRelative:
+      return os << "DexCachePcRelative";
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      return os << "DexCacheViaMethod";
+    default:
+      LOG(FATAL) << "Unknown HLoadClass::LoadKind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
 bool HLoadString::InstructionDataEquals(const HInstruction* other) const {
   const HLoadString* other_load_string = other->AsLoadString();
+  // TODO: To allow GVN for HLoadString from different dex files, we should compare the strings
+  // rather than their indexes. However, we shall also have to re-think the hash code.
   if (string_index_ != other_load_string->string_index_ ||
       GetPackedFields() != other_load_string->GetPackedFields()) {
     return false;
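
The new HLoadClass::InstructionDataEquals compares kind-specific data: an embedded address for address-based kinds, or the dex file (by identity) otherwise, which is what lets GVN merge only HLoadClass nodes that load the same class through the same mechanism. A simplified standalone model of that kind-dependent equality; FakeLoadClass and its fields are illustrative, not the real node:

#include <cstdint>
#include <iostream>
#include <string>

enum class LoadKind { kBootImageAddress, kDexCacheViaMethod };

struct FakeLoadClass {
  LoadKind kind;
  uint32_t type_index;
  const std::string* dex_file;  // identity comparison, like IsSameDexFile()
  uint64_t address;             // only meaningful for kBootImageAddress

  bool Equals(const FakeLoadClass& other) const {
    if (kind != other.kind || type_index != other.type_index) {
      return false;
    }
    // Compare the payload that matters for this load kind.
    return kind == LoadKind::kBootImageAddress ? address == other.address
                                               : dex_file == other.dex_file;
  }
};

int main() {
  std::string dex = "core.dex";
  FakeLoadClass a{LoadKind::kDexCacheViaMethod, 7, &dex, 0};
  FakeLoadClass b{LoadKind::kDexCacheViaMethod, 7, &dex, 0};
  std::cout << a.Equals(b) << "\n";  // 1
}
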
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 711a6c1..0f0ef26 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -26,6 +26,7 @@
 #include "base/arena_object.h"
 #include "base/stl_util.h"
 #include "dex/compiler_enums.h"
+#include "dex_file.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "handle.h"
 #include "handle_scope.h"
@@ -85,6 +86,16 @@
 
 static constexpr uint32_t kNoDexPc = -1;
 
+inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) {
+  // For the purposes of the compiler, the dex files must actually be the same object
+  // if we want to safely treat them as the same. This is especially important for JIT
+  // as custom class loaders can open the same underlying file (or memory) multiple
+  // times and provide different class resolution but no two class loaders should ever
+  // use the same DexFile object - doing so is an unsupported hack that can lead to
+  // all sorts of weird failures.
+  return &lhs == &rhs;
+}
+
 enum IfCondition {
   // All types.
   kCondEQ,  // ==
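
IsSameDexFile deliberately compares object identity rather than file contents: two class loaders may map byte-identical dex files and still resolve classes differently, so only the same DexFile object counts as "the same". A tiny illustration with a stand-in DexFile type:

#include <iostream>
#include <string>

struct DexFile { std::string location; };  // stand-in, not the real class

// Identity comparison, as in the hunk above: same object, not same contents.
inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) {
  return &lhs == &rhs;
}

int main() {
  DexFile a{"classes.dex"};
  DexFile b{"classes.dex"};  // same contents, different object
  std::cout << IsSameDexFile(a, a) << " " << IsSameDexFile(a, b) << "\n";  // 1 0
}
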
@@ -161,6 +172,10 @@
 
   static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact);
 
+  static ReferenceTypeInfo Create(TypeHandle type_handle) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return Create(type_handle, type_handle->CannotBeAssignedFromOtherTypes());
+  }
+
   static ReferenceTypeInfo CreateUnchecked(TypeHandle type_handle, bool is_exact) {
     return ReferenceTypeInfo(type_handle, is_exact);
   }
@@ -1292,7 +1307,13 @@
   M(Arm64IntermediateAddress, Instruction)
 #endif
 
+#ifndef ART_ENABLE_CODEGEN_mips
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)                           \
+  M(MipsComputeBaseMethodAddress, Instruction)                          \
+  M(MipsDexCacheArraysBase, Instruction)
+#endif
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M)
 
@@ -1400,6 +1421,21 @@
   typename HUseList<T>::iterator before_use_node_;
 };
 
+// Helper class that extracts the input instruction from HUserRecord<HInstruction*>.
+// This is used for HInstruction::GetInputs() to return a container wrapper providing
+// HInstruction* values even though the underlying container has HUserRecord<>s.
+struct HInputExtractor {
+  HInstruction* operator()(HUserRecord<HInstruction*>& record) const {
+    return record.GetInstruction();
+  }
+  const HInstruction* operator()(const HUserRecord<HInstruction*>& record) const {
+    return record.GetInstruction();
+  }
+};
+
+using HInputsRef = TransformArrayRef<HUserRecord<HInstruction*>, HInputExtractor>;
+using HConstInputsRef = TransformArrayRef<const HUserRecord<HInstruction*>, HInputExtractor>;
+
 /**
  * Side-effects representation.
  *
@@ -1789,20 +1825,12 @@
         const_cast<HInstruction*>(this)->GetInputRecords());
   }
 
-  auto GetInputs() {
-    return MakeTransformArrayRef(
-        GetInputRecords(),
-        [](HUserRecord<HInstruction*>& record) -> HInstruction* {
-            return record.GetInstruction();
-        });
+  HInputsRef GetInputs() {
+    return MakeTransformArrayRef(GetInputRecords(), HInputExtractor());
   }
 
-  auto GetInputs() const {
-    return MakeTransformArrayRef(
-        GetInputRecords(),
-        [](const HUserRecord<HInstruction*>& record) -> const HInstruction* {
-            return record.GetInstruction();
-        });
+  HConstInputsRef GetInputs() const {
+    return MakeTransformArrayRef(GetInputRecords(), HInputExtractor());
   }
 
   size_t InputCount() const { return GetInputRecords().size(); }
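
Naming the extractor (HInputExtractor) instead of using lambdas gives GetInputs() a spellable return type, so call sites can write HInputsRef / HConstInputsRef rather than auto&&. A self-contained sketch of the same transform-view idea, using simplified placeholder types rather than ART's TransformArrayRef:

#include <cstddef>
#include <iostream>
#include <vector>

// Simplified stand-ins: a use record that wraps the input instruction.
struct HInstructionStub { int id; };
struct HUserRecord { HInstructionStub* instruction; };

// Named extractor, analogous to HInputExtractor: turns records into inputs.
struct InputExtractor {
  HInstructionStub* operator()(HUserRecord& record) const { return record.instruction; }
};

// Minimal transform view so the result of GetInputs() has a nameable type.
template <typename T, typename Fn>
class TransformRef {
 public:
  TransformRef(std::vector<T>& data, Fn fn) : data_(data), fn_(fn) {}
  size_t size() const { return data_.size(); }
  auto operator[](size_t i) const { return fn_(data_[i]); }
 private:
  std::vector<T>& data_;
  Fn fn_;
};

using InputsRef = TransformRef<HUserRecord, InputExtractor>;

int main() {
  HInstructionStub a{1}, b{2};
  std::vector<HUserRecord> records{{&a}, {&b}};
  InputsRef inputs(records, InputExtractor());
  for (size_t i = 0; i < inputs.size(); ++i) {
    std::cout << inputs[i]->id << " ";  // 1 2
  }
  std::cout << "\n";
}
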
@@ -1920,6 +1948,14 @@
     environment_ = environment;
   }
 
+  void InsertRawEnvironment(HEnvironment* environment) {
+    DCHECK(environment_ != nullptr);
+    DCHECK_EQ(environment->GetHolder(), this);
+    DCHECK(environment->GetParent() == nullptr);
+    environment->parent_ = environment_;
+    environment_ = environment;
+  }
+
   void RemoveEnvironment();
 
   // Set the environment of this instruction, copying it from `environment`. While
@@ -2385,7 +2421,7 @@
   bool IsDead() const { return !IsLive(); }
   bool IsLive() const { return GetPackedFlag<kFlagIsLive>(); }
 
-  bool IsVRegEquivalentOf(HInstruction* other) const {
+  bool IsVRegEquivalentOf(const HInstruction* other) const {
     return other != nullptr
         && other->IsPhi()
         && other->AsPhi()->GetBlock() == GetBlock()
@@ -3137,7 +3173,7 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x == y; }
+  template <typename T> static bool Compute(T x, T y) { return x == y; }
 
   DISALLOW_COPY_AND_ASSIGN(HEqual);
 };
@@ -3180,7 +3216,7 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x != y; }
+  template <typename T> static bool Compute(T x, T y) { return x != y; }
 
   DISALLOW_COPY_AND_ASSIGN(HNotEqual);
 };
@@ -3217,7 +3253,7 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x < y; }
+  template <typename T> static bool Compute(T x, T y) { return x < y; }
 
   DISALLOW_COPY_AND_ASSIGN(HLessThan);
 };
@@ -3254,7 +3290,7 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x <= y; }
+  template <typename T> static bool Compute(T x, T y) { return x <= y; }
 
   DISALLOW_COPY_AND_ASSIGN(HLessThanOrEqual);
 };
@@ -3291,7 +3327,7 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x > y; }
+  template <typename T> static bool Compute(T x, T y) { return x > y; }
 
   DISALLOW_COPY_AND_ASSIGN(HGreaterThan);
 };
@@ -3328,7 +3364,7 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const { return x >= y; }
+  template <typename T> static bool Compute(T x, T y) { return x >= y; }
 
   DISALLOW_COPY_AND_ASSIGN(HGreaterThanOrEqual);
 };
@@ -3366,7 +3402,7 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const {
+  template <typename T> static bool Compute(T x, T y) {
     return MakeUnsigned(x) < MakeUnsigned(y);
   }
 
@@ -3406,7 +3442,7 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const {
+  template <typename T> static bool Compute(T x, T y) {
     return MakeUnsigned(x) <= MakeUnsigned(y);
   }
 
@@ -3446,7 +3482,7 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const {
+  template <typename T> static bool Compute(T x, T y) {
     return MakeUnsigned(x) > MakeUnsigned(y);
   }
 
@@ -3486,7 +3522,7 @@
   }
 
  private:
-  template <typename T> bool Compute(T x, T y) const {
+  template <typename T> static bool Compute(T x, T y) {
     return MakeUnsigned(x) >= MakeUnsigned(y);
   }
 
@@ -4159,7 +4195,7 @@
     DCHECK_EQ(result_type, Primitive::PrimitiveKind(input->GetType()));
   }
 
-  template <typename T> T Compute(T x) const { return -x; }
+  template <typename T> static T Compute(T x) { return -x; }
 
   HConstant* Evaluate(HIntConstant* x) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc());
@@ -4229,7 +4265,7 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T> T Compute(T x, T y) const { return x + y; }
+  template <typename T> static T Compute(T x, T y) { return x + y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -4262,7 +4298,7 @@
        uint32_t dex_pc = kNoDexPc)
       : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {}
 
-  template <typename T> T Compute(T x, T y) const { return x - y; }
+  template <typename T> static T Compute(T x, T y) { return x - y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -4297,7 +4333,7 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T> T Compute(T x, T y) const { return x * y; }
+  template <typename T> static T Compute(T x, T y) { return x * y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -4463,7 +4499,7 @@
   }
 
   template <typename T>
-  T Compute(T value, int32_t distance, int32_t max_shift_distance) const {
+  static T Compute(T value, int32_t distance, int32_t max_shift_distance) {
     return value << (distance & max_shift_distance);
   }
 
@@ -4509,7 +4545,7 @@
   }
 
   template <typename T>
-  T Compute(T value, int32_t distance, int32_t max_shift_distance) const {
+  static T Compute(T value, int32_t distance, int32_t max_shift_distance) {
     return value >> (distance & max_shift_distance);
   }
 
@@ -4555,7 +4591,7 @@
   }
 
   template <typename T>
-  T Compute(T value, int32_t distance, int32_t max_shift_distance) const {
+  static T Compute(T value, int32_t distance, int32_t max_shift_distance) {
     typedef typename std::make_unsigned<T>::type V;
     V ux = static_cast<V>(value);
     return static_cast<T>(ux >> (distance & max_shift_distance));
@@ -4601,7 +4637,7 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T> T Compute(T x, T y) const { return x & y; }
+  template <typename T> static T Compute(T x, T y) { return x & y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -4638,7 +4674,7 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T> T Compute(T x, T y) const { return x | y; }
+  template <typename T> static T Compute(T x, T y) { return x | y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -4675,7 +4711,7 @@
 
   bool IsCommutative() const OVERRIDE { return true; }
 
-  template <typename T> T Compute(T x, T y) const { return x ^ y; }
+  template <typename T> static T Compute(T x, T y) { return x ^ y; }
 
   HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(
@@ -4711,7 +4747,7 @@
   }
 
   template <typename T>
-  T Compute(T value, int32_t distance, int32_t max_shift_value) const {
+  static T Compute(T value, int32_t distance, int32_t max_shift_value) {
     typedef typename std::make_unsigned<T>::type V;
     V ux = static_cast<V>(value);
     if ((distance & max_shift_value) == 0) {
@@ -4807,7 +4843,7 @@
     return true;
   }
 
-  template <typename T> T Compute(T x) const { return ~x; }
+  template <typename T> static T Compute(T x) { return ~x; }
 
   HConstant* Evaluate(HIntConstant* x) const OVERRIDE {
     return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc());
@@ -4840,7 +4876,7 @@
     return true;
   }
 
-  template <typename T> bool Compute(T x) const {
+  template <typename T> static bool Compute(T x) {
     DCHECK(IsUint<1>(x)) << x;
     return !x;
   }
@@ -5006,7 +5042,7 @@
   }
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
-    return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize;
+    return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value());
   }
 
   size_t ComputeHashCode() const OVERRIDE {
@@ -5053,7 +5089,7 @@
   }
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
-    return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize;
+    return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value());
   }
 
   const FieldInfo& GetFieldInfo() const { return field_info_; }
@@ -5079,8 +5115,13 @@
 
 class HArrayGet FINAL : public HExpression<2> {
  public:
-  HArrayGet(HInstruction* array, HInstruction* index, Primitive::Type type, uint32_t dex_pc)
+  HArrayGet(HInstruction* array,
+            HInstruction* index,
+            Primitive::Type type,
+            uint32_t dex_pc,
+            bool is_string_char_at = false)
       : HExpression(type, SideEffects::ArrayReadOfType(type), dex_pc) {
+    SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at);
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
   }
@@ -5114,12 +5155,24 @@
     return result;
   }
 
+  bool IsStringCharAt() const { return GetPackedFlag<kFlagIsStringCharAt>(); }
+
   HInstruction* GetArray() const { return InputAt(0); }
   HInstruction* GetIndex() const { return InputAt(1); }
 
   DECLARE_INSTRUCTION(ArrayGet);
 
  private:
+  // We treat a String as an array, creating the HArrayGet from String.charAt()
+  // intrinsic in the instruction simplifier. We can always determine whether
+  // a particular HArrayGet is actually a String.charAt() by looking at the type
+  // of the input, but that requires holding the mutator lock, so we prefer to use
+  // a flag so that code generators don't need to do the locking.
+  static constexpr size_t kFlagIsStringCharAt = kNumberOfExpressionPackedBits;
+  static constexpr size_t kNumberOfArrayGetPackedBits = kFlagIsStringCharAt + 1;
+  static_assert(kNumberOfArrayGetPackedBits <= HInstruction::kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+
   DISALLOW_COPY_AND_ASSIGN(HArrayGet);
 };
 
@@ -5225,8 +5278,9 @@
 
 class HArrayLength FINAL : public HExpression<1> {
  public:
-  HArrayLength(HInstruction* array, uint32_t dex_pc)
+  HArrayLength(HInstruction* array, uint32_t dex_pc, bool is_string_length = false)
       : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
+    SetPackedFlag<kFlagIsStringLength>(is_string_length);
     // Note that arrays do not change length, so the instruction does not
     // depend on any write.
     SetRawInputAt(0, array);
@@ -5240,7 +5294,6 @@
     return obj == InputAt(0);
   }
 
-  void MarkAsStringLength() { SetPackedFlag<kFlagIsStringLength>(); }
   bool IsStringLength() const { return GetPackedFlag<kFlagIsStringLength>(); }
 
   DECLARE_INSTRUCTION(ArrayLength);
@@ -5263,8 +5316,12 @@
  public:
   // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException`
   // constructor.
-  HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc)
-      : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
+  HBoundsCheck(HInstruction* index,
+               HInstruction* length,
+               uint32_t dex_pc,
+               uint32_t string_char_at_method_index = DexFile::kDexNoIndex)
+      : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc),
+        string_char_at_method_index_(string_char_at_method_index) {
     DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(index->GetType()));
     SetRawInputAt(0, index);
     SetRawInputAt(1, length);
@@ -5279,11 +5336,23 @@
 
   bool CanThrow() const OVERRIDE { return true; }
 
+  bool IsStringCharAt() const { return GetStringCharAtMethodIndex() != DexFile::kDexNoIndex; }
+  uint32_t GetStringCharAtMethodIndex() const { return string_char_at_method_index_; }
+
   HInstruction* GetIndex() const { return InputAt(0); }
 
   DECLARE_INSTRUCTION(BoundsCheck);
 
  private:
+  // We treat a String as an array, creating the HBoundsCheck from String.charAt()
+  // intrinsic in the instruction simplifier. We want to include the String.charAt()
+  // frame in the stack trace if we actually throw a StringIndexOutOfBoundsException,
+  // so we need to create an HEnvironment which will be translated to an InlineInfo
+  // indicating the extra stack frame. Since we add this HEnvironment quite late,
+  // in the PrepareForRegisterAllocation pass, we need to remember the method index
+  // from the invoke as we don't want to look again at the dex bytecode.
+  uint32_t string_char_at_method_index_;  // DexFile::kDexNoIndex if regular array.
+
   DISALLOW_COPY_AND_ASSIGN(HBoundsCheck);
 };
 
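
Taken together, the is_string_char_at flag on HArrayGet and the string_char_at_method_index_ on HBoundsCheck let the simplifier lower String.charAt() into a bounds check plus a char load while keeping the data codegen needs (the String value offset) and the data exception reporting needs (the extra charAt frame). A behavioural sketch of the lowered semantics; the method index 42 and the helper names are illustrative only:

#include <cstdint>
#include <stdexcept>
#include <string>
#include <vector>

constexpr uint32_t kDexNoIndex = 0xFFFFFFFFu;  // sentinel for regular arrays

uint32_t BoundsCheck(uint32_t index, uint32_t length, uint32_t char_at_method_index) {
  if (index >= length) {
    // The recorded method index would become an extra InlineInfo frame, so the
    // trace shows String.charAt even though no call was actually emitted.
    throw std::out_of_range("StringIndexOutOfBounds in method #" +
                            std::to_string(char_at_method_index));
  }
  return index;
}

uint16_t ArrayGet(const std::vector<uint16_t>& chars, uint32_t index,
                  bool is_string_char_at) {
  // In the compiler the flag only changes the data offset used by codegen;
  // here it is carried along purely for illustration.
  (void)is_string_char_at;
  return chars[index];
}

int main() {
  std::vector<uint16_t> value{'a', 'b', 'c'};
  uint32_t i = BoundsCheck(1, value.size(), /* char_at_method_index= */ 42);
  return ArrayGet(value, i, /* is_string_char_at= */ true) == 'b' ? 0 : 1;
}
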
@@ -5329,8 +5398,44 @@
 /**
  * Instruction to load a Class object.
  */
-class HLoadClass FINAL : public HExpression<1> {
+class HLoadClass FINAL : public HInstruction {
  public:
+  // Determines how to load the Class.
+  enum class LoadKind {
+    // Use the Class* from the method's own ArtMethod*.
+    kReferrersClass,
+
+    // Use boot image Class* address that will be known at link time.
+    // Used for boot image classes referenced by boot image code in non-PIC mode.
+    kBootImageLinkTimeAddress,
+
+    // Use PC-relative boot image Class* address that will be known at link time.
+    // Used for boot image classes referenced by boot image code in PIC mode.
+    kBootImageLinkTimePcRelative,
+
+    // Use a known boot image Class* address, embedded in the code by the codegen.
+    // Used for boot image classes referenced by apps in AOT- and JIT-compiled code.
+    // Note: codegen needs to emit a linker patch if indicated by compiler options'
+    // GetIncludePatchInformation().
+    kBootImageAddress,
+
+    // Load from the resolved types array at an absolute address.
+    // Used for classes outside the boot image referenced by JIT-compiled code.
+    kDexCacheAddress,
+
+    // Load from resolved types array in the dex cache using a PC-relative load.
+    // Used for classes outside boot image when we know that we can access
+    // the dex cache arrays using a PC-relative load.
+    kDexCachePcRelative,
+
+    // Load from resolved types array accessed through the class loaded from
+    // the compiled method's own ArtMethod*. This is the default access type when
+    // all other types are unavailable.
+    kDexCacheViaMethod,
+
+    kLast = kDexCacheViaMethod
+  };
+
   HLoadClass(HCurrentMethod* current_method,
              uint16_t type_index,
              const DexFile& dex_file,
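
The LoadKind values above are ordered from the cheapest way to materialize a Class (the referrer's own class) down to the generic dex-cache-via-method fallback; the SetLoadKindWith* helpers that follow are how a later pass sharpens a node from that fallback to a better kind. A hypothetical selection sketch only, to make the trade-offs concrete; the real decision lives in the sharpening pass:

#include <iostream>

// Simplified mirror of the LoadKind choices documented above.
enum class LoadKind {
  kReferrersClass,
  kBootImageLinkTimeAddress,
  kBootImageLinkTimePcRelative,
  kBootImageAddress,
  kDexCacheAddress,
  kDexCachePcRelative,
  kDexCacheViaMethod,
};

// Plausible-shaped chooser; the predicates and their priority are assumptions.
LoadKind ChooseLoadKind(bool is_referrers_class, bool in_boot_image,
                        bool compiling_boot_image, bool pic, bool jit) {
  if (is_referrers_class) return LoadKind::kReferrersClass;
  if (in_boot_image) {
    if (compiling_boot_image) {
      return pic ? LoadKind::kBootImageLinkTimePcRelative
                 : LoadKind::kBootImageLinkTimeAddress;
    }
    return LoadKind::kBootImageAddress;
  }
  if (jit) return LoadKind::kDexCacheAddress;
  return pic ? LoadKind::kDexCachePcRelative : LoadKind::kDexCacheViaMethod;
}

int main() {
  std::cout << static_cast<int>(ChooseLoadKind(false, true, false, true, false)) << "\n";
}
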
@@ -5338,7 +5443,8 @@
              uint32_t dex_pc,
              bool needs_access_check,
              bool is_in_dex_cache)
-      : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc),
+      : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc),
+        special_input_(HUserRecord<HInstruction*>(current_method)),
         type_index_(type_index),
         dex_file_(dex_file),
         loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
@@ -5346,26 +5452,47 @@
     // methods so we can't possibly end up in this situation.
     DCHECK(!is_referrers_class || !needs_access_check);
 
-    SetPackedFlag<kFlagIsReferrersClass>(is_referrers_class);
+    SetPackedField<LoadKindField>(
+        is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod);
     SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
     SetPackedFlag<kFlagIsInDexCache>(is_in_dex_cache);
     SetPackedFlag<kFlagGenerateClInitCheck>(false);
-    SetRawInputAt(0, current_method);
+  }
+
+  void SetLoadKindWithAddress(LoadKind load_kind, uint64_t address) {
+    DCHECK(HasAddress(load_kind));
+    load_data_.address = address;
+    SetLoadKindInternal(load_kind);
+  }
+
+  void SetLoadKindWithTypeReference(LoadKind load_kind,
+                                    const DexFile& dex_file,
+                                    uint32_t type_index) {
+    DCHECK(HasTypeReference(load_kind));
+    DCHECK(IsSameDexFile(dex_file_, dex_file));
+    DCHECK_EQ(type_index_, type_index);
+    SetLoadKindInternal(load_kind);
+  }
+
+  void SetLoadKindWithDexCacheReference(LoadKind load_kind,
+                                        const DexFile& dex_file,
+                                        uint32_t element_index) {
+    DCHECK(HasDexCacheReference(load_kind));
+    DCHECK(IsSameDexFile(dex_file_, dex_file));
+    load_data_.dex_cache_element_index = element_index;
+    SetLoadKindInternal(load_kind);
+  }
+
+  LoadKind GetLoadKind() const {
+    return GetPackedField<LoadKindField>();
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
-    // Note that we don't need to test for generate_clinit_check_.
-    // Whether or not we need to generate the clinit check is processed in
-    // prepare_for_register_allocator based on existing HInvokes and HClinitChecks.
-    return other->AsLoadClass()->type_index_ == type_index_ &&
-        other->AsLoadClass()->GetPackedFields() == GetPackedFields();
-  }
+  bool InstructionDataEquals(const HInstruction* other) const;
 
   size_t ComputeHashCode() const OVERRIDE { return type_index_; }
 
-  uint16_t GetTypeIndex() const { return type_index_; }
   bool CanBeNull() const OVERRIDE { return false; }
 
   bool NeedsEnvironment() const OVERRIDE {
@@ -5400,7 +5527,15 @@
     loaded_class_rti_ = rti;
   }
 
-  const DexFile& GetDexFile() { return dex_file_; }
+  uint32_t GetTypeIndex() const { return type_index_; }
+  const DexFile& GetDexFile() const { return dex_file_; }
+
+  uint32_t GetDexCacheElementOffset() const;
+
+  uint64_t GetAddress() const {
+    DCHECK(HasAddress(GetLoadKind()));
+    return load_data_.address;
+  }
 
   bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { return !IsReferrersClass(); }
 
@@ -5408,30 +5543,96 @@
     return SideEffects::CanTriggerGC();
   }
 
-  bool IsReferrersClass() const { return GetPackedFlag<kFlagIsReferrersClass>(); }
+  bool IsReferrersClass() const { return GetLoadKind() == LoadKind::kReferrersClass; }
   bool NeedsAccessCheck() const { return GetPackedFlag<kFlagNeedsAccessCheck>(); }
   bool IsInDexCache() const { return GetPackedFlag<kFlagIsInDexCache>(); }
   bool MustGenerateClinitCheck() const { return GetPackedFlag<kFlagGenerateClInitCheck>(); }
 
+  void MarkInDexCache() {
+    SetPackedFlag<kFlagIsInDexCache>(true);
+    DCHECK(!NeedsEnvironment());
+    RemoveEnvironment();
+    SetSideEffects(SideEffects::None());
+  }
+
+  void AddSpecialInput(HInstruction* special_input);
+
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>(
+        &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u);
+  }
+
+  Primitive::Type GetType() const OVERRIDE {
+    return Primitive::kPrimNot;
+  }
+
   DECLARE_INSTRUCTION(LoadClass);
 
  private:
-  static constexpr size_t kFlagIsReferrersClass    = kNumberOfExpressionPackedBits;
-  static constexpr size_t kFlagNeedsAccessCheck    = kFlagIsReferrersClass + 1;
+  static constexpr size_t kFlagNeedsAccessCheck    = kNumberOfGenericPackedBits;
   static constexpr size_t kFlagIsInDexCache        = kFlagNeedsAccessCheck + 1;
   // Whether this instruction must generate the initialization check.
   // Used for code generation.
   static constexpr size_t kFlagGenerateClInitCheck = kFlagIsInDexCache + 1;
-  static constexpr size_t kNumberOfLoadClassPackedBits = kFlagGenerateClInitCheck + 1;
+  static constexpr size_t kFieldLoadKind           = kFlagGenerateClInitCheck + 1;
+  static constexpr size_t kFieldLoadKindSize =
+      MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
+  static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize;
   static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields.");
+  using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>;
+
+  static bool HasTypeReference(LoadKind load_kind) {
+    return load_kind == LoadKind::kBootImageLinkTimeAddress ||
+        load_kind == LoadKind::kBootImageLinkTimePcRelative ||
+        load_kind == LoadKind::kDexCacheViaMethod ||
+        load_kind == LoadKind::kReferrersClass;
+  }
+
+  static bool HasAddress(LoadKind load_kind) {
+    return load_kind == LoadKind::kBootImageAddress || load_kind == LoadKind::kDexCacheAddress;
+  }
+
+  static bool HasDexCacheReference(LoadKind load_kind) {
+    return load_kind == LoadKind::kDexCachePcRelative;
+  }
+
+  void SetLoadKindInternal(LoadKind load_kind);
+
+  // The special input is the HCurrentMethod for kDexCacheViaMethod or kReferrersClass.
+  // For other load kinds it's empty or possibly some architecture-specific instruction
+  // for PC-relative loads, i.e. kDexCachePcRelative or kBootImageLinkTimePcRelative.
+  HUserRecord<HInstruction*> special_input_;
 
   const uint16_t type_index_;
   const DexFile& dex_file_;
 
+  union {
+    uint32_t dex_cache_element_index;   // Only for dex cache reference.
+    uint64_t address;  // Up to 64-bit, needed for kDexCacheAddress on 64-bit targets.
+  } load_data_;
+
   ReferenceTypeInfo loaded_class_rti_;
 
   DISALLOW_COPY_AND_ASSIGN(HLoadClass);
 };
+std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs);
+
+// Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
+inline uint32_t HLoadClass::GetDexCacheElementOffset() const {
+  DCHECK(HasDexCacheReference(GetLoadKind())) << GetLoadKind();
+  return load_data_.dex_cache_element_index;
+}
+
+// Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
+inline void HLoadClass::AddSpecialInput(HInstruction* special_input) {
+  // The special input is used for PC-relative loads on some architectures.
+  DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
+         GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind();
+  DCHECK(special_input_.GetInstruction() == nullptr);
+  special_input_ = HUserRecord<HInstruction*>(special_input);
+  special_input->AddUseAt(this, 0);
+}
 
 class HLoadString FINAL : public HInstruction {
  public:
@@ -5599,6 +5800,9 @@
 
   void SetLoadKindInternal(LoadKind load_kind);
 
+  // The special input is the HCurrentMethod for kDexCacheViaMethod.
+  // For other load kinds it's empty or possibly some architecture-specific instruction
+  // for PC-relative loads, i.e. kDexCachePcRelative or kBootImageLinkTimePcRelative.
   HUserRecord<HInstruction*> special_input_;
 
   // String index serves also as the hash code and it's also needed for slow-paths,
@@ -6353,6 +6557,9 @@
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "nodes_arm64.h"
 #endif
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "nodes_mips.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_x86
 #include "nodes_x86.h"
 #endif
@@ -6572,16 +6779,6 @@
   }
 }
 
-inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) {
-  // For the purposes of the compiler, the dex files must actually be the same object
-  // if we want to safely treat them as the same. This is especially important for JIT
-  // as custom class loaders can open the same underlying file (or memory) multiple
-  // times and provide different class resolution but no two class loaders should ever
-  // use the same DexFile object - doing so is an unsupported hack that can lead to
-  // all sorts of weird failures.
-  return &lhs == &rhs;
-}
-
 #define INSTRUCTION_TYPE_CHECK(type, super)                                    \
   inline bool HInstruction::Is##type() const { return GetKind() == k##type; }  \
   inline const H##type* HInstruction::As##type() const {                       \
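
The HLoadClass changes above replace the single is-referrers-class flag with a LoadKind packed next to the boolean flags via kFieldLoadKind, MinimumBitsToStore() and a BitField accessor. Below is a minimal standalone sketch of that packing scheme; the flag positions, MinimumBitsToStore() and the getter/setter are simplified stand-ins for ART's BitField machinery, not the real implementation.

#include <cassert>
#include <cstddef>
#include <cstdint>

// Illustration only: mirrors the idea of packing a LoadKind into spare bits
// of a 32-bit packed-fields word, next to boolean flags.
enum class LoadKind : uint32_t {
  kReferrersClass,
  kBootImageLinkTimeAddress,
  kBootImageLinkTimePcRelative,
  kBootImageAddress,
  kDexCacheAddress,
  kDexCachePcRelative,
  kDexCacheViaMethod,
  kLast = kDexCacheViaMethod
};

constexpr size_t MinimumBitsToStore(size_t value) {
  return value == 0 ? 0 : 1 + MinimumBitsToStore(value >> 1);
}

// Flags occupy the low bits; the load kind field sits right above them.
constexpr size_t kFlagNeedsAccessCheck    = 0;
constexpr size_t kFlagIsInDexCache        = 1;
constexpr size_t kFlagGenerateClInitCheck = 2;
constexpr size_t kFieldLoadKind           = kFlagGenerateClInitCheck + 1;
constexpr size_t kFieldLoadKindSize = MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));

uint32_t SetLoadKind(uint32_t packed, LoadKind kind) {
  const uint32_t mask = ((1u << kFieldLoadKindSize) - 1u) << kFieldLoadKind;
  return (packed & ~mask) | (static_cast<uint32_t>(kind) << kFieldLoadKind);
}

LoadKind GetLoadKind(uint32_t packed) {
  return static_cast<LoadKind>((packed >> kFieldLoadKind) & ((1u << kFieldLoadKindSize) - 1u));
}

int main() {
  uint32_t packed = 0u;
  packed = SetLoadKind(packed, LoadKind::kBootImageAddress);
  assert(GetLoadKind(packed) == LoadKind::kBootImageAddress);  // Flags in bits 0..2 untouched.
  return 0;
}
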
diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h
new file mode 100644
index 0000000..de77245
--- /dev/null
+++ b/compiler/optimizing/nodes_mips.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_MIPS_H_
+#define ART_COMPILER_OPTIMIZING_NODES_MIPS_H_
+
+namespace art {
+
+// Compute the address of the method for MIPS Constant area support.
+class HMipsComputeBaseMethodAddress : public HExpression<0> {
+ public:
+  // Treat the value as an int32_t, but it is really a 32 bit native pointer.
+  HMipsComputeBaseMethodAddress()
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc) {}
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+
+  DECLARE_INSTRUCTION(MipsComputeBaseMethodAddress);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HMipsComputeBaseMethodAddress);
+};
+
+class HMipsDexCacheArraysBase : public HExpression<0> {
+ public:
+  explicit HMipsDexCacheArraysBase(const DexFile& dex_file)
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
+        dex_file_(&dex_file),
+        element_offset_(static_cast<size_t>(-1)) { }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+
+  void UpdateElementOffset(size_t element_offset) {
+    // We'll maximize the range of a single load instruction for dex cache array accesses
+    // by aligning offset -32768 with the offset of the first used element.
+    element_offset_ = std::min(element_offset_, element_offset);
+  }
+
+  const DexFile& GetDexFile() const {
+    return *dex_file_;
+  }
+
+  size_t GetElementOffset() const {
+    return element_offset_;
+  }
+
+  DECLARE_INSTRUCTION(MipsDexCacheArraysBase);
+
+ private:
+  const DexFile* dex_file_;
+  size_t element_offset_;
+
+  DISALLOW_COPY_AND_ASSIGN(HMipsDexCacheArraysBase);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_MIPS_H_
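
The UpdateElementOffset() comment above is easiest to see with numbers: by remembering the smallest element offset actually used and biasing the base register so that this first element sits at displacement -32768, the signed 16-bit offset of a single MIPS load covers a 64 KiB window of the dex cache arrays. A small arithmetic sketch of that idea (the struct and helper names are illustrative, not taken from the code generator):

#include <cassert>
#include <cstdint>

// MIPS lw/sw take a signed 16-bit displacement: [-32768, +32767].
struct DexCacheArraysBasePlan {
  uint32_t min_element_offset;  // Smallest offset seen by UpdateElementOffset().

  // Assume the base register is set to:
  //   dex_cache_arrays_start + min_element_offset + 0x8000,
  // so the first used element lands at displacement -32768.
  int32_t DisplacementFor(uint32_t element_offset) const {
    return static_cast<int32_t>(element_offset - min_element_offset) - 0x8000;
  }

  bool Reachable(uint32_t element_offset) const {
    int32_t disp = DisplacementFor(element_offset);
    return disp >= -0x8000 && disp <= 0x7FFF;
  }
};

int main() {
  DexCacheArraysBasePlan plan{/* min_element_offset */ 0x40u};
  assert(plan.DisplacementFor(0x40u) == -0x8000);   // First used element.
  assert(plan.Reachable(0x40u + 0xFFFFu));          // Anything within 64 KiB of it.
  assert(!plan.Reachable(0x40u + 0x10000u));        // One byte beyond the window.
  return 0;
}
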
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 764160a..05eb063 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -32,21 +32,21 @@
 // 0x00000012: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kArm64[] = {
-    0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0xD7, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9,
-    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0xD7, 0x42, 0xA9,
-    0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+    0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9,
+    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0x17, 0x40, 0xF9,
+    0xF5, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
 };
 static constexpr uint8_t expected_cfi_kArm64[] = {
-    0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44,
+    0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x44, 0x95, 0x04, 0x9E, 0x02, 0x44,
     0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
-    0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
+    0x44, 0xD4, 0x44, 0xD5, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: str x0, [sp, #-64]!
 // 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: stp x20, x21, [sp, #40]
+// 0x00000004: str x20, [sp, #40]
 // 0x00000008: .cfi_offset: r20 at cfa-24
-// 0x00000008: .cfi_offset: r21 at cfa-16
-// 0x00000008: str lr, [sp, #56]
+// 0x00000008: stp x21, lr, [sp, #48]
+// 0x0000000c: .cfi_offset: r21 at cfa-16
 // 0x0000000c: .cfi_offset: r30 at cfa-8
 // 0x0000000c: stp d8, d9, [sp, #24]
 // 0x00000010: .cfi_offset_extended: r72 at cfa-40
@@ -55,10 +55,10 @@
 // 0x00000010: ldp d8, d9, [sp, #24]
 // 0x00000014: .cfi_restore_extended: r72
 // 0x00000014: .cfi_restore_extended: r73
-// 0x00000014: ldp x20, x21, [sp, #40]
+// 0x00000014: ldr x20, [sp, #40]
 // 0x00000018: .cfi_restore: r20
-// 0x00000018: .cfi_restore: r21
-// 0x00000018: ldr lr, [sp, #56]
+// 0x00000018: ldp x21, lr, [sp, #48]
+// 0x0000001c: .cfi_restore: r21
 // 0x0000001c: .cfi_restore: r30
 // 0x0000001c: add sp, sp, #0x40 (64)
 // 0x00000020: .cfi_def_cfa_offset: 0
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index c9a4bfe..d703b0f 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -28,6 +28,11 @@
 #include "instruction_simplifier_arm64.h"
 #endif
 
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "dex_cache_array_fixups_mips.h"
+#include "pc_relative_fixups_mips.h"
+#endif
+
 #ifdef ART_ENABLE_CODEGEN_x86
 #include "pc_relative_fixups_x86.h"
 #endif
@@ -462,6 +467,20 @@
       break;
     }
 #endif
+#ifdef ART_ENABLE_CODEGEN_mips
+    case kMips: {
+      mips::PcRelativeFixups* pc_relative_fixups =
+          new (arena) mips::PcRelativeFixups(graph, codegen, stats);
+      mips::DexCacheArrayFixups* dex_cache_array_fixups =
+          new (arena) mips::DexCacheArrayFixups(graph, stats);
+      HOptimization* mips_optimizations[] = {
+          pc_relative_fixups,
+          dex_cache_array_fixups
+      };
+      RunOptimizations(mips_optimizations, arraysize(mips_optimizations), pass_observer);
+      break;
+    }
+#endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86: {
       x86::PcRelativeFixups* pc_relative_fixups =
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
new file mode 100644
index 0000000..ba405cd
--- /dev/null
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pc_relative_fixups_mips.h"
+#include "code_generator_mips.h"
+#include "intrinsics_mips.h"
+
+namespace art {
+namespace mips {
+
+/**
+ * Finds instructions that need the constant area base as an input.
+ */
+class PCRelativeHandlerVisitor : public HGraphVisitor {
+ public:
+  PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen)
+      : HGraphVisitor(graph),
+        codegen_(down_cast<CodeGeneratorMIPS*>(codegen)),
+        base_(nullptr) {}
+
+  void MoveBaseIfNeeded() {
+    if (base_ != nullptr) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      base_->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
+ private:
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    HandleInvoke(invoke);
+  }
+
+  void InitializePCRelativeBasePointer() {
+    // Ensure we only initialize the pointer once.
+    if (base_ != nullptr) {
+      return;
+    }
+    // Insert the base at the start of the entry block, move it to a better
+    // position later in MoveBaseIfNeeded().
+    base_ = new (GetGraph()->GetArena()) HMipsComputeBaseMethodAddress();
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction());
+    DCHECK(base_ != nullptr);
+  }
+
+  void HandleInvoke(HInvoke* invoke) {
+    // If this is an invoke-static/-direct with PC-relative dex cache array
+    // addressing, we need the PC-relative address base.
+    HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+    if (invoke_static_or_direct != nullptr) {
+      HInvokeStaticOrDirect::MethodLoadKind method_load_kind =
+          invoke_static_or_direct->GetMethodLoadKind();
+      HInvokeStaticOrDirect::CodePtrLocation code_ptr_location =
+          invoke_static_or_direct->GetCodePtrLocation();
+
+      bool has_extra_input =
+          (method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) ||
+          (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup);
+
+      // We can't add a pointer to the constant area if we already have a current
+      // method pointer. This may arise when sharpening doesn't remove the current
+      // method pointer from the invoke.
+      if (invoke_static_or_direct->HasCurrentMethodInput()) {
+        DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache());
+        CHECK(!has_extra_input);  // TODO: review this.
+        return;
+      }
+
+      if (has_extra_input && !WillHaveCallFreeIntrinsicsCodeGen(invoke)) {
+        InitializePCRelativeBasePointer();
+        // Add the extra parameter base_.
+        invoke_static_or_direct->AddSpecialInput(base_);
+      }
+    }
+  }
+
+  bool WillHaveCallFreeIntrinsicsCodeGen(HInvoke* invoke) {
+    if (invoke->GetIntrinsic() != Intrinsics::kNone) {
+      // This invoke may have intrinsic code generation defined. However, we must
+      // now also determine if this code generation is truly there and call-free
+      // (not unimplemented, no bail on instruction features, or call on slow path).
+      // This is done by actually calling the locations builder on the instruction
+      // and clearing out the locations once the result is known. We assume the
+      // only side effect of this call is creating the locations!
+      IntrinsicLocationsBuilderMIPS builder(codegen_);
+      bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall();
+      invoke->SetLocations(nullptr);
+      return success;
+    }
+    return false;
+  }
+
+  CodeGeneratorMIPS* codegen_;
+
+  // The generated HMipsComputeBaseMethodAddress in the entry block needed as an
+  // input to the HMipsLoadFromConstantTable instructions.
+  HMipsComputeBaseMethodAddress* base_;
+};
+
+void PcRelativeFixups::Run() {
+  CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_);
+  if (mips_codegen->GetInstructionSetFeatures().IsR6()) {
+    // Do nothing for R6 because it has PC-relative addressing.
+    // TODO: review. Move this check into RunArchOptimizations()?
+    return;
+  }
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not run this optimization, as irreducible loops do not work with an instruction
+    // that can be live-in at the irreducible loop header.
+    return;
+  }
+  PCRelativeHandlerVisitor visitor(graph_, codegen_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBaseIfNeeded();
+}
+
+}  // namespace mips
+}  // namespace art
diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h
new file mode 100644
index 0000000..1e8b071
--- /dev/null
+++ b/compiler/optimizing/pc_relative_fixups_mips.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_
+#define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CodeGenerator;
+
+namespace mips {
+
+class PcRelativeFixups : public HOptimization {
+ public:
+  PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "pc_relative_fixups_mips", stats),
+        codegen_(codegen) {}
+
+  void Run() OVERRIDE;
+
+ private:
+  CodeGenerator* codegen_;
+};
+
+}  // namespace mips
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index cb2fc0a..921f3df 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -80,6 +80,15 @@
     HandleInvoke(invoke);
   }
 
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+    HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
+    if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
+        load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+      InitializePCRelativeBasePointer();
+      load_class->AddSpecialInput(base_);
+    }
+  }
+
   void VisitLoadString(HLoadString* load_string) OVERRIDE {
     HLoadString::LoadKind load_kind = load_string->GetLoadKind();
     if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
@@ -202,7 +211,7 @@
     }
 
     // Ensure that we can load FP arguments from the constant area.
-    auto&& inputs = invoke->GetInputs();
+    HInputsRef inputs = invoke->GetInputs();
     for (size_t i = 0; i < inputs.size(); i++) {
       HConstant* input = inputs[i]->AsConstant();
       if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) {
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index c941c0c..8fb5396 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -40,6 +40,22 @@
 
 void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
   check->ReplaceWith(check->InputAt(0));
+  if (check->IsStringCharAt()) {
+    // Add a fake environment for String.charAt() inline info as we want
+    // the exception to appear as being thrown from there.
+    const DexFile& dex_file = check->GetEnvironment()->GetDexFile();
+    DCHECK_STREQ(PrettyMethod(check->GetStringCharAtMethodIndex(), dex_file).c_str(),
+                 "char java.lang.String.charAt(int)");
+    ArenaAllocator* arena = GetGraph()->GetArena();
+    HEnvironment* environment = new (arena) HEnvironment(arena,
+                                                         /* number_of_vregs */ 0u,
+                                                         dex_file,
+                                                         check->GetStringCharAtMethodIndex(),
+                                                         /* dex_pc */ DexFile::kDexNoIndex,
+                                                         kVirtual,
+                                                         check);
+    check->InsertRawEnvironment(environment);
+  }
 }
 
 void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) {
@@ -130,7 +146,11 @@
     instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0);
     // The allocation entry point that deals with access checks does not work with inlined
     // methods, so we need to check whether this allocation comes from an inlined method.
-    if (has_only_one_use && !instruction->GetEnvironment()->IsFromInlinedInvoke()) {
+    // We also need to make the same check as for moving clinit check, whether the HLoadClass
+    // has the clinit check responsibility or not (HLoadClass can throw anyway).
+    if (has_only_one_use &&
+        !instruction->GetEnvironment()->IsFromInlinedInvoke() &&
+        CanMoveClinitCheck(load_class, instruction)) {
       // We can remove the load class from the graph. If it needed access checks, we delegate
       // the access check to the allocation.
       if (load_class->NeedsAccessCheck()) {
@@ -187,7 +207,8 @@
                                                       HInstruction* user) const {
   // Determine if input and user come from the same dex instruction, so that we can move
   // the clinit check responsibility from one to the other, i.e. from HClinitCheck (user)
-  // to HLoadClass (input), or from HClinitCheck (input) to HInvokeStaticOrDirect (user).
+  // to HLoadClass (input), or from HClinitCheck (input) to HInvokeStaticOrDirect (user),
+  // or from HLoadClass (input) to HNewInstance (user).
 
   // Start with a quick dex pc check.
   if (user->GetDexPc() != input->GetDexPc()) {
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index f9bef68..5891350 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -39,7 +39,7 @@
   }
 
   void PrintPostInstruction(HInstruction* instruction) {
-    auto&& inputs = instruction->GetInputs();
+    HConstInputsRef inputs = instruction->GetInputs();
     if (!inputs.empty()) {
       PrintString("(");
       bool first = true;
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 3e6adcb..965d5ee 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -46,13 +46,6 @@
   return *cache;
 }
 
-// Returns true if klass is admissible to the propagation: non-null and resolved.
-// For an array type, we also check if the component type is admissible.
-static bool IsAdmissible(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
-  return klass != nullptr && klass->IsResolved() &&
-      (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType()));
-}
-
 ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetObjectClassHandle() {
   return GetRootHandle(handles_, ClassLinker::kJavaLangObject, &object_class_handle_);
 }
@@ -823,10 +816,10 @@
 void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
   DCHECK(instr->IsLive());
 
-  auto&& inputs = instr->GetInputs();
+  HInputsRef inputs = instr->GetInputs();
   size_t first_input_index_not_null = 0;
   while (first_input_index_not_null < inputs.size() &&
-      inputs[first_input_index_not_null]->IsNullConstant()) {
+         inputs[first_input_index_not_null]->IsNullConstant()) {
     first_input_index_not_null++;
   }
   if (first_input_index_not_null == inputs.size()) {
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 2106be6..edd83bf 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -42,6 +42,14 @@
 
   void Run() OVERRIDE;
 
+  // Returns true if klass is admissible to the propagation: non-null and resolved.
+  // For an array type, we also check if the component type is admissible.
+  static bool IsAdmissible(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return klass != nullptr &&
+           klass->IsResolved() &&
+           (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType()));
+  }
+
   static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation";
 
  private:
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 4a6b835..9d99668 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -753,7 +753,7 @@
   if (defined_by != nullptr && !current->IsSplit()) {
     LocationSummary* locations = defined_by->GetLocations();
     if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
-      auto&& inputs = defined_by->GetInputs();
+      HInputsRef inputs = defined_by->GetInputs();
       for (size_t i = 0; i < inputs.size(); ++i) {
         // Take the last interval of the input. It is the location of that interval
         // that will be used at `defined_by`.
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 08bd35f..97f34e6 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -40,13 +40,14 @@
       HInstruction* instruction = it.Current();
       if (instruction->IsInvokeStaticOrDirect()) {
         ProcessInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect());
+      } else if (instruction->IsLoadClass()) {
+        ProcessLoadClass(instruction->AsLoadClass());
       } else if (instruction->IsLoadString()) {
         ProcessLoadString(instruction->AsLoadString());
       }
       // TODO: Move the sharpening of invoke-virtual/-interface/-super from HGraphBuilder
       //       here. Rewrite it to avoid the CompilerDriver's reliance on verifier data
       //       because we know the type better when inlining.
-      // TODO: HLoadClass - select better load kind if available.
     }
   }
 }
@@ -153,6 +154,123 @@
   invoke->SetDispatchInfo(dispatch_info);
 }
 
+void HSharpening::ProcessLoadClass(HLoadClass* load_class) {
+  if (load_class->NeedsAccessCheck()) {
+    // We need to call the runtime anyway, so we simply get the class as that call's return value.
+    return;
+  }
+  if (load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) {
+    // Loading from the ArtMethod* is the most efficient retrieval.
+    // TODO: This may not actually be true for all architectures and
+    // locations of target classes. The additional register pressure
+    // for using the ArtMethod* should be considered.
+    return;
+  }
+
+  DCHECK_EQ(load_class->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod);
+  DCHECK(!load_class->IsInDexCache()) << "HLoadClass should not be optimized before sharpening.";
+
+  const DexFile& dex_file = load_class->GetDexFile();
+  uint32_t type_index = load_class->GetTypeIndex();
+
+  bool is_in_dex_cache = false;
+  HLoadClass::LoadKind desired_load_kind;
+  uint64_t address = 0u;  // Class or dex cache element address.
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<1> hs(soa.Self());
+    Runtime* runtime = Runtime::Current();
+    ClassLinker* class_linker = runtime->GetClassLinker();
+    Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile())
+        ? compilation_unit_.GetDexCache()
+        : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
+    mirror::Class* klass = dex_cache->GetResolvedType(type_index);
+
+    if (compiler_driver_->IsBootImage()) {
+      // Compiling boot image. Check if the class is a boot image class.
+      DCHECK(!runtime->UseJitCompilation());
+      if (!compiler_driver_->GetSupportBootImageFixup()) {
+        // MIPS/MIPS64 or compiler_driver_test. Do not sharpen.
+        desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+      } else {
+        if (klass != nullptr &&
+            compiler_driver_->IsImageClass(
+                dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
+          is_in_dex_cache = true;
+          desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
+              ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative
+              : HLoadClass::LoadKind::kBootImageLinkTimeAddress;
+        } else {
+          // Not a boot image class. We must go through the dex cache.
+          DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
+          desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative;
+        }
+      }
+    } else if (runtime->UseJitCompilation()) {
+      // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
+      // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
+      is_in_dex_cache = (klass != nullptr);
+      if (klass != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+        // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
+        desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
+        address = reinterpret_cast64<uint64_t>(klass);
+      } else {
+        // Note: If the class is not in the dex cache or isn't initialized, the
+        // instruction needs environment and will not be inlined across dex files.
+        // Within a dex file, the slow-path helper loads the correct class and
+        // inlined frames are used correctly for OOM stack trace.
+        // TODO: Write a test for this. Bug: 29416588
+        desired_load_kind = HLoadClass::LoadKind::kDexCacheAddress;
+        void* dex_cache_element_address = &dex_cache->GetResolvedTypes()[type_index];
+        address = reinterpret_cast64<uint64_t>(dex_cache_element_address);
+      }
+    } else {
+      // AOT app compilation. Check if the class is in the boot image.
+      if ((klass != nullptr) &&
+          runtime->GetHeap()->ObjectIsInBootImageSpace(klass) &&
+          !codegen_->GetCompilerOptions().GetCompilePic()) {
+        desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
+        address = reinterpret_cast64<uint64_t>(klass);
+      } else {
+        // Not JIT and either the klass is not in boot image or we are compiling in PIC mode.
+        // Use PC-relative load from the dex cache if the dex file belongs
+        // to the oat file that we're currently compiling.
+        desired_load_kind =
+            ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &load_class->GetDexFile())
+                ? HLoadClass::LoadKind::kDexCachePcRelative
+                : HLoadClass::LoadKind::kDexCacheViaMethod;
+      }
+    }
+  }
+  if (is_in_dex_cache) {
+    load_class->MarkInDexCache();
+  }
+
+  HLoadClass::LoadKind load_kind = codegen_->GetSupportedLoadClassKind(desired_load_kind);
+  switch (load_kind) {
+    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadClass::LoadKind::kDexCacheViaMethod:
+      load_class->SetLoadKindWithTypeReference(load_kind, dex_file, type_index);
+      break;
+    case HLoadClass::LoadKind::kBootImageAddress:
+    case HLoadClass::LoadKind::kDexCacheAddress:
+      DCHECK_NE(address, 0u);
+      load_class->SetLoadKindWithAddress(load_kind, address);
+      break;
+    case HLoadClass::LoadKind::kDexCachePcRelative: {
+      size_t pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+      DexCacheArraysLayout layout(pointer_size, &dex_file);
+      size_t element_index = layout.TypeOffset(type_index);
+      load_class->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected load kind: " << load_kind;
+      UNREACHABLE();
+  }
+}
+
 void HSharpening::ProcessLoadString(HLoadString* load_string) {
   DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
   DCHECK(!load_string->IsInDexCache());
@@ -193,13 +311,14 @@
       mirror::String* string = dex_cache->GetResolvedString(string_index);
       is_in_dex_cache = (string != nullptr);
       if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
+        // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
         desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
         address = reinterpret_cast64<uint64_t>(string);
       } else {
         // Note: If the string is not in the dex cache, the instruction needs environment
         // and will not be inlined across dex files. Within a dex file, the slow-path helper
         // loads the correct string and inlined frames are used correctly for OOM stack trace.
-        // TODO: Write a test for this.
+        // TODO: Write a test for this. Bug: 29416588
         desired_load_kind = HLoadString::LoadKind::kDexCacheAddress;
         void* dex_cache_element_address = &dex_cache->GetStrings()[string_index];
         address = reinterpret_cast64<uint64_t>(dex_cache_element_address);
@@ -207,20 +326,18 @@
     } else {
       // AOT app compilation. Try to lookup the string without allocating if not found.
       mirror::String* string = class_linker->LookupString(dex_file, string_index, dex_cache);
-      if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
-        if (codegen_->GetCompilerOptions().GetCompilePic()) {
-          // Use PC-relative load from the dex cache if the dex file belongs
-          // to the oat file that we're currently compiling.
-          desired_load_kind = ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)
-              ? HLoadString::LoadKind::kDexCachePcRelative
-              : HLoadString::LoadKind::kDexCacheViaMethod;
-        } else {
-          desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
-          address = reinterpret_cast64<uint64_t>(string);
-        }
+      if (string != nullptr &&
+          runtime->GetHeap()->ObjectIsInBootImageSpace(string) &&
+          !codegen_->GetCompilerOptions().GetCompilePic()) {
+        desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
+        address = reinterpret_cast64<uint64_t>(string);
       } else {
-        // Not JIT and the string is not in boot image.
-        desired_load_kind = HLoadString::LoadKind::kDexCachePcRelative;
+        // Not JIT and either the string is not in boot image or we are compiling in PIC mode.
+        // Use PC-relative load from the dex cache if the dex file belongs
+        // to the oat file that we're currently compiling.
+        desired_load_kind = ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)
+            ? HLoadString::LoadKind::kDexCachePcRelative
+            : HLoadString::LoadKind::kDexCacheViaMethod;
       }
     }
   }
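
The branches of ProcessLoadClass() above can be summarized as one decision function. The sketch below is a simplified reading of those branches (it leaves out the address and dex-cache-index bookkeeping as well as the is_in_dex_cache flag, and the chosen kind is still subject to the codegen's GetSupportedLoadClassKind() filter):

// Simplified, illustrative summary of the load-kind selection in
// HSharpening::ProcessLoadClass(); not the actual ART code.
enum class LoadKind {
  kReferrersClass, kBootImageLinkTimeAddress, kBootImageLinkTimePcRelative,
  kBootImageAddress, kDexCacheAddress, kDexCachePcRelative, kDexCacheViaMethod,
};

struct Context {
  bool compiling_boot_image;
  bool boot_image_fixup_supported;
  bool jit;
  bool pic;
  bool klass_resolved;
  bool klass_is_image_class;       // Boot image compile: descriptor is an image class.
  bool klass_in_boot_image_space;  // JIT/AOT app: resolved class lives in the boot image.
  bool dex_file_in_oat_file;       // Dex file belongs to the oat file being compiled.
};

LoadKind SelectLoadKind(const Context& c) {
  if (c.compiling_boot_image) {
    if (!c.boot_image_fixup_supported) return LoadKind::kDexCacheViaMethod;
    if (c.klass_resolved && c.klass_is_image_class) {
      return c.pic ? LoadKind::kBootImageLinkTimePcRelative
                   : LoadKind::kBootImageLinkTimeAddress;
    }
    return LoadKind::kDexCachePcRelative;
  }
  if (c.jit) {
    return (c.klass_resolved && c.klass_in_boot_image_space)
        ? LoadKind::kBootImageAddress
        : LoadKind::kDexCacheAddress;
  }
  // AOT app compilation.
  if (c.klass_resolved && c.klass_in_boot_image_space && !c.pic) {
    return LoadKind::kBootImageAddress;
  }
  return c.dex_file_in_oat_file ? LoadKind::kDexCachePcRelative
                                : LoadKind::kDexCacheViaMethod;
}
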
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 24152f6..d35ae66 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -47,6 +47,7 @@
 
  private:
   void ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke);
+  void ProcessLoadClass(HLoadClass* load_class);
   void ProcessLoadString(HLoadString* load_string);
 
   CodeGenerator* codegen_;
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index ed50c69..5a574d9 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -181,7 +181,7 @@
     return true;
   } else {
     DCHECK(common_type == Primitive::kPrimNot || Primitive::IsFloatingPointType(common_type));
-    auto&& inputs = phi->GetInputs();
+    HInputsRef inputs = phi->GetInputs();
     for (size_t i = 0; i < inputs.size(); ++i) {
       HInstruction* input = inputs[i];
       if (input->GetType() != common_type) {
@@ -617,7 +617,7 @@
       || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())
       || (next->GetType() != type)) {
     ArenaAllocator* allocator = graph_->GetArena();
-    auto&& inputs = phi->GetInputs();
+    HInputsRef inputs = phi->GetInputs();
     HPhi* new_phi =
         new (allocator) HPhi(allocator, phi->GetRegNumber(), inputs.size(), type);
     // Copy the inputs. Note that the graph may not be correctly typed
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 212d935..7af4302 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -177,7 +177,7 @@
 static void RecursivelyProcessInputs(HInstruction* current,
                                      HInstruction* actual_user,
                                      BitVector* live_in) {
-  auto&& inputs = current->GetInputs();
+  HInputsRef inputs = current->GetInputs();
   for (size_t i = 0; i < inputs.size(); ++i) {
     HInstruction* input = inputs[i];
     bool has_in_location = current->GetLocations()->InAt(i).IsValid();
@@ -431,7 +431,7 @@
         // If the instruction dies at the phi assignment, we can try having the
         // same register.
         if (end == user->GetBlock()->GetPredecessors()[input_index]->GetLifetimeEnd()) {
-          auto&& inputs = user->GetInputs();
+          HInputsRef inputs = user->GetInputs();
           for (size_t i = 0; i < inputs.size(); ++i) {
             if (i == input_index) {
               continue;
@@ -472,7 +472,7 @@
   if (defined_by_->IsPhi()) {
     // Try to use the same register as one of the inputs.
     const ArenaVector<HBasicBlock*>& predecessors = defined_by_->GetBlock()->GetPredecessors();
-    auto&& inputs = defined_by_->GetInputs();
+    HInputsRef inputs = defined_by_->GetInputs();
     for (size_t i = 0; i < inputs.size(); ++i) {
       size_t end = predecessors[i]->GetLifetimeEnd();
       LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(end - 1);
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 11a254e..fc8af64 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -228,7 +228,7 @@
 
 void StackMapStream::ComputeInlineInfoEncoding() {
   uint32_t method_index_max = 0;
-  uint32_t dex_pc_max = 0;
+  uint32_t dex_pc_max = DexFile::kDexNoIndex;
   uint32_t invoke_type_max = 0;
 
   uint32_t inline_info_index = 0;
@@ -236,7 +236,10 @@
     for (size_t j = 0; j < entry.inlining_depth; ++j) {
       InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
       method_index_max = std::max(method_index_max, inline_entry.method_index);
-      dex_pc_max = std::max(dex_pc_max, inline_entry.dex_pc);
+      if (inline_entry.dex_pc != DexFile::kDexNoIndex &&
+          (dex_pc_max == DexFile::kDexNoIndex || dex_pc_max < inline_entry.dex_pc)) {
+        dex_pc_max = inline_entry.dex_pc;
+      }
       invoke_type_max = std::max(invoke_type_max, static_cast<uint32_t>(inline_entry.invoke_type));
     }
   }
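
The ComputeInlineInfoEncoding() change above is needed because the fake String.charAt() environment added in PrepareForRegisterAllocation records DexFile::kDexNoIndex (all ones) as its dex pc, and feeding that sentinel to std::max would blow up the encoded dex-pc bit width. A standalone sketch of the sentinel-aware maximum (the helper below is illustrative and mirrors the hunk's logic):

#include <algorithm>
#include <cassert>
#include <cstdint>

constexpr uint32_t kDexNoIndex = 0xFFFFFFFFu;

// Returns the larger real dex pc, ignoring the kDexNoIndex sentinel.
uint32_t MaxRealDexPc(uint32_t current_max, uint32_t dex_pc) {
  if (dex_pc != kDexNoIndex && (current_max == kDexNoIndex || current_max < dex_pc)) {
    return dex_pc;
  }
  return current_max;
}

int main() {
  uint32_t max = kDexNoIndex;              // Nothing seen yet.
  max = MaxRealDexPc(max, kDexNoIndex);    // Intrinsified frame: ignored.
  assert(max == kDexNoIndex);
  max = MaxRealDexPc(max, 12u);
  max = MaxRealDexPc(max, 7u);
  assert(max == 12u);
  // The plain maximum would have been stuck at the sentinel:
  assert(std::max<uint32_t>(kDexNoIndex, 12u) == kDexNoIndex);
  return 0;
}
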
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 41f72f5..53a9795 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -108,7 +108,7 @@
   };
 
   struct InlineInfoEntry {
-    uint32_t dex_pc;
+    uint32_t dex_pc;  // DexFile::kDexNoIndex for intrinsified native methods.
     uint32_t method_index;
     InvokeType invoke_type;
     uint32_t num_dex_registers;
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 274d0de..a571d14 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -671,6 +671,9 @@
   virtual void vcmpdz(DRegister dd, Condition cond = AL) = 0;
   virtual void vmstat(Condition cond = AL) = 0;  // VMRS APSR_nzcv, FPSCR
 
+  virtual void vcntd(DRegister dd, DRegister dm) = 0;
+  virtual void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) = 0;
+
   virtual void vpushs(SRegister reg, int nregs, Condition cond = AL) = 0;
   virtual void vpushd(DRegister reg, int nregs, Condition cond = AL) = 0;
   virtual void vpops(SRegister reg, int nregs, Condition cond = AL) = 0;
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 0a227b2..6f7119d 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -1264,6 +1264,31 @@
   Emit(encoding);
 }
 
+void Arm32Assembler::vcntd(DRegister dd, DRegister dm) {
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B10 | B8) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit(encoding);
+}
+
+void Arm32Assembler::vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) {
+  CHECK(size == 8 || size == 16 || size == 32) << size;
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<uint32_t>(size >> 4) & 0x3) * B18) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B9) |
+    (is_unsigned ? B7 : 0) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit(encoding);
+}
+
 
 void Arm32Assembler::svc(uint32_t imm24) {
   CHECK(IsUint<24>(imm24)) << imm24;
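
The vcntd()/vpaddld() encodings above place each NEON D register the usual way: the 5-bit register number is split into a high bit and a 4-bit field, D:Vd at bits 22 and 15..12 for the destination and M:Vm at bits 5 and 3..0 for the source, which is what the (reg >> 4) and (reg & 0xf) terms compute. A small illustrative helper (not part of the assembler) showing just that split:

#include <cassert>
#include <cstdint>

// Field positions per the ARM ARM; dd and dm are D register numbers 0..31.
uint32_t EncodeDRegisters(uint32_t dd, uint32_t dm) {
  assert(dd < 32u && dm < 32u);
  uint32_t encoding = 0u;
  encoding |= (dd >> 4) << 22;    // D bit.
  encoding |= (dd & 0xf) << 12;   // Vd field.
  encoding |= (dm >> 4) << 5;     // M bit.
  encoding |= (dm & 0xf);         // Vm field.
  return encoding;
}

int main() {
  uint32_t high = EncodeDRegisters(/* dd */ 19u, /* dm */ 20u);
  uint32_t low  = EncodeDRegisters(/* dd */ 0u, /* dm */ 9u);
  assert(((high >> 22) & 1u) == 1u);  // D19 (in D16..D31) sets the D bit.
  assert(((low >> 22) & 1u) == 0u);   // D0 (in D0..D15) leaves it clear.
  return 0;
}
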
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index bc6020e..8726ac8 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -205,6 +205,9 @@
   void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
   void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
 
+  void vcntd(DRegister dd, DRegister dm) OVERRIDE;
+  void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE;
+
   void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc
index e570e22..b214062 100644
--- a/compiler/utils/arm/assembler_arm32_test.cc
+++ b/compiler/utils/arm/assembler_arm32_test.cc
@@ -899,4 +899,43 @@
   T3Helper(&arm::Arm32Assembler::revsh, true, "revsh{cond} {reg1}, {reg2}", "revsh");
 }
 
+TEST_F(AssemblerArm32Test, vcnt) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd.
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D31, the M bit and D bit should be 1.
+  GetAssembler()->vcntd(arm::D0, arm::D1);
+  GetAssembler()->vcntd(arm::D19, arm::D20);
+  GetAssembler()->vcntd(arm::D0, arm::D9);
+  GetAssembler()->vcntd(arm::D16, arm::D20);
+
+  std::string expected =
+      "vcnt.8 d0, d1\n"
+      "vcnt.8 d19, d20\n"
+      "vcnt.8 d0, d9\n"
+      "vcnt.8 d16, d20\n";
+
+  DriverStr(expected, "vcnt");
+}
+
+TEST_F(AssemblerArm32Test, vpaddl) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd.
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D31, the M bit and D bit should be 1.
+  // Different data types (signed and unsigned) are also tested.
+  GetAssembler()->vpaddld(arm::D0, arm::D0, 8, true);
+  GetAssembler()->vpaddld(arm::D20, arm::D20, 8, false);
+  GetAssembler()->vpaddld(arm::D0, arm::D20, 16, false);
+  GetAssembler()->vpaddld(arm::D20, arm::D0, 32, true);
+
+  std::string expected =
+      "vpaddl.u8 d0, d0\n"
+      "vpaddl.s8 d20, d20\n"
+      "vpaddl.s16 d0, d20\n"
+      "vpaddl.u32 d20, d0\n";
+
+  DriverStr(expected, "vpaddl");
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 546dd65..a72ea41 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -3117,6 +3117,30 @@
   Emit32(encoding);
 }
 
+void Thumb2Assembler::vcntd(DRegister dd, DRegister dm) {
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B27 | B26 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B10 | B8) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit32(encoding);
+}
+
+void Thumb2Assembler::vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) {
+  CHECK(size == 8 || size == 16 || size == 32) << size;
+  uint32_t encoding = (B31 | B30 | B29 | B28 | B27 | B26 | B25 | B24 | B23 | B21 | B20) |
+    ((static_cast<uint32_t>(size >> 4) & 0x3) * B18) |
+    ((static_cast<int32_t>(dd) >> 4) * B22) |
+    ((static_cast<uint32_t>(dd) & 0xf) * B12) |
+    (B9) |
+    (is_unsigned ? B7 : 0) |
+    ((static_cast<int32_t>(dm) >> 4) * B5) |
+    (static_cast<uint32_t>(dm) & 0xf);
+
+  Emit32(encoding);
+}
 
 void Thumb2Assembler::svc(uint32_t imm8) {
   CHECK(IsUint<8>(imm8)) << imm8;
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index ce310a4..2ca74fc 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -250,6 +250,9 @@
   void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
   void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
 
+  void vcntd(DRegister dd, DRegister dm) OVERRIDE;
+  void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE;
+
   void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
   void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index b5cafcb..7f1dc49 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -1380,4 +1380,43 @@
   DriverStr(expected, "revsh");
 }
 
+TEST_F(AssemblerThumb2Test, vcnt) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd.
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D31, the M bit and D bit should be 1.
+  __ vcntd(arm::D0, arm::D1);
+  __ vcntd(arm::D19, arm::D20);
+  __ vcntd(arm::D0, arm::D9);
+  __ vcntd(arm::D16, arm::D20);
+
+  std::string expected =
+      "vcnt.8 d0, d1\n"
+      "vcnt.8 d19, d20\n"
+      "vcnt.8 d0, d9\n"
+      "vcnt.8 d16, d20\n";
+
+  DriverStr(expected, "vcnt");
+}
+
+TEST_F(AssemblerThumb2Test, vpaddl) {
+  // Different D register numbers are used here, to test register encoding.
+  // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd.
+  // For source and destination registers which use D0..D15, the M bit and D bit should be 0.
+  // For source and destination registers which use D16..D31, the M bit and D bit should be 1.
+  // Different data types (signed and unsigned) are also tested.
+  __ vpaddld(arm::D0, arm::D0, 8, true);
+  __ vpaddld(arm::D20, arm::D20, 8, false);
+  __ vpaddld(arm::D0, arm::D20, 16, false);
+  __ vpaddld(arm::D20, arm::D0, 32, true);
+
+  std::string expected =
+      "vpaddl.u8 d0, d0\n"
+      "vpaddl.s8 d20, d20\n"
+      "vpaddl.s16 d0, d20\n"
+      "vpaddl.u32 d20, d0\n";
+
+  DriverStr(expected, "vpaddl");
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 1842f00..54ed62b 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -648,6 +648,15 @@
 void Arm64Assembler::SpillRegisters(vixl::CPURegList registers, int offset) {
   int size = registers.RegisterSizeInBytes();
   const Register sp = vixl_masm_->StackPointer();
+  // Since we are operating on register pairs, we would like to align on
+  // double the standard size; on the other hand, we don't want to insert
+  // an extra store, which will happen if the number of registers is even.
+  if (!IsAlignedParam(offset, 2 * size) && registers.Count() % 2 != 0) {
+    const CPURegister& dst0 = registers.PopLowestIndex();
+    ___ Str(dst0, MemOperand(sp, offset));
+    cfi_.RelOffset(DWARFReg(dst0), offset);
+    offset += size;
+  }
   while (registers.Count() >= 2) {
     const CPURegister& dst0 = registers.PopLowestIndex();
     const CPURegister& dst1 = registers.PopLowestIndex();
@@ -667,6 +676,13 @@
 void Arm64Assembler::UnspillRegisters(vixl::CPURegList registers, int offset) {
   int size = registers.RegisterSizeInBytes();
   const Register sp = vixl_masm_->StackPointer();
+  // Be consistent with the logic for spilling registers.
+  if (!IsAlignedParam(offset, 2 * size) && registers.Count() % 2 != 0) {
+    const CPURegister& dst0 = registers.PopLowestIndex();
+    ___ Ldr(dst0, MemOperand(sp, offset));
+    cfi_.Restore(DWARFReg(dst0));
+    offset += size;
+  }
   while (registers.Count() >= 2) {
     const CPURegister& dst0 = registers.PopLowestIndex();
     const CPURegister& dst1 = registers.PopLowestIndex();
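
The SpillRegisters()/UnspillRegisters() hunks above emit a leading single store only when the start offset is not aligned to twice the register size and the register count is odd, so that every remaining store can be an stp on an aligned offset. A compact sketch of that decision, checked against the case the updated arm64 CFI expectations earlier in this change exercise (illustration only, not the assembler code):

#include <cassert>
#include <string>
#include <vector>

// Plans str/stp emissions for num_regs registers of `size` bytes starting at `offset`.
std::vector<std::string> PlanSpills(int num_regs, int offset, int size) {
  std::vector<std::string> plan;
  if ((offset % (2 * size)) != 0 && (num_regs % 2) != 0) {
    plan.push_back("str @" + std::to_string(offset));  // Leading single store re-aligns the rest.
    offset += size;
    --num_regs;
  }
  while (num_regs >= 2) {
    plan.push_back("stp @" + std::to_string(offset));  // Aligned pair stores.
    offset += 2 * size;
    num_regs -= 2;
  }
  if (num_regs == 1) {
    plan.push_back("str @" + std::to_string(offset));  // Odd leftover with aligned start offset.
  }
  return plan;
}

int main() {
  // x20, x21, lr spilled from sp+40 with 8-byte registers:
  // offset 40 is not 16-aligned and the count is odd, so str @40 then stp @48,
  // matching the updated optimizing_cfi_test_expected.inc output.
  std::vector<std::string> plan = PlanSpills(/* num_regs */ 3, /* offset */ 40, /* size */ 8);
  assert(plan.size() == 2 && plan[0] == "str @40" && plan[1] == "stp @48");
  return 0;
}
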
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index afe0576..92b4c8e 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -344,6 +344,17 @@
   }
 
   template <typename ImmType>
+  std::string RepeatFFIb(void (Ass::*f)(FPReg, FPReg, ImmType), int imm_bits, std::string fmt) {
+    return RepeatTemplatedRegistersImmBits<FPReg, FPReg, ImmType>(f,
+                                                                  imm_bits,
+                                                                  GetFPRegisters(),
+                                                                  GetFPRegisters(),
+                                                                  &AssemblerTest::GetFPRegName,
+                                                                  &AssemblerTest::GetFPRegName,
+                                                                  fmt);
+  }
+
+  template <typename ImmType>
   std::string RepeatIbFF(void (Ass::*f)(ImmType, FPReg, FPReg), int imm_bits, std::string fmt) {
     return RepeatTemplatedImmBitsRegisters<ImmType, FPReg, FPReg>(f,
                                                                   GetFPRegisters(),
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index ac93083..ebaf1c0 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -39,6 +39,7 @@
   for (auto& exception_block : exception_blocks_) {
     EmitExceptionPoll(&exception_block);
   }
+  EmitLiterals();
   PromoteBranches();
 }
 
@@ -444,6 +445,12 @@
   EmitI(0x25, rs, rt, imm16);
 }
 
+void MipsAssembler::Lwpc(Register rs, uint32_t imm19) {
+  CHECK(IsR6());
+  CHECK(IsUint<19>(imm19)) << imm19;
+  EmitI21(0x3B, rs, (0x01 << 19) | imm19);
+}
+
 void MipsAssembler::Lui(Register rt, uint16_t imm16) {
   EmitI(0xf, static_cast<Register>(0), rt, imm16);
 }
@@ -532,6 +539,10 @@
   EmitI(0x4, static_cast<Register>(0), static_cast<Register>(0), imm16);
 }
 
+void MipsAssembler::Bal(uint16_t imm16) {
+  EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x11), imm16);
+}
+
 void MipsAssembler::Beq(Register rs, Register rt, uint16_t imm16) {
   EmitI(0x4, rs, rt, imm16);
 }
@@ -624,6 +635,11 @@
   EmitI26(0x32, imm26);
 }
 
+void MipsAssembler::Balc(uint32_t imm26) {
+  CHECK(IsR6());
+  EmitI26(0x3A, imm26);
+}
+
 void MipsAssembler::Jic(Register rt, uint16_t imm16) {
   CHECK(IsR6());
   EmitI(0x36, static_cast<Register>(0), rt, imm16);
@@ -1489,30 +1505,47 @@
   type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type;
 }
 
-void MipsAssembler::Branch::InitializeType(bool is_call, bool is_r6) {
+void MipsAssembler::Branch::InitializeType(bool is_call, bool is_literal, bool is_r6) {
+  CHECK_EQ(is_call && is_literal, false);
   OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
   if (is_r6) {
     // R6
-    if (is_call) {
+    if (is_literal) {
+      CHECK(!IsResolved());
+      type_ = kR6Literal;
+    } else if (is_call) {
       InitShortOrLong(offset_size, kR6Call, kR6LongCall);
-    } else if (condition_ == kUncond) {
-      InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch);
     } else {
-      if (condition_ == kCondEQZ || condition_ == kCondNEZ) {
-        // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
-        type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch;
-      } else {
-        InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch);
+      switch (condition_) {
+        case kUncond:
+          InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch);
+          break;
+        case kCondEQZ:
+        case kCondNEZ:
+          // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
+          type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch;
+          break;
+        default:
+          InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch);
+          break;
       }
     }
   } else {
     // R2
-    if (is_call) {
+    if (is_literal) {
+      CHECK(!IsResolved());
+      type_ = kLiteral;
+    } else if (is_call) {
       InitShortOrLong(offset_size, kCall, kLongCall);
-    } else if (condition_ == kUncond) {
-      InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
     } else {
-      InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+      switch (condition_) {
+        case kUncond:
+          InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
+          break;
+        default:
+          InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+          break;
+      }
     }
   }
   old_type_ = type_;
@@ -1544,14 +1577,14 @@
   }
 }
 
-MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target)
+MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call)
     : old_location_(location),
       location_(location),
       target_(target),
       lhs_reg_(0),
       rhs_reg_(0),
       condition_(kUncond) {
-  InitializeType(false, is_r6);
+  InitializeType(is_call, /* is_literal */ false, is_r6);
 }
 
 MipsAssembler::Branch::Branch(bool is_r6,
@@ -1608,19 +1641,23 @@
     // Branch condition is always true, make the branch unconditional.
     condition_ = kUncond;
   }
-  InitializeType(false, is_r6);
+  InitializeType(/* is_call */ false, /* is_literal */ false, is_r6);
 }
 
-MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg)
+MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg)
     : old_location_(location),
       location_(location),
-      target_(target),
-      lhs_reg_(indirect_reg),
-      rhs_reg_(0),
+      target_(kUnresolved),
+      lhs_reg_(dest_reg),
+      rhs_reg_(base_reg),
       condition_(kUncond) {
-  CHECK_NE(indirect_reg, ZERO);
-  CHECK_NE(indirect_reg, AT);
-  InitializeType(true, is_r6);
+  CHECK_NE(dest_reg, ZERO);
+  if (is_r6) {
+    CHECK_EQ(base_reg, ZERO);
+  } else {
+    CHECK_NE(base_reg, ZERO);
+  }
+  InitializeType(/* is_call */ false, /* is_literal */ true, is_r6);
 }
 
 MipsAssembler::BranchCondition MipsAssembler::Branch::OppositeCondition(
@@ -1722,19 +1759,27 @@
     case kUncondBranch:
     case kCondBranch:
     case kCall:
+    // R2 near literal.
+    case kLiteral:
     // R6 short branches.
     case kR6UncondBranch:
     case kR6CondBranch:
     case kR6Call:
+    // R6 near literal.
+    case kR6Literal:
       return false;
     // R2 long branches.
     case kLongUncondBranch:
     case kLongCondBranch:
     case kLongCall:
+    // R2 far literal.
+    case kFarLiteral:
     // R6 long branches.
     case kR6LongUncondBranch:
     case kR6LongCondBranch:
     case kR6LongCall:
+    // R6 far literal.
+    case kR6FarLiteral:
       return true;
   }
   UNREACHABLE();
@@ -1803,6 +1848,10 @@
     case kCall:
       type_ = kLongCall;
       break;
+    // R2 near literal.
+    case kLiteral:
+      type_ = kFarLiteral;
+      break;
     // R6 short branches.
     case kR6UncondBranch:
       type_ = kR6LongUncondBranch;
@@ -1813,6 +1862,10 @@
     case kR6Call:
       type_ = kR6LongCall;
       break;
+    // R6 near literal.
+    case kR6Literal:
+      type_ = kR6FarLiteral;
+      break;
     default:
       // Note: 'type_' is already long.
       break;
@@ -1820,14 +1873,26 @@
   CHECK(IsLong());
 }
 
-uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) {
+uint32_t MipsAssembler::GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const {
+  switch (branch->GetType()) {
+    case Branch::kLiteral:
+    case Branch::kFarLiteral:
+      return GetLabelLocation(&pc_rel_base_label_);
+    default:
+      return branch->GetLocation();
+  }
+}
+
+uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_short_distance) {
+  // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or
+  // `this->GetLocation()` for everything else.
   // If the branch is still unresolved or already long, nothing to do.
   if (IsLong() || !IsResolved()) {
     return 0;
   }
   // Promote the short branch to long if the offset size is too small
-  // to hold the distance between location_ and target_.
-  if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) {
+  // to hold the distance between location and target_.
+  if (GetOffsetSizeNeeded(location, target_) > GetOffsetSize()) {
     PromoteToLong();
     uint32_t old_size = GetOldSize();
     uint32_t new_size = GetSize();
@@ -1837,7 +1902,7 @@
   // The following logic is for debugging/testing purposes.
   // Promote some short branches to long when it's not really required.
   if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) {
-    int64_t distance = static_cast<int64_t>(target_) - location_;
+    int64_t distance = static_cast<int64_t>(target_) - location;
     distance = (distance >= 0) ? distance : -distance;
     if (distance >= max_short_distance) {
       PromoteToLong();
@@ -1854,12 +1919,26 @@
   return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t);
 }
 
-uint32_t MipsAssembler::Branch::GetOffset() const {
+uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const {
+  switch (branch->GetType()) {
+    case Branch::kLiteral:
+    case Branch::kFarLiteral:
+      return GetLabelLocation(&pc_rel_base_label_);
+    default:
+      return branch->GetOffsetLocation() +
+          Branch::branch_info_[branch->GetType()].pc_org * sizeof(uint32_t);
+  }
+}
+
+uint32_t MipsAssembler::Branch::GetOffset(uint32_t location) const {
+  // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or
+  // `this->GetOffsetLocation() + branch_info_[this->GetType()].pc_org * sizeof(uint32_t)`
+  // for everything else.
   CHECK(IsResolved());
   uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize());
   // Calculate the byte distance between instructions and also account for
   // different PC-relative origins.
-  uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t);
+  uint32_t offset = target_ - location;
   // Prepare the offset for encoding into the instruction(s).
   offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift;
   return offset;
@@ -1906,7 +1985,7 @@
   label->BindTo(bound_pc);
 }
 
-uint32_t MipsAssembler::GetLabelLocation(MipsLabel* label) const {
+uint32_t MipsAssembler::GetLabelLocation(const MipsLabel* label) const {
   CHECK(label->IsBound());
   uint32_t target = label->Position();
   if (label->prev_branch_id_plus_one_) {
@@ -1941,6 +2020,10 @@
   return old_position + last_position_adjustment_;
 }
 
+void MipsAssembler::BindPcRelBaseLabel() {
+  Bind(&pc_rel_base_label_);
+}
+
 void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) {
   uint32_t length = branches_.back().GetLength();
   if (!label->IsBound()) {
@@ -1962,7 +2045,7 @@
 
 void MipsAssembler::Buncond(MipsLabel* label) {
   uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
-  branches_.emplace_back(IsR6(), buffer_.Size(), target);
+  branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ false);
   FinalizeLabeledBranch(label);
 }
 
@@ -1976,12 +2059,46 @@
   FinalizeLabeledBranch(label);
 }
 
-void MipsAssembler::Call(MipsLabel* label, Register indirect_reg) {
+void MipsAssembler::Call(MipsLabel* label) {
   uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
-  branches_.emplace_back(IsR6(), buffer_.Size(), target, indirect_reg);
+  branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ true);
   FinalizeLabeledBranch(label);
 }
 
+Literal* MipsAssembler::NewLiteral(size_t size, const uint8_t* data) {
+  DCHECK(size == 4u || size == 8u) << size;
+  literals_.emplace_back(size, data);
+  return &literals_.back();
+}
+
+void MipsAssembler::LoadLiteral(Register dest_reg, Register base_reg, Literal* literal) {
+  // Literal loads are treated as pseudo branches since they require very similar handling.
+  DCHECK_EQ(literal->GetSize(), 4u);
+  MipsLabel* label = literal->GetLabel();
+  DCHECK(!label->IsBound());
+  branches_.emplace_back(IsR6(),
+                         buffer_.Size(),
+                         dest_reg,
+                         base_reg);
+  FinalizeLabeledBranch(label);
+}
+
+void MipsAssembler::EmitLiterals() {
+  if (!literals_.empty()) {
+    // We don't support byte and half-word literals.
+    // TODO: proper alignment for 64-bit literals when they're implemented.
+    for (Literal& literal : literals_) {
+      MipsLabel* label = literal.GetLabel();
+      Bind(label);
+      AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+      DCHECK(literal.GetSize() == 4u || literal.GetSize() == 8u);
+      for (size_t i = 0, size = literal.GetSize(); i != size; ++i) {
+        buffer_.Emit<uint8_t>(literal.GetData()[i]);
+      }
+    }
+  }
+}
+
 void MipsAssembler::PromoteBranches() {
   // Promote short branches to long as necessary.
   bool changed;
@@ -1989,7 +2106,8 @@
     changed = false;
     for (auto& branch : branches_) {
       CHECK(branch.IsResolved());
-      uint32_t delta = branch.PromoteIfNeeded();
+      uint32_t base = GetBranchLocationOrPcRelBase(&branch);
+      uint32_t delta = branch.PromoteIfNeeded(base);
       // If this branch has been promoted and needs to expand in size,
       // relocate all branches by the expansion size.
       if (delta) {
@@ -2027,27 +2145,35 @@
   // R2 short branches.
   {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kUncondBranch
   {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kCondBranch
-  {  5, 2, 0, MipsAssembler::Branch::kOffset16, 0 },  // kCall
+  {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kCall
+  // R2 near literal.
+  {  1, 0, 0, MipsAssembler::Branch::kOffset16, 0 },  // kLiteral
   // R2 long branches.
   {  9, 3, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongUncondBranch
   { 10, 4, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongCondBranch
   {  6, 1, 1, MipsAssembler::Branch::kOffset32, 0 },  // kLongCall
+  // R2 far literal.
+  {  3, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kFarLiteral
   // R6 short branches.
   {  1, 0, 1, MipsAssembler::Branch::kOffset28, 2 },  // kR6UncondBranch
   {  2, 0, 1, MipsAssembler::Branch::kOffset18, 2 },  // kR6CondBranch
                                                       // Exception: kOffset23 for beqzc/bnezc.
-  {  2, 0, 0, MipsAssembler::Branch::kOffset21, 2 },  // kR6Call
+  {  1, 0, 1, MipsAssembler::Branch::kOffset28, 2 },  // kR6Call
+  // R6 near literal.
+  {  1, 0, 0, MipsAssembler::Branch::kOffset21, 2 },  // kR6Literal
   // R6 long branches.
   {  2, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongUncondBranch
   {  3, 1, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongCondBranch
-  {  3, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongCall
+  {  2, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6LongCall
+  // R6 far literal.
+  {  2, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6FarLiteral
 };
 
-// Note: make sure branch_info_[] and mitBranch() are kept synchronized.
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
 void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) {
   CHECK_EQ(overwriting_, true);
   overwrite_location_ = branch->GetLocation();
-  uint32_t offset = branch->GetOffset();
+  uint32_t offset = branch->GetOffset(GetBranchOrPcRelBaseForEncoding(branch));
   BranchCondition condition = branch->GetCondition();
   Register lhs = branch->GetLeftRegister();
   Register rhs = branch->GetRightRegister();
@@ -2064,12 +2190,15 @@
       Nop();  // TODO: improve by filling the delay slot.
       break;
     case Branch::kCall:
-      Nal();
-      Nop();  // TODO: is this NOP really needed here?
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      Addiu(lhs, RA, offset);
-      Jalr(lhs);
-      Nop();
+      Bal(offset);
+      Nop();  // TODO: improve by filling the delay slot.
+      break;
+
+    // R2 near literal.
+    case Branch::kLiteral:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lw(lhs, rhs, offset);
       break;
 
     // R2 long branches.
@@ -2123,11 +2252,20 @@
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
       Lui(AT, High16Bits(offset));
       Ori(AT, AT, Low16Bits(offset));
-      Addu(lhs, AT, RA);
-      Jalr(lhs);
+      Addu(AT, AT, RA);
+      Jalr(AT);
       Nop();
       break;
 
+    // R2 far literal.
+    case Branch::kFarLiteral:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in lw.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lui(AT, High16Bits(offset));
+      Addu(AT, AT, rhs);
+      Lw(lhs, AT, Low16Bits(offset));
+      break;
+
     // R6 short branches.
     case Branch::kR6UncondBranch:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
@@ -2140,8 +2278,13 @@
       break;
     case Branch::kR6Call:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      Addiupc(lhs, offset);
-      Jialc(lhs, 0);
+      Balc(offset);
+      break;
+
+    // R6 near literal.
+    case Branch::kR6Literal:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lwpc(lhs, offset);
       break;
 
     // R6 long branches.
@@ -2159,11 +2302,18 @@
       Jic(AT, Low16Bits(offset));
       break;
     case Branch::kR6LongCall:
-      offset += (offset & 0x8000) << 1;  // Account for sign extension in addiu.
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in jialc.
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      Auipc(lhs, High16Bits(offset));
-      Addiu(lhs, lhs, Low16Bits(offset));
-      Jialc(lhs, 0);
+      Auipc(AT, High16Bits(offset));
+      Jialc(AT, Low16Bits(offset));
+      break;
+
+    // R6 far literal.
+    case Branch::kR6FarLiteral:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in lw.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Lw(lhs, AT, Low16Bits(offset));
       break;
   }
   CHECK_EQ(overwrite_location_, branch->GetEndLocation());
@@ -2174,8 +2324,8 @@
   Buncond(label);
 }
 
-void MipsAssembler::Jalr(MipsLabel* label, Register indirect_reg) {
-  Call(label, indirect_reg);
+void MipsAssembler::Bal(MipsLabel* label) {
+  Call(label);
 }
 
 void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label) {
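A worked example (editorial, values chosen for illustration) of the `offset += (offset & 0x8000) << 1` adjustment used by the far-literal and long-call cases in the hunk above. The 16-bit immediate of lw/jialc is sign-extended, so a naive hi/lo split of offset = 0x0001ABCD would be off by 0x10000:

    //   lui  $at, 0x0001          // $at = 0x00010000
    //   lw   $v0, <0xABCD>($at)   // 0xABCD sign-extends to 0xFFFFABCD -> address 0x0000ABCD (wrong)
    // After adding (0x8000 << 1) = 0x10000, offset becomes 0x0002ABCD and the split works:
    //   lui  $at, 0x0002          // $at = 0x00020000
    //   lw   $v0, <0xABCD>($at)   // 0x00020000 + 0xFFFFABCD = 0x0001ABCD (right)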
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 31b3b31..1f7781f 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -17,10 +17,12 @@
 #ifndef ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_
 #define ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_
 
+#include <deque>
 #include <utility>
 #include <vector>
 
 #include "arch/mips/instruction_set_features_mips.h"
+#include "base/arena_containers.h"
 #include "base/macros.h"
 #include "constants_mips.h"
 #include "globals.h"
@@ -79,6 +81,49 @@
   DISALLOW_COPY_AND_ASSIGN(MipsLabel);
 };
 
+// Assembler literal is a value embedded in code, retrieved using a PC-relative load.
+class Literal {
+ public:
+  static constexpr size_t kMaxSize = 8;
+
+  Literal(uint32_t size, const uint8_t* data)
+      : label_(), size_(size) {
+    DCHECK_LE(size, Literal::kMaxSize);
+    memcpy(data_, data, size);
+  }
+
+  template <typename T>
+  T GetValue() const {
+    DCHECK_EQ(size_, sizeof(T));
+    T value;
+    memcpy(&value, data_, sizeof(T));
+    return value;
+  }
+
+  uint32_t GetSize() const {
+    return size_;
+  }
+
+  const uint8_t* GetData() const {
+    return data_;
+  }
+
+  MipsLabel* GetLabel() {
+    return &label_;
+  }
+
+  const MipsLabel* GetLabel() const {
+    return &label_;
+  }
+
+ private:
+  MipsLabel label_;
+  const uint32_t size_;
+  uint8_t data_[kMaxSize];
+
+  DISALLOW_COPY_AND_ASSIGN(Literal);
+};
+
 // Slowpath entered when Thread::Current()->_exception is non-null.
 class MipsExceptionSlowPath {
  public:
@@ -107,6 +152,7 @@
       : Assembler(arena),
         overwriting_(false),
         overwrite_location_(0),
+        literals_(arena->Adapter(kArenaAllocAssembler)),
         last_position_adjustment_(0),
         last_old_position_(0),
         last_branch_id_(0),
@@ -182,6 +228,7 @@
   void Lwr(Register rt, Register rs, uint16_t imm16);
   void Lbu(Register rt, Register rs, uint16_t imm16);
   void Lhu(Register rt, Register rs, uint16_t imm16);
+  void Lwpc(Register rs, uint32_t imm19);  // R6
   void Lui(Register rt, uint16_t imm16);
   void Aui(Register rt, Register rs, uint16_t imm16);  // R6
   void Sync(uint32_t stype);
@@ -205,6 +252,7 @@
   void Sltiu(Register rt, Register rs, uint16_t imm16);
 
   void B(uint16_t imm16);
+  void Bal(uint16_t imm16);
   void Beq(Register rs, Register rt, uint16_t imm16);
   void Bne(Register rs, Register rt, uint16_t imm16);
   void Beqz(Register rt, uint16_t imm16);
@@ -226,6 +274,7 @@
   void Auipc(Register rs, uint16_t imm16);  // R6
   void Addiupc(Register rs, uint32_t imm19);  // R6
   void Bc(uint32_t imm26);  // R6
+  void Balc(uint32_t imm26);  // R6
   void Jic(Register rt, uint16_t imm16);  // R6
   void Jialc(Register rt, uint16_t imm16);  // R6
   void Bltc(Register rs, Register rt, uint16_t imm16);  // R6
@@ -365,7 +414,7 @@
   // These will generate R2 branches or R6 branches as appropriate.
   void Bind(MipsLabel* label);
   void B(MipsLabel* label);
-  void Jalr(MipsLabel* label, Register indirect_reg);
+  void Bal(MipsLabel* label);
   void Beq(Register rs, Register rt, MipsLabel* label);
   void Bne(Register rs, Register rt, MipsLabel* label);
   void Beqz(Register rt, MipsLabel* label);
@@ -412,6 +461,21 @@
     UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS";
   }
 
+  // Create a new literal with a given value.
+  // NOTE: Force the template parameter to be explicitly specified.
+  template <typename T>
+  Literal* NewLiteral(typename Identity<T>::type value) {
+    static_assert(std::is_integral<T>::value, "T must be an integral type.");
+    return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
+  }
+
+  // Create a new literal with the given data.
+  Literal* NewLiteral(size_t size, const uint8_t* data);
+
+  // Load literal using the base register (for R2 only) or using PC-relative loads
+  // (for R6 only; base_reg must be ZERO).
+  void LoadLiteral(Register dest_reg, Register base_reg, Literal* literal);
+
   //
   // Overridden common assembler high-level functionality.
   //
@@ -569,12 +633,22 @@
 
   // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS,
   // must be used instead of MipsLabel::GetPosition()).
-  uint32_t GetLabelLocation(MipsLabel* label) const;
+  uint32_t GetLabelLocation(const MipsLabel* label) const;
 
   // Get the final position of a label after local fixup based on the old position
   // recorded before FinalizeCode().
   uint32_t GetAdjustedPosition(uint32_t old_position);
 
+  // R2 doesn't have PC-relative addressing, which we need to access literals. We simulate it by
+  // reading the PC value into a general-purpose register with the NAL instruction and then loading
+  // literals through this base register. The code generator calls this method (at most once per
+  // method being compiled) to bind a label to the location for which the PC value is acquired.
+  // The assembler then computes literal offsets relative to this label.
+  void BindPcRelBaseLabel();
+
+  // Note that PC-relative literal loads are handled as pseudo branches because they need very
+  // similar relocation and may similarly expand in size to accommodate larger offsets relative
+  // to PC.
   enum BranchCondition {
     kCondLT,
     kCondGE,
@@ -604,18 +678,26 @@
       kUncondBranch,
       kCondBranch,
       kCall,
+      // R2 near literal.
+      kLiteral,
       // R2 long branches.
       kLongUncondBranch,
       kLongCondBranch,
       kLongCall,
+      // R2 far literal.
+      kFarLiteral,
       // R6 short branches.
       kR6UncondBranch,
       kR6CondBranch,
       kR6Call,
+      // R6 near literal.
+      kR6Literal,
       // R6 long branches.
       kR6LongUncondBranch,
       kR6LongCondBranch,
       kR6LongCall,
+      // R6 far literal.
+      kR6FarLiteral,
     };
     // Bit sizes of offsets defined as enums to minimize chance of typos.
     enum OffsetBits {
@@ -650,17 +732,17 @@
     };
     static const BranchInfo branch_info_[/* Type */];
 
-    // Unconditional branch.
-    Branch(bool is_r6, uint32_t location, uint32_t target);
+    // Unconditional branch or call.
+    Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call);
     // Conditional branch.
     Branch(bool is_r6,
            uint32_t location,
            uint32_t target,
            BranchCondition condition,
            Register lhs_reg,
-           Register rhs_reg = ZERO);
-    // Call (branch and link) that stores the target address in a given register (i.e. T9).
-    Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg);
+           Register rhs_reg);
+    // Literal.
+    Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg);
 
     // Some conditional branches with lhs = rhs are effectively NOPs, while some
     // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs.
@@ -736,17 +818,18 @@
     // that is allowed for short branches. This is for debugging/testing purposes.
     // max_short_distance = 0 forces all short branches to become long.
     // Use the implicit default argument when not debugging/testing.
-    uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
+    uint32_t PromoteIfNeeded(uint32_t location,
+                             uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
 
     // Returns the location of the instruction(s) containing the offset.
     uint32_t GetOffsetLocation() const;
 
     // Calculates and returns the offset ready for encoding in the branch instruction(s).
-    uint32_t GetOffset() const;
+    uint32_t GetOffset(uint32_t location) const;
 
    private:
     // Completes branch construction by determining and recording its type.
-    void InitializeType(bool is_call, bool is_r6);
+    void InitializeType(bool is_call, bool is_literal, bool is_r6);
     // Helper for the above.
     void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
 
@@ -776,12 +859,15 @@
 
   void Buncond(MipsLabel* label);
   void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO);
-  void Call(MipsLabel* label, Register indirect_reg);
+  void Call(MipsLabel* label);
   void FinalizeLabeledBranch(MipsLabel* label);
 
   Branch* GetBranch(uint32_t branch_id);
   const Branch* GetBranch(uint32_t branch_id) const;
+  uint32_t GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const;
+  uint32_t GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const;
 
+  void EmitLiterals();
   void PromoteBranches();
   void EmitBranch(Branch* branch);
   void EmitBranches();
@@ -816,6 +902,15 @@
   // The current overwrite location.
   uint32_t overwrite_location_;
 
+  // Use std::deque<> for literal labels to allow insertions at the end
+  // without invalidating pointers and references to existing elements.
+  ArenaDeque<Literal> literals_;
+
+  // There's no PC-relative addressing on MIPS32R2. So, in order to access literals relative to PC
+  // we get PC using the NAL instruction. This label marks the position within the assembler buffer
+  // that PC (from NAL) points to.
+  MipsLabel pc_rel_base_label_;
+
   // Data for AdjustedPosition(), see the description there.
   uint32_t last_position_adjustment_;
   uint32_t last_old_position_;
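The header comments above describe the intended division of labor between the code generator and the assembler for literal loads. A minimal sketch of that flow, purely editorial: the register choices, the Move() into a base register, and driving the assembler directly like this are assumptions for illustration, not code from this patch (the matching code-generator changes are outside it).

    // R6: PC-relative loads exist, so the base register must be ZERO.
    Literal* lit = __ NewLiteral<uint32_t>(0x12345678u);
    __ LoadLiteral(mips::V0, mips::ZERO, lit);   // emits lwpc (near) or auipc+lw (far)

    // R2: capture PC with NAL (RA <- address of the instruction after the delay slot),
    // keep it in a base register, and bind the PC-relative base label at that address.
    Literal* lit2 = __ NewLiteral<uint32_t>(0x87654321u);
    __ Nal();
    __ Move(mips::V1, mips::RA);     // delay slot; V1 now holds the captured PC (assumed sequence)
    __ BindPcRelBaseLabel();         // literal offsets are computed relative to this point
    __ LoadLiteral(mips::V0, mips::V1, lit2);  // emits lw (near) or lui+addu+lw (far)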
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
index ce92d60..49ef272 100644
--- a/compiler/utils/mips/assembler_mips32r6_test.cc
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -48,8 +48,30 @@
     return "mips";
   }
 
+  std::string GetAssemblerCmdName() OVERRIDE {
+    // We assemble and link for MIPS32R6. See GetAssemblerParameters() for details.
+    return "gcc";
+  }
+
   std::string GetAssemblerParameters() OVERRIDE {
-    return " --no-warn -32 -march=mips32r6";
+    // We assemble and link for MIPS32R6. The reason is that object files produced for MIPS32R6
+    // (and MIPS64R6) with the GNU assembler don't have correct final offsets in PC-relative
+    // branches in the .text section and so they require a relocation pass (there's a relocation
+    // section, .rela.text, that has the needed info to fix up the branches).
+    // We use "-modd-spreg" so we can use odd-numbered single precision FPU registers.
+    // We put the code at address 0x1000000 (instead of 0) to avoid overlapping with the
+    // .MIPS.abiflags section (there doesn't seem to be a way to suppress its generation easily).
+    return " -march=mips32r6 -modd-spreg -Wa,--no-warn"
+        " -Wl,-Ttext=0x1000000 -Wl,-e0x1000000 -nostdlib";
+  }
+
+  void Pad(std::vector<uint8_t>& data) OVERRIDE {
+    // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple
+    // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't
+    // pad, so, in order for two assembler outputs to match, we need to match the padding as well.
+    // NOP is encoded as four zero bytes on MIPS.
+    size_t pad_size = RoundUp(data.size(), 16u) - data.size();
+    data.insert(data.end(), pad_size, 0);
   }
 
   std::string GetDisassembleParameters() OVERRIDE {
@@ -272,6 +294,21 @@
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Aui, 16, "aui ${reg1}, ${reg2}, {imm}"), "Aui");
 }
 
+TEST_F(AssemblerMIPS32r6Test, Auipc) {
+  DriverStr(RepeatRIb(&mips::MipsAssembler::Auipc, 16, "auipc ${reg}, {imm}"), "Auipc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Lwpc) {
+  // Lwpc() takes an unsigned 19-bit immediate, while the GNU assembler needs a signed offset,
+  // hence the sign extension from bit 18 with `imm - ((imm & 0x40000) << 1)`.
+  // The GNU assembler also wants the offset to be a multiple of 4, which it will shift right
+  // by 2 positions when encoding, hence `<< 2` to compensate for that shift.
+  // We capture the value of the immediate with `.set imm, {imm}` because the value is needed
+  // twice for the sign extension, but `{imm}` is substituted only once.
+  const char* code = ".set imm, {imm}\nlw ${reg}, ((imm - ((imm & 0x40000) << 1)) << 2)($pc)";
+  DriverStr(RepeatRIb(&mips::MipsAssembler::Lwpc, 19, code), "Lwpc");
+}
+
 TEST_F(AssemblerMIPS32r6Test, Bitswap) {
   DriverStr(RepeatRR(&mips::MipsAssembler::Bitswap, "bitswap ${reg1}, ${reg2}"), "bitswap");
 }
@@ -598,12 +635,45 @@
   DriverStr(expected, "StoreDToOffset");
 }
 
+TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ LoadLiteral(mips::V0, mips::ZERO, literal);
+  constexpr size_t kAdduCount = 0x3FFDE;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+
+  std::string expected =
+      "lwpc $v0, 1f\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadFarthestNearLiteral");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ LoadLiteral(mips::V0, mips::ZERO, literal);
+  constexpr size_t kAdduCount = 0x3FFDF;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+
+  std::string expected =
+      "1:\n"
+      "auipc $at, %hi(2f - 1b)\n"
+      "lw $v0, %lo(2f - 1b)($at)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadNearestFarLiteral");
+}
+
 //////////////
 // BRANCHES //
 //////////////
 
-// TODO: MipsAssembler::Auipc
-//       MipsAssembler::Addiupc
+// TODO: MipsAssembler::Addiupc
 //       MipsAssembler::Bc
 //       MipsAssembler::Jic
 //       MipsAssembler::Jialc
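As a worked check of the Lwpc expected-output expression in the test above (editorial, values picked for illustration):

    // .set imm, {imm}; lw $reg, ((imm - ((imm & 0x40000) << 1)) << 2)($pc)
    // imm = 0x00001 (bit 18 clear): (1 - 0) << 2            =  4  ->  lw $reg, 4($pc)
    // imm = 0x7FFFF (bit 18 set):   (0x7FFFF - 0x80000) << 2 = -4  ->  lw $reg, -4($pc)
    // In both cases the GNU assembler divides the byte offset by 4 when encoding and ends up
    // with the same 19-bit field that MipsAssembler::Lwpc() emits directly.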
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index c722d0c..50a8dc2 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -647,6 +647,26 @@
   DriverStr(RepeatRRIb(&mips::MipsAssembler::Movt, 3, "movt ${reg1}, ${reg2}, $fcc{imm}"), "Movt");
 }
 
+TEST_F(AssemblerMIPSTest, MovfS) {
+  DriverStr(RepeatFFIb(&mips::MipsAssembler::MovfS, 3, "movf.s ${reg1}, ${reg2}, $fcc{imm}"),
+            "MovfS");
+}
+
+TEST_F(AssemblerMIPSTest, MovfD) {
+  DriverStr(RepeatFFIb(&mips::MipsAssembler::MovfD, 3, "movf.d ${reg1}, ${reg2}, $fcc{imm}"),
+            "MovfD");
+}
+
+TEST_F(AssemblerMIPSTest, MovtS) {
+  DriverStr(RepeatFFIb(&mips::MipsAssembler::MovtS, 3, "movt.s ${reg1}, ${reg2}, $fcc{imm}"),
+            "MovtS");
+}
+
+TEST_F(AssemblerMIPSTest, MovtD) {
+  DriverStr(RepeatFFIb(&mips::MipsAssembler::MovtD, 3, "movt.d ${reg1}, ${reg2}, $fcc{imm}"),
+            "MovtD");
+}
+
 TEST_F(AssemblerMIPSTest, CvtSW) {
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "CvtSW");
 }
@@ -2273,6 +2293,44 @@
   DriverStr(expected, "LoadConst32");
 }
 
+TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ BindPcRelBaseLabel();
+  __ LoadLiteral(mips::V0, mips::V1, literal);
+  constexpr size_t kAddiuCount = 0x1FDE;
+  for (size_t i = 0; i != kAddiuCount; ++i) {
+    __ Addiu(mips::A0, mips::A1, 0);
+  }
+
+  std::string expected =
+      "1:\n"
+      "lw $v0, %lo(2f - 1b)($v1)\n" +
+      RepeatInsn(kAddiuCount, "addiu $a0, $a1, 0\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadFarthestNearLiteral");
+}
+
+TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) {
+  mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+  __ BindPcRelBaseLabel();
+  __ LoadLiteral(mips::V0, mips::V1, literal);
+  constexpr size_t kAdduCount = 0x1FDF;
+  for (size_t i = 0; i != kAdduCount; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+
+  std::string expected =
+      "1:\n"
+      "lui $at, %hi(2f - 1b)\n"
+      "addu $at, $at, $v1\n"
+      "lw $v0, %lo(2f - 1b)($at)\n" +
+      RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      ".word 0x12345678\n";
+  DriverStr(expected, "LoadNearestFarLiteral");
+}
+
 #undef __
 
 }  // namespace art
diff --git a/compiler/utils/string_reference.h b/compiler/utils/string_reference.h
index 72552f2..e4c34ca 100644
--- a/compiler/utils/string_reference.h
+++ b/compiler/utils/string_reference.h
@@ -20,16 +20,19 @@
 #include <stdint.h>
 
 #include "base/logging.h"
+#include "dex_file-inl.h"
 #include "utf-inl.h"
 
 namespace art {
 
-class DexFile;
-
-// A string is uniquely located by its DexFile and the string_ids_ table index into that DexFile.
+// A string is located by its DexFile and the string_ids_ table index into that DexFile.
 struct StringReference {
   StringReference(const DexFile* file, uint32_t index) : dex_file(file), string_index(index) { }
 
+  const char* GetStringData() const {
+    return dex_file->GetStringData(dex_file->GetStringId(string_index));
+  }
+
   const DexFile* dex_file;
   uint32_t string_index;
 };
@@ -46,15 +49,13 @@
       // Use the string order enforced by the dex file verifier.
       DCHECK_EQ(
           sr1.string_index < sr2.string_index,
-          CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(
-              sr1.dex_file->GetStringData(sr1.dex_file->GetStringId(sr1.string_index)),
-              sr1.dex_file->GetStringData(sr2.dex_file->GetStringId(sr2.string_index))) < 0);
+          CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(sr1.GetStringData(),
+                                                                  sr2.GetStringData()) < 0);
       return sr1.string_index < sr2.string_index;
     } else {
       // Cannot compare indexes, so do the string comparison.
-      return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(
-          sr1.dex_file->GetStringData(sr1.dex_file->GetStringId(sr1.string_index)),
-          sr1.dex_file->GetStringData(sr2.dex_file->GetStringId(sr2.string_index))) < 0;
+      return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(sr1.GetStringData(),
+                                                                     sr2.GetStringData()) < 0;
     }
   }
 };
diff --git a/compiler/utils/string_reference_test.cc b/compiler/utils/string_reference_test.cc
new file mode 100644
index 0000000..df5080e
--- /dev/null
+++ b/compiler/utils/string_reference_test.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/string_reference.h"
+
+#include <memory>
+
+#include "gtest/gtest.h"
+#include "utils/test_dex_file_builder.h"
+
+namespace art {
+
+TEST(StringReference, ValueComparator) {
+  // This is a regression test for the StringReferenceValueComparator using the wrong
+  // dex file to get the string data from a StringId. We construct two dex files with
+  // just a single string with the same length but different value. This creates dex
+  // files that have the same layout, so the byte offset read from the StringId in one
+  // dex file, when used in the other dex file still points to valid string data, except
+  // that it's the wrong string. Without the fix the strings would then compare equal.
+  TestDexFileBuilder builder1;
+  builder1.AddString("String1");
+  std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1");
+  ASSERT_EQ(1u, dex_file1->NumStringIds());
+  ASSERT_STREQ("String1", dex_file1->GetStringData(dex_file1->GetStringId(0)));
+  StringReference sr1(dex_file1.get(), 0);
+
+  TestDexFileBuilder builder2;
+  builder2.AddString("String2");
+  std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 2");
+  ASSERT_EQ(1u, dex_file2->NumStringIds());
+  ASSERT_STREQ("String2", dex_file2->GetStringData(dex_file2->GetStringId(0)));
+  StringReference sr2(dex_file2.get(), 0);
+
+  StringReferenceValueComparator cmp;
+  EXPECT_TRUE(cmp(sr1, sr2));  // "String1" < "String2" is true.
+  EXPECT_FALSE(cmp(sr2, sr1));  // "String2" < "String1" is false.
+}
+
+TEST(StringReference, ValueComparator2) {
+  const char* const kDexFile1Strings[] = {
+      "",
+      "abc",
+      "abcxyz",
+  };
+  const char* const kDexFile2Strings[] = {
+      "a",
+      "abc",
+      "abcdef",
+      "def",
+  };
+  const bool expectedCmp12[arraysize(kDexFile1Strings)][arraysize(kDexFile2Strings)] = {
+      { true, true, true, true },
+      { false, false, true, true },
+      { false, false, false, true },
+  };
+  const bool expectedCmp21[arraysize(kDexFile2Strings)][arraysize(kDexFile1Strings)] = {
+      { false, true, true },
+      { false, false, true },
+      { false, false, true },
+      { false, false, false },
+  };
+
+  TestDexFileBuilder builder1;
+  for (const char* s : kDexFile1Strings) {
+    builder1.AddString(s);
+  }
+  std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1");
+  ASSERT_EQ(arraysize(kDexFile1Strings), dex_file1->NumStringIds());
+  for (size_t index = 0; index != arraysize(kDexFile1Strings); ++index) {
+    ASSERT_STREQ(kDexFile1Strings[index], dex_file1->GetStringData(dex_file1->GetStringId(index)));
+  }
+
+  TestDexFileBuilder builder2;
+  for (const char* s : kDexFile2Strings) {
+    builder2.AddString(s);
+  }
+  std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 2");
+  ASSERT_EQ(arraysize(kDexFile2Strings), dex_file2->NumStringIds());
+  for (size_t index = 0; index != arraysize(kDexFile2Strings); ++index) {
+    ASSERT_STREQ(kDexFile2Strings[index], dex_file2->GetStringData(dex_file2->GetStringId(index)));
+  }
+
+  StringReferenceValueComparator cmp;
+  for (size_t index1 = 0; index1 != arraysize(kDexFile1Strings); ++index1) {
+    for (size_t index2 = 0; index2 != arraysize(kDexFile2Strings); ++index2) {
+      StringReference sr1(dex_file1.get(), index1);
+      StringReference sr2(dex_file2.get(), index2);
+      EXPECT_EQ(expectedCmp12[index1][index2], cmp(sr1, sr2)) << index1 << " " << index2;
+      EXPECT_EQ(expectedCmp21[index2][index1], cmp(sr2, sr1)) << index1 << " " << index2;
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h
index fb37804..6921780 100644
--- a/compiler/utils/test_dex_file_builder.h
+++ b/compiler/utils/test_dex_file_builder.h
@@ -227,9 +227,18 @@
     // Write the complete header again, just simpler that way.
     std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
 
+    static constexpr bool kVerify = false;
+    static constexpr bool kVerifyChecksum = false;
     std::string error_msg;
     std::unique_ptr<const DexFile> dex_file(DexFile::Open(
-        &dex_file_data_[0], dex_file_data_.size(), dex_location, 0u, nullptr, false, &error_msg));
+        &dex_file_data_[0],
+        dex_file_data_.size(),
+        dex_location,
+        0u,
+        nullptr,
+        kVerify,
+        kVerifyChecksum,
+        &error_msg));
     CHECK(dex_file != nullptr) << error_msg;
     return dex_file;
   }
diff --git a/compiler/utils/transform_array_ref.h b/compiler/utils/transform_array_ref.h
index 6297b88..a6da34f 100644
--- a/compiler/utils/transform_array_ref.h
+++ b/compiler/utils/transform_array_ref.h
@@ -70,6 +70,11 @@
   TransformArrayRef(const ArrayRef<OtherBT>& base, Function fn)
       : data_(base, fn) { }
 
+  template <typename OtherBT,
+            typename = typename std::enable_if<std::is_same<BaseType, const OtherBT>::value>::type>
+  TransformArrayRef(const TransformArrayRef<OtherBT, Function>& other)
+      : TransformArrayRef(other.base(), other.GetFunction()) { }
+
   // Assignment operators.
 
   TransformArrayRef& operator=(const TransformArrayRef& other) = default;
@@ -149,6 +154,9 @@
   }
 
   Data data_;
+
+  template <typename OtherBT, typename OtherFunction>
+  friend class TransformArrayRef;
 };
 
 template <typename BaseType, typename Function>
diff --git a/compiler/utils/transform_array_ref_test.cc b/compiler/utils/transform_array_ref_test.cc
index 2593fad..8d71fd7 100644
--- a/compiler/utils/transform_array_ref_test.cc
+++ b/compiler/utils/transform_array_ref_test.cc
@@ -160,6 +160,48 @@
     taref[i] = transform_input[i];
   }
   ASSERT_EQ(std::vector<ValueHolder>({ 24, 37, 11, 71 }), transformed);
+
+  const std::vector<ValueHolder>& cinput = input;
+
+  auto ctaref = MakeTransformArrayRef(cinput, ref);
+  static_assert(std::is_same<int, decltype(ctaref)::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, decltype(ctaref)::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, decltype(ctaref)::reference>::value, "reference");
+  static_assert(std::is_same<const int*, decltype(ctaref)::const_pointer>::value, "const_pointer");
+  static_assert(std::is_same<const int&, decltype(ctaref)::const_reference>::value,
+                "const_reference");
+
+  std::copy(ctaref.begin(), ctaref.end(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 0, 1, 0, 3, 1 }), output);
+  output.clear();
+
+  std::copy(ctaref.cbegin(), ctaref.cend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 0, 1, 0, 3, 1 }), output);
+  output.clear();
+
+  std::copy(ctaref.rbegin(), ctaref.rend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 3, 0, 1, 0, 1 }), output);
+  output.clear();
+
+  std::copy(ctaref.crbegin(), ctaref.crend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 3, 0, 1, 0, 1 }), output);
+  output.clear();
+
+  ASSERT_EQ(cinput.size(), ctaref.size());
+  ASSERT_EQ(cinput.empty(), ctaref.empty());
+  ASSERT_EQ(cinput.front().value, ctaref.front());
+  ASSERT_EQ(cinput.back().value, ctaref.back());
+
+  for (size_t i = 0; i != cinput.size(); ++i) {
+    ASSERT_EQ(cinput[i].value, ctaref[i]);
+  }
+
+  // Test conversion adding const.
+  decltype(ctaref) ctaref2 = taref;
+  ASSERT_EQ(taref.size(), ctaref2.size());
+  for (size_t i = 0; i != taref.size(); ++i) {
+    ASSERT_EQ(taref[i], ctaref2[i]);
+  }
 }
 
 }  // namespace art
diff --git a/compiler/utils/transform_iterator.h b/compiler/utils/transform_iterator.h
index f0769d4..3bc9046 100644
--- a/compiler/utils/transform_iterator.h
+++ b/compiler/utils/transform_iterator.h
@@ -44,11 +44,7 @@
                     typename std::iterator_traits<BaseIterator>::iterator_category>::value,
                 "Transform iterator base must be an input iterator.");
 
-  using InputType =
-      typename std::conditional<
-          std::is_same<void, typename std::iterator_traits<BaseIterator>::reference>::value,
-          typename std::iterator_traits<BaseIterator>::value_type,
-          typename std::iterator_traits<BaseIterator>::reference>::type;
+  using InputType = typename std::iterator_traits<BaseIterator>::reference;
   using ResultType = typename std::result_of<Function(InputType)>::type;
 
  public:
diff --git a/compiler/utils/transform_iterator_test.cc b/compiler/utils/transform_iterator_test.cc
index dbb4779..57ff0a6 100644
--- a/compiler/utils/transform_iterator_test.cc
+++ b/compiler/utils/transform_iterator_test.cc
@@ -20,8 +20,6 @@
 #include <type_traits>
 #include <vector>
 
-#include <array>
-
 #include "gtest/gtest.h"
 
 #include "utils/transform_iterator.h"
diff --git a/compiler/utils/type_reference.h b/compiler/utils/type_reference.h
new file mode 100644
index 0000000..bd0739f
--- /dev/null
+++ b/compiler/utils/type_reference.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_TYPE_REFERENCE_H_
+#define ART_COMPILER_UTILS_TYPE_REFERENCE_H_
+
+#include <stdint.h>
+
+#include "base/logging.h"
+#include "utils/string_reference.h"
+
+namespace art {
+
+class DexFile;
+
+// A type is located by its DexFile and the type_ids_ table index into that DexFile.
+struct TypeReference {
+  TypeReference(const DexFile* file, uint32_t index) : dex_file(file), type_index(index) { }
+
+  const DexFile* dex_file;
+  uint32_t type_index;
+};
+
+// Compare the actual referenced type names. Used for type reference deduplication.
+struct TypeReferenceValueComparator {
+  bool operator()(TypeReference tr1, TypeReference tr2) const {
+    // Note that we want to deduplicate identical boot image types even if they are
+    // referenced by different dex files, so we simply compare the descriptors.
+    StringReference sr1(tr1.dex_file, tr1.dex_file->GetTypeId(tr1.type_index).descriptor_idx_);
+    StringReference sr2(tr2.dex_file, tr2.dex_file->GetTypeId(tr2.type_index).descriptor_idx_);
+    return StringReferenceValueComparator()(sr1, sr2);
+  }
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_TYPE_REFERENCE_H_
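TypeReferenceValueComparator is described above as the ordering used for type-reference deduplication. A small editorial sketch of that intended use; the container choice, mapped value, and identifiers are assumptions, not code from this patch:

    // Sketch: identical descriptors, even from different dex files, collapse to one entry.
    using TypeEntryMap = std::map<TypeReference, uint32_t, TypeReferenceValueComparator>;
    TypeEntryMap dedup;
    dedup.emplace(TypeReference(&dex_file_a, type_idx_a), some_payload);  // names are hypothetical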
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 2dce2f1..c133980 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -22,6 +22,7 @@
 
 #include <fstream>
 #include <iostream>
+#include <limits>
 #include <sstream>
 #include <string>
 #include <unordered_set>
@@ -81,6 +82,9 @@
 
 namespace art {
 
+static constexpr size_t kDefaultMinDexFilesForSwap = 2;
+static constexpr size_t kDefaultMinDexFileCumulativeSizeForSwap = 20 * MB;
+
 static int original_argc;
 static char** original_argv;
 
@@ -351,6 +355,20 @@
   UsageError("  --swap-fd=<file-descriptor>:  specifies a file to use for swap (by descriptor).");
   UsageError("      Example: --swap-fd=10");
   UsageError("");
+  UsageError("  --swap-dex-size-threshold=<size>:  specifies the minimum total dex file size in");
+  UsageError("      bytes to allow the use of swap.");
+  UsageError("      Example: --swap-dex-size-threshold=1000000");
+  UsageError("      Default: %zu", kDefaultMinDexFileCumulativeSizeForSwap);
+  UsageError("");
+  UsageError("  --swap-dex-count-threshold=<count>:  specifies the minimum number of dex files to");
+  UsageError("      allow the use of swap.");
+  UsageError("      Example: --swap-dex-count-threshold=10");
+  UsageError("      Default: %zu", kDefaultMinDexFilesForSwap);
+  UsageError("");
+  UsageError("  --very-large-app-threshold=<size>:  specifies the minimum total dex file size in");
+  UsageError("      bytes to consider the input \"very large\" and punt on the compilation.");
+  UsageError("      Example: --very-large-app-threshold=100000000");
+  UsageError("");
   UsageError("  --app-image-fd=<file-descriptor>: specify output file descriptor for app image.");
   UsageError("      Example: --app-image-fd=10");
   UsageError("");
@@ -473,25 +491,6 @@
   pthread_t pthread_;
 };
 
-static constexpr size_t kMinDexFilesForSwap = 2;
-static constexpr size_t kMinDexFileCumulativeSizeForSwap = 20 * MB;
-
-static bool UseSwap(bool is_image, std::vector<const DexFile*>& dex_files) {
-  if (is_image) {
-    // Don't use swap, we know generation should succeed, and we don't want to slow it down.
-    return false;
-  }
-  if (dex_files.size() < kMinDexFilesForSwap) {
-    // If there are less dex files than the threshold, assume it's gonna be fine.
-    return false;
-  }
-  size_t dex_files_size = 0;
-  for (const auto* dex_file : dex_files) {
-    dex_files_size += dex_file->GetHeader().file_size_;
-  }
-  return dex_files_size >= kMinDexFileCumulativeSizeForSwap;
-}
-
 class Dex2Oat FINAL {
  public:
   explicit Dex2Oat(TimingLogger* timings) :
@@ -1132,6 +1131,21 @@
         swap_file_name_ = option.substr(strlen("--swap-file=")).data();
       } else if (option.starts_with("--swap-fd=")) {
         ParseUintOption(option, "--swap-fd", &swap_fd_, Usage);
+      } else if (option.starts_with("--swap-dex-size-threshold=")) {
+        ParseUintOption(option,
+                        "--swap-dex-size-threshold",
+                        &min_dex_file_cumulative_size_for_swap_,
+                        Usage);
+      } else if (option.starts_with("--swap-dex-count-threshold=")) {
+        ParseUintOption(option,
+                        "--swap-dex-count-threshold",
+                        &min_dex_files_for_swap_,
+                        Usage);
+      } else if (option.starts_with("--very-large-app-threshold=")) {
+        ParseUintOption(option,
+                        "--very-large-app-threshold",
+                        &very_large_threshold_,
+                        Usage);
       } else if (option.starts_with("--app-image-file=")) {
         app_image_file_name_ = option.substr(strlen("--app-image-file=")).data();
       } else if (option.starts_with("--app-image-fd=")) {
@@ -1414,6 +1428,19 @@
     }
     // Note that dex2oat won't close the swap_fd_. The compiler driver's swap space will do that.
 
+    // If we need to downgrade the compiler-filter for size reasons, do that check now.
+    if (!IsBootImage() && IsVeryLarge(dex_files_)) {
+      if (!CompilerFilter::IsAsGoodAs(CompilerFilter::kVerifyAtRuntime,
+                                      compiler_options_->GetCompilerFilter())) {
+        LOG(INFO) << "Very large app, downgrading to verify-at-runtime.";
+        // Note: this change won't be reflected in the key-value store, as that had to be
+        //       finalized before loading the dex files. This setup is currently required
+        //       to get the size from the DexFile objects.
+        // TODO: refactor. b/29790079
+        compiler_options_->SetCompilerFilter(CompilerFilter::kVerifyAtRuntime);
+      }
+    }
+
     if (IsBootImage()) {
       // For boot image, pass opened dex files to the Runtime::Create().
       // Note: Runtime acquires ownership of these dex files.
@@ -1842,10 +1869,6 @@
     }
   }
 
-  CompilerOptions* GetCompilerOptions() const {
-    return compiler_options_.get();
-  }
-
   bool IsImage() const {
     return IsAppImage() || IsBootImage();
   }
@@ -1897,6 +1920,30 @@
   }
 
  private:
+  bool UseSwap(bool is_image, const std::vector<const DexFile*>& dex_files) {
+    if (is_image) {
+      // Don't use swap, we know generation should succeed, and we don't want to slow it down.
+      return false;
+    }
+    if (dex_files.size() < min_dex_files_for_swap_) {
+      // If there are less dex files than the threshold, assume it's gonna be fine.
+      return false;
+    }
+    size_t dex_files_size = 0;
+    for (const auto* dex_file : dex_files) {
+      dex_files_size += dex_file->GetHeader().file_size_;
+    }
+    return dex_files_size >= min_dex_file_cumulative_size_for_swap_;
+  }
+
+  bool IsVeryLarge(std::vector<const DexFile*>& dex_files) {
+    size_t dex_files_size = 0;
+    for (const auto* dex_file : dex_files) {
+      dex_files_size += dex_file->GetHeader().file_size_;
+    }
+    return dex_files_size >= very_large_threshold_;
+  }
+
   template <typename T>
   static std::vector<T*> MakeNonOwningPointerVector(const std::vector<std::unique_ptr<T>>& src) {
     std::vector<T*> result;
@@ -1985,8 +2032,10 @@
       if (location == OatFile::kSpecialSharedLibrary) {
         break;
       }
+      static constexpr bool kVerifyChecksum = true;
       std::string error_msg;
-      if (!DexFile::Open(location.c_str(), location.c_str(), &error_msg, opened_dex_files)) {
+      if (!DexFile::Open(
+          location.c_str(), location.c_str(), kVerifyChecksum, &error_msg, opened_dex_files)) {
         // If we fail to open the dex file because it's been stripped, try to open the dex file
         // from its corresponding oat file.
         OatFileAssistant oat_file_assistant(location.c_str(), isa, false, false);
@@ -2484,6 +2533,9 @@
   bool dump_slow_timing_;
   std::string swap_file_name_;
   int swap_fd_;
+  size_t min_dex_files_for_swap_ = kDefaultMinDexFilesForSwap;
+  size_t min_dex_file_cumulative_size_for_swap_ = kDefaultMinDexFileCumulativeSizeForSwap;
+  size_t very_large_threshold_ = std::numeric_limits<size_t>::max();
   std::string app_image_file_name_;
   int app_image_fd_;
   std::string profile_file_;
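A hedged illustration of the new dex2oat options added above, using the example values from the usage text; passing them as a vector of extra arguments, the way the new test fixture below does, is an assumption for illustration:

    // Values mirror the UsageError() examples; a real --swap-fd needs a writable descriptor.
    std::vector<std::string> extra_args = {
        "--swap-fd=10",
        "--swap-dex-size-threshold=1000000",     // allow swap once the dex files total >= ~1 MB
        "--swap-dex-count-threshold=10",         // ... and there are at least 10 dex files
        "--very-large-app-threshold=100000000",  // >= 100 MB of dex: downgrade to verify-at-runtime
    };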
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
new file mode 100644
index 0000000..93351e9
--- /dev/null
+++ b/dex2oat/dex2oat_test.cc
@@ -0,0 +1,430 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string>
+#include <vector>
+#include <sstream>
+
+#include "common_runtime_test.h"
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/stringprintf.h"
+#include "dex2oat_environment_test.h"
+#include "oat.h"
+#include "oat_file.h"
+#include "utils.h"
+
+#include <sys/wait.h>
+#include <unistd.h>
+
+namespace art {
+
+class Dex2oatTest : public Dex2oatEnvironmentTest {
+ public:
+  virtual void TearDown() OVERRIDE {
+    Dex2oatEnvironmentTest::TearDown();
+
+    output_ = "";
+    error_msg_ = "";
+    success_ = false;
+  }
+
+ protected:
+  void GenerateOdexForTest(const std::string& dex_location,
+                           const std::string& odex_location,
+                           CompilerFilter::Filter filter,
+                           const std::vector<std::string>& extra_args = {},
+                           bool expect_success = true) {
+    std::vector<std::string> args;
+    args.push_back("--dex-file=" + dex_location);
+    args.push_back("--oat-file=" + odex_location);
+    args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
+    args.push_back("--runtime-arg");
+    args.push_back("-Xnorelocate");
+
+    args.insert(args.end(), extra_args.begin(), extra_args.end());
+
+    std::string error_msg;
+    bool success = Dex2Oat(args, &error_msg);
+
+    if (expect_success) {
+      ASSERT_TRUE(success) << error_msg;
+
+      // Verify the odex file was generated as expected.
+      std::unique_ptr<OatFile> odex_file(OatFile::Open(odex_location.c_str(),
+                                                       odex_location.c_str(),
+                                                       nullptr,
+                                                       nullptr,
+                                                       false,
+                                                       /*low_4gb*/false,
+                                                       dex_location.c_str(),
+                                                       &error_msg));
+      ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
+
+      CheckFilter(filter, odex_file->GetCompilerFilter());
+    } else {
+      ASSERT_FALSE(success) << output_;
+
+      error_msg_ = error_msg;
+
+      // Verify there's no loadable odex file.
+      std::unique_ptr<OatFile> odex_file(OatFile::Open(odex_location.c_str(),
+                                                       odex_location.c_str(),
+                                                       nullptr,
+                                                       nullptr,
+                                                       false,
+                                                       /*low_4gb*/false,
+                                                       dex_location.c_str(),
+                                                       &error_msg));
+      ASSERT_TRUE(odex_file.get() == nullptr);
+    }
+  }
+
+  // Check the input compiler filter against the generated oat file's filter. May be overridden
+  // in subclasses when equality is not expected.
+  virtual void CheckFilter(CompilerFilter::Filter expected, CompilerFilter::Filter actual) {
+    EXPECT_EQ(expected, actual);
+  }
+
+  bool Dex2Oat(const std::vector<std::string>& dex2oat_args, std::string* error_msg) {
+    Runtime* runtime = Runtime::Current();
+
+    const std::vector<gc::space::ImageSpace*>& image_spaces =
+        runtime->GetHeap()->GetBootImageSpaces();
+    if (image_spaces.empty()) {
+      *error_msg = "No image location found for Dex2Oat.";
+      return false;
+    }
+    std::string image_location = image_spaces[0]->GetImageLocation();
+
+    std::vector<std::string> argv;
+    argv.push_back(runtime->GetCompilerExecutable());
+    argv.push_back("--runtime-arg");
+    argv.push_back("-classpath");
+    argv.push_back("--runtime-arg");
+    std::string class_path = runtime->GetClassPathString();
+    if (class_path == "") {
+      class_path = OatFile::kSpecialSharedLibrary;
+    }
+    argv.push_back(class_path);
+    if (runtime->IsDebuggable()) {
+      argv.push_back("--debuggable");
+    }
+    runtime->AddCurrentRuntimeFeaturesAsDex2OatArguments(&argv);
+
+    if (!runtime->IsVerificationEnabled()) {
+      argv.push_back("--compiler-filter=verify-none");
+    }
+
+    if (runtime->MustRelocateIfPossible()) {
+      argv.push_back("--runtime-arg");
+      argv.push_back("-Xrelocate");
+    } else {
+      argv.push_back("--runtime-arg");
+      argv.push_back("-Xnorelocate");
+    }
+
+    if (!kIsTargetBuild) {
+      argv.push_back("--host");
+    }
+
+    argv.push_back("--boot-image=" + image_location);
+
+    std::vector<std::string> compiler_options = runtime->GetCompilerOptions();
+    argv.insert(argv.end(), compiler_options.begin(), compiler_options.end());
+
+    argv.insert(argv.end(), dex2oat_args.begin(), dex2oat_args.end());
+
+    // We must set --android-root.
+    const char* android_root = getenv("ANDROID_ROOT");
+    CHECK(android_root != nullptr);
+    argv.push_back("--android-root=" + std::string(android_root));
+
+    int link[2];
+
+    if (pipe(link) == -1) {
+      return false;
+    }
+
+    pid_t pid = fork();
+    if (pid == -1) {
+      return false;
+    }
+
+    if (pid == 0) {
+      // We need dex2oat to actually log things.
+      setenv("ANDROID_LOG_TAGS", "*:d", 1);
+      dup2(link[1], STDERR_FILENO);
+      close(link[0]);
+      close(link[1]);
+      std::vector<const char*> c_args;
+      for (const std::string& str : argv) {
+        c_args.push_back(str.c_str());
+      }
+      c_args.push_back(nullptr);
+      execv(c_args[0], const_cast<char* const*>(c_args.data()));
+      exit(1);
+    } else {
+      close(link[1]);
+      char buffer[128];
+      memset(buffer, 0, 128);
+      ssize_t bytes_read = 0;
+
+      while (TEMP_FAILURE_RETRY(bytes_read = read(link[0], buffer, 128)) > 0) {
+        output_ += std::string(buffer, bytes_read);
+      }
+      close(link[0]);
+      int status = 0;
+      if (waitpid(pid, &status, 0) != -1) {
+        success_ = (status == 0);
+      }
+    }
+    return success_;
+  }
+
+  std::string output_ = "";
+  std::string error_msg_ = "";
+  bool success_ = false;
+};
+
+class Dex2oatSwapTest : public Dex2oatTest {
+ protected:
+  void RunTest(bool use_fd, bool expect_use, const std::vector<std::string>& extra_args = {}) {
+    std::string dex_location = GetScratchDir() + "/Dex2OatSwapTest.jar";
+    std::string odex_location = GetOdexDir() + "/Dex2OatSwapTest.odex";
+
+    Copy(GetDexSrc1(), dex_location);
+
+    std::vector<std::string> copy(extra_args);
+
+    std::unique_ptr<ScratchFile> sf;
+    if (use_fd) {
+      sf.reset(new ScratchFile());
+      copy.push_back(StringPrintf("--swap-fd=%d", sf->GetFd()));
+    } else {
+      std::string swap_location = GetOdexDir() + "/Dex2OatSwapTest.odex.swap";
+      copy.push_back("--swap-file=" + swap_location);
+    }
+    GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed, copy);
+
+    CheckValidity();
+    ASSERT_TRUE(success_);
+    CheckResult(expect_use);
+  }
+
+  void CheckResult(bool expect_use) {
+    if (kIsTargetBuild) {
+      CheckTargetResult(expect_use);
+    } else {
+      CheckHostResult(expect_use);
+    }
+  }
+
+  void CheckTargetResult(bool expect_use ATTRIBUTE_UNUSED) {
+    // TODO: Ignore for now, as we won't capture any output (it goes to the logcat). We may do
+    //       something for variants with file descriptor where we can control the lifetime of
+    //       the swap file and thus take a look at it.
+  }
+
+  void CheckHostResult(bool expect_use) {
+    if (!kIsTargetBuild) {
+      if (expect_use) {
+        EXPECT_NE(output_.find("Large app, accepted running with swap."), std::string::npos)
+            << output_;
+      } else {
+        EXPECT_EQ(output_.find("Large app, accepted running with swap."), std::string::npos)
+            << output_;
+      }
+    }
+  }
+
+  // Check whether the dex2oat run was really successful.
+  void CheckValidity() {
+    if (kIsTargetBuild) {
+      CheckTargetValidity();
+    } else {
+      CheckHostValidity();
+    }
+  }
+
+  void CheckTargetValidity() {
+    // TODO: Ignore for now, as we won't capture any output (it goes to the logcat). We may do
+    //       something for variants with file descriptor where we can control the lifetime of
+    //       the swap file and thus take a look at it.
+  }
+
+  // On the host, we can get the dex2oat output. Here, look for "dex2oat took."
+  void CheckHostValidity() {
+    EXPECT_NE(output_.find("dex2oat took"), std::string::npos) << output_;
+  }
+};
+
+TEST_F(Dex2oatSwapTest, DoNotUseSwapDefaultSingleSmall) {
+  RunTest(false /* use_fd */, false /* expect_use */);
+  RunTest(true /* use_fd */, false /* expect_use */);
+}
+
+TEST_F(Dex2oatSwapTest, DoNotUseSwapSingle) {
+  RunTest(false /* use_fd */, false /* expect_use */, { "--swap-dex-size-threshold=0" });
+  RunTest(true /* use_fd */, false /* expect_use */, { "--swap-dex-size-threshold=0" });
+}
+
+TEST_F(Dex2oatSwapTest, DoNotUseSwapSmall) {
+  RunTest(false /* use_fd */, false /* expect_use */, { "--swap-dex-count-threshold=0" });
+  RunTest(true /* use_fd */, false /* expect_use */, { "--swap-dex-count-threshold=0" });
+}
+
+TEST_F(Dex2oatSwapTest, DoUseSwapSingleSmall) {
+  RunTest(false /* use_fd */,
+          true /* expect_use */,
+          { "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" });
+  RunTest(true /* use_fd */,
+          true /* expect_use */,
+          { "--swap-dex-size-threshold=0", "--swap-dex-count-threshold=0" });
+}
+
+class Dex2oatVeryLargeTest : public Dex2oatTest {
+ protected:
+  void CheckFilter(CompilerFilter::Filter input ATTRIBUTE_UNUSED,
+                   CompilerFilter::Filter result ATTRIBUTE_UNUSED) OVERRIDE {
+    // Ignore, we'll do our own checks.
+  }
+
+  void RunTest(CompilerFilter::Filter filter,
+               bool expect_large,
+               const std::vector<std::string>& extra_args = {}) {
+    std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
+    std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex";
+
+    Copy(GetDexSrc1(), dex_location);
+
+    std::vector<std::string> copy(extra_args);
+
+    GenerateOdexForTest(dex_location, odex_location, filter, copy);
+
+    CheckValidity();
+    ASSERT_TRUE(success_);
+    CheckResult(dex_location, odex_location, filter, expect_large);
+  }
+
+  void CheckResult(const std::string& dex_location,
+                   const std::string& odex_location,
+                   CompilerFilter::Filter filter,
+                   bool expect_large) {
+    // Host/target independent checks.
+    std::string error_msg;
+    std::unique_ptr<OatFile> odex_file(OatFile::Open(odex_location.c_str(),
+                                                     odex_location.c_str(),
+                                                     nullptr,
+                                                     nullptr,
+                                                     false,
+                                                     /*low_4gb*/false,
+                                                     dex_location.c_str(),
+                                                     &error_msg));
+    ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
+    if (expect_large) {
+      // Note: we cannot check the following:
+      //   EXPECT_TRUE(CompilerFilter::IsAsGoodAs(CompilerFilter::kVerifyAtRuntime,
+      //                                          odex_file->GetCompilerFilter()));
+      // The reason is that the filter override currently happens when the dex files are
+      // loaded in dex2oat, which is after the oat file has been started. Thus, the header's
+      // key-value store cannot be changed, and the original filter is set in stone.
+
+      for (const OatDexFile* oat_dex_file : odex_file->GetOatDexFiles()) {
+        std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
+        ASSERT_TRUE(dex_file != nullptr);
+        uint32_t class_def_count = dex_file->NumClassDefs();
+        ASSERT_LT(class_def_count, std::numeric_limits<uint16_t>::max());
+        for (uint16_t class_def_index = 0; class_def_index < class_def_count; ++class_def_index) {
+          OatFile::OatClass oat_class = oat_dex_file->GetOatClass(class_def_index);
+          EXPECT_EQ(oat_class.GetType(), OatClassType::kOatClassNoneCompiled);
+        }
+      }
+
+      // If the input filter was "below," it should have been used.
+      if (!CompilerFilter::IsAsGoodAs(CompilerFilter::kVerifyAtRuntime, filter)) {
+        EXPECT_EQ(odex_file->GetCompilerFilter(), filter);
+      }
+    } else {
+      EXPECT_EQ(odex_file->GetCompilerFilter(), filter);
+    }
+
+    // Host/target dependent checks.
+    if (kIsTargetBuild) {
+      CheckTargetResult(expect_large);
+    } else {
+      CheckHostResult(expect_large);
+    }
+  }
+
+  void CheckTargetResult(bool expect_large ATTRIBUTE_UNUSED) {
+    // TODO: Ignore for now. May do something for fd things.
+  }
+
+  void CheckHostResult(bool expect_large) {
+    if (!kIsTargetBuild) {
+      if (expect_large) {
+        EXPECT_NE(output_.find("Very large app, downgrading to verify-at-runtime."),
+                  std::string::npos)
+            << output_;
+      } else {
+        EXPECT_EQ(output_.find("Very large app, downgrading to verify-at-runtime."),
+                  std::string::npos)
+            << output_;
+      }
+    }
+  }
+
+  // Check whether the dex2oat run was really successful.
+  void CheckValidity() {
+    if (kIsTargetBuild) {
+      CheckTargetValidity();
+    } else {
+      CheckHostValidity();
+    }
+  }
+
+  void CheckTargetValidity() {
+    // TODO: Ignore for now.
+  }
+
+  // On the host, we can get the dex2oat output. Here, look for "dex2oat took."
+  void CheckHostValidity() {
+    EXPECT_NE(output_.find("dex2oat took"), std::string::npos) << output_;
+  }
+};
+
+TEST_F(Dex2oatVeryLargeTest, DontUseVeryLarge) {
+  RunTest(CompilerFilter::kVerifyNone, false);
+  RunTest(CompilerFilter::kVerifyAtRuntime, false);
+  RunTest(CompilerFilter::kInterpretOnly, false);
+  RunTest(CompilerFilter::kSpeed, false);
+
+  RunTest(CompilerFilter::kVerifyNone, false, { "--very-large-app-threshold=1000000" });
+  RunTest(CompilerFilter::kVerifyAtRuntime, false, { "--very-large-app-threshold=1000000" });
+  RunTest(CompilerFilter::kInterpretOnly, false, { "--very-large-app-threshold=1000000" });
+  RunTest(CompilerFilter::kSpeed, false, { "--very-large-app-threshold=1000000" });
+}
+
+TEST_F(Dex2oatVeryLargeTest, UseVeryLarge) {
+  RunTest(CompilerFilter::kVerifyNone, false, { "--very-large-app-threshold=100" });
+  RunTest(CompilerFilter::kVerifyAtRuntime, false, { "--very-large-app-threshold=100" });
+  RunTest(CompilerFilter::kInterpretOnly, true, { "--very-large-app-threshold=100" });
+  RunTest(CompilerFilter::kSpeed, true, { "--very-large-app-threshold=100" });
+}
+
+}  // namespace art
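Aside: a minimal standalone sketch (not part of the patch) of the fork/pipe pattern that the Dex2Oat() helper above uses to capture the child's stderr. The /bin/ls target and the WIFEXITED-based status check are illustrative assumptions, not ART code; the real helper execs the compiler binary and treats the raw waitpid status as the success flag.

    #include <iostream>
    #include <string>
    #include <vector>
    #include <sys/wait.h>
    #include <unistd.h>

    static bool RunAndCaptureStderr(const std::vector<std::string>& argv, std::string* output) {
      int link[2];
      if (pipe(link) == -1) {
        return false;
      }
      pid_t pid = fork();
      if (pid == -1) {
        return false;
      }
      if (pid == 0) {
        // Child: route stderr into the pipe and exec the target binary.
        dup2(link[1], STDERR_FILENO);
        close(link[0]);
        close(link[1]);
        std::vector<const char*> c_args;
        for (const std::string& str : argv) {
          c_args.push_back(str.c_str());
        }
        c_args.push_back(nullptr);
        execv(c_args[0], const_cast<char* const*>(c_args.data()));
        _exit(1);  // Only reached if execv fails.
      }
      // Parent: close the write end, drain the read end, then reap the child.
      close(link[1]);
      char buffer[128];
      ssize_t bytes_read;
      while ((bytes_read = read(link[0], buffer, sizeof(buffer))) > 0) {
        output->append(buffer, bytes_read);
      }
      close(link[0]);
      int status = 0;
      return waitpid(pid, &status, 0) != -1 && WIFEXITED(status) && WEXITSTATUS(status) == 0;
    }

    int main() {
      std::string err;
      bool ok = RunAndCaptureStderr({"/bin/ls", "--no-such-flag"}, &err);
      std::cout << (ok ? "exit 0" : "non-zero exit") << ", stderr: " << err;
      return 0;
    }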
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 9e06a7c..48b773e 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -1635,11 +1635,10 @@
 
   // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
   // all of which are Zip archives with "classes.dex" inside.
-  //
-  // TODO(ajcbik): implement gOptions.ignoreBadChecksum
+  const bool kVerifyChecksum = !gOptions.ignoreBadChecksum;
   std::string error_msg;
   std::vector<std::unique_ptr<const DexFile>> dex_files;
-  if (!DexFile::Open(fileName, fileName, &error_msg, &dex_files)) {
+  if (!DexFile::Open(fileName, fileName, kVerifyChecksum, &error_msg, &dex_files)) {
     // Display returned error message to user. Note that this error behavior
     // differs from the error messages shown by the original Dalvik dexdump.
     fputs(error_msg.c_str(), stderr);
diff --git a/dexdump/dexdump_main.cc b/dexdump/dexdump_main.cc
index 32e9d52..f716ba8 100644
--- a/dexdump/dexdump_main.cc
+++ b/dexdump/dexdump_main.cc
@@ -17,8 +17,8 @@
  *
  * This is a re-implementation of the original dexdump utility that was
  * based on Dalvik functions in libdex into a new dexdump that is now
- * based on Art functions in libart instead. The output is identical to
- * the original for correct DEX files. Error messages may differ, however.
+ * based on Art functions in libart instead. The output is very similar to
+ * the original for correct DEX files. Error messages may differ, however.
  * Also, ODEX files are no longer supported.
  */
 
diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc
index d20c169..6f19df5 100644
--- a/dexlist/dexlist.cc
+++ b/dexlist/dexlist.cc
@@ -180,9 +180,10 @@
 static int processFile(const char* fileName) {
   // If the file is not a .dex file, the function tries .zip/.jar/.apk files,
   // all of which are Zip archives with "classes.dex" inside.
+  static constexpr bool kVerifyChecksum = true;
   std::string error_msg;
   std::vector<std::unique_ptr<const DexFile>> dex_files;
-  if (!DexFile::Open(fileName, fileName, &error_msg, &dex_files)) {
+  if (!DexFile::Open(fileName, fileName, kVerifyChecksum, &error_msg, &dex_files)) {
     fputs(error_msg.c_str(), stderr);
     fputc('\n', stderr);
     return -1;
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 286faf2..1a3e3f5 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -1497,6 +1497,25 @@
           }
           break;
         }
+        case 0x7B: case 0x7F: {
+          FpRegister d(instr, 12, 22);
+          FpRegister m(instr, 0, 5);
+          uint32_t sz = (instr >> 18) & 0x3;  // Decode size bits.
+          uint32_t size = (sz == 0) ? 8 : sz << 4;
+          uint32_t opc2 = (instr >> 7) & 0xF;
+          uint32_t Q = (instr >> 6) & 1;
+          if (Q == 0 && opc2 == 0xA && size == 8) {  // 1010, VCNT
+            opcode << "vcnt." << size;
+            args << d << ", " << m;
+          } else if (Q == 0 && (opc2 == 0x4 || opc2 == 0x5) && size <= 32) {  // 010x, VPADDL
+            bool op = HasBitSet(instr, 7);
+            opcode << "vpaddl." << (op ? "u" : "s") << size;
+            args << d << ", " << m;
+          } else {
+            opcode << "UNKNOWN " << op2;
+          }
+          break;
+        }
       default:      // more formats
         if ((op2 >> 4) == 2) {      // 010xxxx
           // data processing (register)
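Aside: a standalone sketch of just the field extraction in the new 0x7B/0x7F case above. The two size bits at [19:18] select the element width, opc2 at [10:7] distinguishes VCNT (0b1010) from VPADDL (0b010x, where bit 7 is the signed/unsigned flag). The hand-built bit pattern in main() is illustrative only, not a full ARM encoding.

    #include <cstdint>
    #include <cstdio>
    #include <string>

    static std::string DecodeVcntVpaddl(uint32_t instr) {
      uint32_t sz = (instr >> 18) & 0x3;
      uint32_t size = (sz == 0) ? 8 : sz << 4;  // 0 -> 8, 1 -> 16, 2 -> 32.
      uint32_t opc2 = (instr >> 7) & 0xF;
      uint32_t q = (instr >> 6) & 1;
      if (q == 0 && opc2 == 0xA && size == 8) {
        return "vcnt.8";
      } else if (q == 0 && (opc2 == 0x4 || opc2 == 0x5) && size <= 32) {
        bool unsigned_op = (opc2 & 1) != 0;  // Bit 7 of the instruction selects u vs. s.
        return std::string("vpaddl.") + (unsigned_op ? "u" : "s") + std::to_string(size);
      }
      return "UNKNOWN";
    }

    int main() {
      // size bits = 1 (16-bit elements), opc2 = 0b0101 -> expected "vpaddl.u16".
      uint32_t fields = (1u << 18) | (0x5u << 7);
      std::printf("%s\n", DecodeVcntVpaddl(fields).c_str());
      return 0;
    }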
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index a95ea64..769263e 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -330,8 +330,10 @@
   { kITypeMask, 55u << kOpcodeShift, "ld", "TO", },
   { kITypeMask, 56u << kOpcodeShift, "sc", "TO", },
   { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", },
+  { kJTypeMask, 58u << kOpcodeShift, "balc", "P" },
   { kITypeMask | (0x1f << 16), (59u << kOpcodeShift) | (30 << 16), "auipc", "Si" },
   { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (0 << 19), "addiupc", "Sp" },
+  { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (1 << 19), "lwpc", "So" },
   { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", },
   { kITypeMask | (0x1f << 21), 62u << kOpcodeShift, "jialc", "Ti" },
   { kITypeMask | (1 << 21), (62u << kOpcodeShift) | (1 << 21), "bnezc", "Sb" },  // TODO: de-dup?
@@ -384,6 +386,8 @@
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 13, "trunc.w", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 14, "ceil.w", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 15, "floor.w", "fad" },
+  { kFpMask | (0x201 << 16), kCop1 | (0x200 << 16) | 17, "movf", "fadc" },
+  { kFpMask | (0x201 << 16), kCop1 | (0x201 << 16) | 17, "movt", "fadc" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 26, "rint", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 27, "class", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 32, "cvt.s", "fad" },
@@ -507,7 +511,15 @@
               }
             }
             break;
-          case 'P':  // 26-bit offset in bc.
+          case 'o':  // 19-bit offset in lwpc.
+            {
+              int32_t offset = (instruction & 0x7ffff) - ((instruction & 0x40000) << 1);
+              offset <<= 2;
+              args << FormatInstructionPointer(instr_ptr + offset);
+              args << StringPrintf("  ; %+d", offset);
+            }
+            break;
+          case 'P':  // 26-bit offset in bc and balc.
             {
               int32_t offset = (instruction & 0x3ffffff) - ((instruction & 0x2000000) << 1);
               offset <<= 2;
@@ -538,6 +550,7 @@
     }
   }
 
+  // TODO: Simplify this once these sequences are simplified in the compiler.
   // Special cases for sequences of:
   //   pc-relative +/- 2GB branch:
   //     auipc  reg, imm
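Aside: the new 'o' (lwpc) format decodes a 19-bit word offset with a mask-and-subtract sign extension, the same idiom the 'P' (bc/balc) case uses for its 26-bit field. A standalone sketch with two spot checks; the sample encodings are made up for illustration.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Sign-extend the low 19 bits of the instruction word, then scale from
    // 32-bit words to bytes, mirroring the 'o' case in the disassembler.
    int32_t DecodeLwpcOffset(uint32_t instruction) {
      int32_t offset = (instruction & 0x7ffff) - ((instruction & 0x40000) << 1);
      return offset << 2;
    }

    int main() {
      // All-ones field: 0x40000 is the sign bit, so this encodes -1 word, i.e. -4 bytes.
      assert(DecodeLwpcOffset(0x7ffff) == -4);
      // A small positive field passes through unchanged: 1 word -> +4 bytes.
      assert(DecodeLwpcOffset(0x00001) == 4);
      std::printf("ok\n");
      return 0;
    }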
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 7239a47..3f031a3 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -35,7 +35,7 @@
 #include "debug/elf_debug_writer.h"
 #include "debug/method_debug_info.h"
 #include "dex_file-inl.h"
-#include "dex_instruction.h"
+#include "dex_instruction-inl.h"
 #include "disassembler.h"
 #include "elf_builder.h"
 #include "gc/space/image_space.h"
@@ -578,6 +578,7 @@
     // Print embedded dex file data range.
     const uint8_t* const oat_file_begin = oat_dex_file.GetOatFile()->Begin();
     const uint8_t* const dex_file_pointer = oat_dex_file.GetDexFilePointer();
+    std::set<uint32_t> string_ids;
     uint32_t dex_offset = dchecked_integral_cast<uint32_t>(dex_file_pointer - oat_file_begin);
     os << StringPrintf("dex-file: 0x%08x..0x%08x\n",
                        dex_offset,
@@ -623,7 +624,7 @@
          << " (" << oat_class.GetType() << ")\n";
       // TODO: include bitmap here if type is kOatClassSomeCompiled?
       if (options_.list_classes_) continue;
-      if (!DumpOatClass(&vios, oat_class, *dex_file, class_def, &stop_analysis)) {
+      if (!DumpOatClass(&vios, oat_class, *dex_file, class_def, &stop_analysis, string_ids)) {
         success = false;
       }
       if (stop_analysis) {
@@ -631,7 +632,7 @@
         return success;
       }
     }
-
+    os << "Number of unique strings loaded from dex code: " << string_ids.size() << "\n";
     os << std::flush;
     return success;
   }
@@ -725,7 +726,8 @@
 
   bool DumpOatClass(VariableIndentationOutputStream* vios,
                     const OatFile::OatClass& oat_class, const DexFile& dex_file,
-                    const DexFile::ClassDef& class_def, bool* stop_analysis) {
+                    const DexFile::ClassDef& class_def, bool* stop_analysis,
+                    std::set<uint32_t>& string_ids) {
     bool success = true;
     bool addr_found = false;
     const uint8_t* class_data = dex_file.GetClassData(class_def);
@@ -739,7 +741,7 @@
     while (it.HasNextDirectMethod()) {
       if (!DumpOatMethod(vios, class_def, class_method_index, oat_class, dex_file,
                          it.GetMemberIndex(), it.GetMethodCodeItem(),
-                         it.GetRawMemberAccessFlags(), &addr_found)) {
+                         it.GetRawMemberAccessFlags(), &addr_found, string_ids)) {
         success = false;
       }
       if (addr_found) {
@@ -752,7 +754,7 @@
     while (it.HasNextVirtualMethod()) {
       if (!DumpOatMethod(vios, class_def, class_method_index, oat_class, dex_file,
                          it.GetMemberIndex(), it.GetMethodCodeItem(),
-                         it.GetRawMemberAccessFlags(), &addr_found)) {
+                         it.GetRawMemberAccessFlags(), &addr_found, string_ids)) {
         success = false;
       }
       if (addr_found) {
@@ -777,9 +779,35 @@
                      uint32_t class_method_index,
                      const OatFile::OatClass& oat_class, const DexFile& dex_file,
                      uint32_t dex_method_idx, const DexFile::CodeItem* code_item,
-                     uint32_t method_access_flags, bool* addr_found) {
+                     uint32_t method_access_flags, bool* addr_found,
+                     std::set<uint32_t>& string_ids) {
     bool success = true;
 
+    if (code_item != nullptr) {
+      const uint16_t* code_ptr = code_item->insns_;
+      const uint16_t* code_end = code_item->insns_ + code_item->insns_size_in_code_units_;
+
+      while (code_ptr < code_end) {
+        const Instruction* inst = Instruction::At(code_ptr);
+        switch (inst->Opcode()) {
+          case Instruction::CONST_STRING: {
+            uint32_t string_index = inst->VRegB_21c();
+            string_ids.insert(string_index);
+            break;
+          }
+          case Instruction::CONST_STRING_JUMBO: {
+            uint32_t string_index = inst->VRegB_31c();
+            string_ids.insert(string_index);
+            break;
+          }
+
+          default:
+            break;
+        }
+
+        code_ptr += inst->SizeInCodeUnits();
+      }
+    }
     // TODO: Support regex
     std::string method_name = dex_file.GetMethodName(dex_file.GetMethodId(dex_method_idx));
     if (method_name.find(options_.method_filter_) == std::string::npos) {
@@ -1723,7 +1751,7 @@
         const auto& method_section = state->image_header_.GetMethodsSection();
         size_t num_methods = dex_cache->NumResolvedMethods();
         if (num_methods != 0u) {
-          os << "Methods (size=" << num_methods << "):";
+          os << "Methods (size=" << num_methods << "):\n";
           ScopedIndentation indent2(&state->vios_);
           auto* resolved_methods = dex_cache->GetResolvedMethods();
           for (size_t i = 0, length = dex_cache->NumResolvedMethods(); i < length; ++i) {
@@ -1732,10 +1760,12 @@
                                                              image_pointer_size);
             size_t run = 0;
             for (size_t j = i + 1;
-                j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_methods,
-                                                                           j,
-                                                                           image_pointer_size);
-                ++j, ++run) {}
+                 j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_methods,
+                                                                            j,
+                                                                            image_pointer_size);
+                 ++j) {
+              ++run;
+            }
             if (run == 0) {
               os << StringPrintf("%zd: ", i);
             } else {
@@ -1756,17 +1786,20 @@
         }
         size_t num_fields = dex_cache->NumResolvedFields();
         if (num_fields != 0u) {
-          os << "Fields (size=" << num_fields << "):";
+          os << "Fields (size=" << num_fields << "):\n";
           ScopedIndentation indent2(&state->vios_);
           auto* resolved_fields = dex_cache->GetResolvedFields();
           for (size_t i = 0, length = dex_cache->NumResolvedFields(); i < length; ++i) {
-            auto* elem = mirror::DexCache::GetElementPtrSize(resolved_fields, i, image_pointer_size);
+            auto* elem = mirror::DexCache::GetElementPtrSize(
+                resolved_fields, i, image_pointer_size);
             size_t run = 0;
             for (size_t j = i + 1;
-                j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_fields,
-                                                                           j,
-                                                                           image_pointer_size);
-                ++j, ++run) {}
+                 j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_fields,
+                                                                            j,
+                                                                            image_pointer_size);
+                 ++j) {
+              ++run;
+            }
             if (run == 0) {
               os << StringPrintf("%zd: ", i);
             } else {
@@ -1785,6 +1818,32 @@
             os << StringPrintf("%p   %s\n", elem, msg.c_str());
           }
         }
+        size_t num_types = dex_cache->NumResolvedTypes();
+        if (num_types != 0u) {
+          os << "Types (size=" << num_types << "):\n";
+          ScopedIndentation indent2(&state->vios_);
+          auto* resolved_types = dex_cache->GetResolvedTypes();
+          for (size_t i = 0; i < num_types; ++i) {
+            auto* elem = resolved_types[i].Read();
+            size_t run = 0;
+            for (size_t j = i + 1; j != num_types && elem == resolved_types[j].Read(); ++j) {
+              ++run;
+            }
+            if (run == 0) {
+              os << StringPrintf("%zd: ", i);
+            } else {
+              os << StringPrintf("%zd to %zd: ", i, i + run);
+              i = i + run;
+            }
+            std::string msg;
+            if (elem == nullptr) {
+              msg = "null";
+            } else {
+              msg = PrettyClass(elem);
+            }
+            os << StringPrintf("%p   %s\n", elem, msg.c_str());
+          }
+        }
       }
     }
     std::string temp;
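Aside: the methods, fields, and new types dumps above all use the same run-length grouping so runs of identical dex-cache entries print as one "<start> to <end>:" range instead of one line per slot. A standalone sketch of that loop over made-up integer entries (nothing here is ART data):

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> resolved = {0, 0, 0, 7, 9, 9};
      for (size_t i = 0; i < resolved.size(); ++i) {
        int elem = resolved[i];
        // Count how many of the following entries repeat the current value.
        size_t run = 0;
        for (size_t j = i + 1; j != resolved.size() && elem == resolved[j]; ++j) {
          ++run;
        }
        if (run == 0) {
          std::printf("%zd: %d\n", i, elem);
        } else {
          std::printf("%zd to %zd: %d\n", i, i + run, elem);
          i += run;  // Skip the entries already covered by the range.
        }
      }
      return 0;
    }

This prints "0 to 2: 0", "3: 7", "4 to 5: 9", matching the range notation the dump emits.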
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 5bb61bb..0a7ffda 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -494,17 +494,6 @@
   image_header->VisitPackedArtMethods(&visitor, heap_->Begin(), pointer_size);
 }
 
-void PatchOat::PatchImTables(const ImageHeader* image_header) {
-  const size_t pointer_size = InstructionSetPointerSize(isa_);
-  // We can safely walk target image since the conflict tables are independent.
-  image_header->VisitPackedImTables(
-      [this](ArtMethod* method) {
-        return RelocatedAddressOfPointer(method);
-      },
-      image_->Begin(),
-      pointer_size);
-}
-
 void PatchOat::PatchImtConflictTables(const ImageHeader* image_header) {
   const size_t pointer_size = InstructionSetPointerSize(isa_);
   // We can safely walk target image since the conflict tables are independent.
@@ -647,7 +636,6 @@
 
   PatchArtFields(image_header);
   PatchArtMethods(image_header);
-  PatchImTables(image_header);
   PatchImtConflictTables(image_header);
   PatchInternedStrings(image_header);
   PatchClassTable(image_header);
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 61ec695..3ef837f 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -117,7 +117,6 @@
   bool PatchImage(bool primary_image) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchArtFields(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchArtMethods(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
-  void PatchImTables(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchImtConflictTables(const ImageHeader* image_header)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchInternedStrings(const ImageHeader* image_header)
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index 462c397..cd0aa6f 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -61,17 +61,21 @@
     ASSERT_TRUE(file_info.Equals(info));
   }
 
-    // Runs test with given arguments.
-  int ProcessProfiles(const std::vector<int>& profiles_fd, int reference_profile_fd) {
+  std::string GetProfmanCmd() {
     std::string file_path = GetTestAndroidRoot();
     file_path += "/bin/profman";
     if (kIsDebugBuild) {
       file_path += "d";
     }
-
-    EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path";
+    EXPECT_TRUE(OS::FileExists(file_path.c_str()))
+        << file_path << " should be a valid file path";
+    return file_path;
+  }
+  // Runs test with given arguments.
+  int ProcessProfiles(const std::vector<int>& profiles_fd, int reference_profile_fd) {
+    std::string profman_cmd = GetProfmanCmd();
     std::vector<std::string> argv_str;
-    argv_str.push_back(file_path);
+    argv_str.push_back(profman_cmd);
     for (size_t k = 0; k < profiles_fd.size(); k++) {
       argv_str.push_back("--profile-file-fd=" + std::to_string(profiles_fd[k]));
     }
@@ -80,6 +84,15 @@
     std::string error;
     return ExecAndReturnCode(argv_str, &error);
   }
+
+  bool GenerateTestProfile(const std::string& filename) {
+    std::string profman_cmd = GetProfmanCmd();
+    std::vector<std::string> argv_str;
+    argv_str.push_back(profman_cmd);
+    argv_str.push_back("--generate-test-profile=" + filename);
+    std::string error;
+    return ExecAndReturnCode(argv_str, &error);
+  }
 };
 
 TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
@@ -282,4 +295,15 @@
   CheckProfileInfo(profile1, info1);
 }
 
+TEST_F(ProfileAssistantTest, TestProfileGeneration) {
+  ScratchFile profile;
+  // Generate a test profile.
+  GenerateTestProfile(profile.GetFilename());
+
+  // Verify that the generated profile is valid and can be loaded.
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ProfileCompilationInfo info;
+  ASSERT_TRUE(info.Load(GetFd(profile)));
+}
+
 }  // namespace art
diff --git a/profman/profman.cc b/profman/profman.cc
index 754e431..a5fefa7 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -100,6 +100,14 @@
   UsageError("  --reference-profile-file-fd=<number>: same as --reference-profile-file but");
   UsageError("      accepts a file descriptor. Cannot be used together with");
   UsageError("      --reference-profile-file.");
+  UsageError("  --generate-test-profile=<filename>: generates a random profile file for testing.");
+  UsageError("  --generate-test-profile-num-dex=<number>: number of dex files that should be");
+  UsageError("      included in the generated profile. Defaults to 20.");
+  UsageError("  --generate-test-profile-method-ratio=<number>: the percentage from the maximum");
+  UsageError("      number of methods that should be generated. Defaults to 5.");
+  UsageError("  --generate-test-profile-class-ratio=<number>: the percentage from the maximum");
+  UsageError("      number of classes that should be generated. Defaults to 5.");
+  UsageError("");
   UsageError("");
   UsageError("  --dex-location=<string>: location string to use with corresponding");
   UsageError("      apk-fd to find dex files");
@@ -111,12 +119,20 @@
   exit(EXIT_FAILURE);
 }
 
+// Note: make sure you update the Usage if you change these values.
+static constexpr uint16_t kDefaultTestProfileNumDex = 20;
+static constexpr uint16_t kDefaultTestProfileMethodRatio = 5;
+static constexpr uint16_t kDefaultTestProfileClassRatio = 5;
+
 class ProfMan FINAL {
  public:
   ProfMan() :
       reference_profile_file_fd_(kInvalidFd),
       dump_only_(false),
       dump_output_to_fd_(kInvalidFd),
+      test_profile_num_dex_(kDefaultTestProfileNumDex),
+      test_profile_method_ratio_(kDefaultTestProfileMethodRatio),
+      test_profile_class_ratio_(kDefaultTestProfileClassRatio),
       start_ns_(NanoTime()) {}
 
   ~ProfMan() {
@@ -159,6 +175,23 @@
         dex_locations_.push_back(option.substr(strlen("--dex-location=")).ToString());
       } else if (option.starts_with("--apk-fd=")) {
         ParseFdForCollection(option, "--apk-fd", &apks_fd_);
+      } else if (option.starts_with("--generate-test-profile=")) {
+        test_profile_ = option.substr(strlen("--generate-test-profile=")).ToString();
+      } else if (option.starts_with("--generate-test-profile-num-dex=")) {
+        ParseUintOption(option,
+                        "--generate-test-profile-num-dex",
+                        &test_profile_num_dex_,
+                        Usage);
+      } else if (option.starts_with("--generate-test-profile-method-ratio")) {
+        ParseUintOption(option,
+                        "--generate-test-profile-method-ratio",
+                        &test_profile_method_ratio_,
+                        Usage);
+      } else if (option.starts_with("--generate-test-profile-class-ratio")) {
+        ParseUintOption(option,
+                        "--generate-test-profile-class-ratio",
+                        &test_profile_class_ratio_,
+                        Usage);
       } else {
         Usage("Unknown argument '%s'", option.data());
       }
@@ -168,6 +201,15 @@
     bool has_reference_profile = !reference_profile_file_.empty() ||
         FdIsValid(reference_profile_file_fd_);
 
+    if (!test_profile_.empty()) {
+      if (test_profile_method_ratio_ > 100) {
+        Usage("Invalid ratio for --generate-test-profile-method-ratio");
+      }
+      if (test_profile_class_ratio_ > 100) {
+        Usage("Invalid ratio for --generate-test-profile-class-ratio");
+      }
+      return;
+    }
     // --dump-only may be specified with only --reference-profiles present.
     if (!dump_only_ && !has_profiles) {
       Usage("No profile files specified.");
@@ -234,6 +276,7 @@
     MemMap::Init();  // for ZipArchive::OpenFromFd
     std::vector<const DexFile*> dex_files;
     assert(dex_locations_.size() == apks_fd_.size());
+    static constexpr bool kVerifyChecksum = true;
     for (size_t i = 0; i < dex_locations_.size(); ++i) {
       std::string error_msg;
       std::vector<std::unique_ptr<const DexFile>> dex_files_for_location;
@@ -246,6 +289,7 @@
       }
       if (DexFile::OpenFromZip(*zip_archive,
                                dex_locations_[i],
+                               kVerifyChecksum,
                                &error_msg,
                                &dex_files_for_location)) {
       } else {
@@ -315,6 +359,25 @@
     return dump_only_;
   }
 
+  int GenerateTestProfile() {
+    int profile_test_fd = open(test_profile_.c_str(), O_CREAT | O_TRUNC | O_WRONLY);
+    if (profile_test_fd < 0) {
+      std::cerr << "Cannot open " << test_profile_ << ": " << strerror(errno);
+      return -1;
+    }
+
+    bool result = ProfileCompilationInfo::GenerateTestProfile(profile_test_fd,
+                                                             test_profile_num_dex_,
+                                                             test_profile_method_ratio_,
+                                                             test_profile_class_ratio_);
+    close(profile_test_fd);  // ignore close result.
+    return result ? 0 : -1;
+  }
+
+  bool ShouldGenerateTestProfile() {
+    return !test_profile_.empty();
+  }
+
  private:
   static void ParseFdForCollection(const StringPiece& option,
                                    const char* arg_name,
@@ -348,6 +411,10 @@
   int reference_profile_file_fd_;
   bool dump_only_;
   int dump_output_to_fd_;
+  std::string test_profile_;
+  uint16_t test_profile_num_dex_;
+  uint16_t test_profile_method_ratio_;
+  uint16_t test_profile_class_ratio_;
   uint64_t start_ns_;
 };
 
@@ -358,6 +425,9 @@
   // Parse arguments. Argument mistakes will lead to exit(EXIT_FAILURE) in UsageError.
   profman.ParseArgs(argc, argv);
 
+  if (profman.ShouldGenerateTestProfile()) {
+    return profman.GenerateTestProfile();
+  }
   if (profman.ShouldOnlyDumpProfile()) {
     return profman.DumpProfileInfo();
   }
diff --git a/runtime/Android.mk b/runtime/Android.mk
index aa12c83..1c442fc 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -169,7 +169,6 @@
   os_linux.cc \
   parsed_options.cc \
   primitive.cc \
-  profiler.cc \
   quick_exception_handler.cc \
   quick/inline_method_analyser.cc \
   reference_table.cc \
@@ -369,7 +368,6 @@
   oat.h \
   object_callbacks.h \
   process_state.h \
-  profiler_options.h \
   quick/inline_method_analyser.h \
   runtime.h \
   stack.h \
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index d81e0a9..d105c67 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -34,7 +34,7 @@
 
 namespace art {
 
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_implicit_suspend();
 
@@ -107,8 +107,10 @@
   *out_return_pc = (sc->arm_pc + instr_size) | 1;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
   // The code that looks for the catch location needs to know the value of the
   // ARM PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.  However the mapping table has
@@ -122,7 +124,10 @@
 
   uint32_t instr_size = GetInstructionSize(ptr);
   sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
-  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+  sc->arm_r0 = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
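Aside: the handler change above first calls IsValidImplicitCheck(info), then rewrites the saved PC/LR in the machine context so the kernel resumes in art_quick_throw_null_pointer_exception_from_signal with info->si_addr in the first argument register. The standalone sketch below only demonstrates the SA_SIGINFO half of that pattern (recovering si_addr from a faulting null store); it bails out via siglongjmp rather than rewriting the ucontext, which is an illustrative simplification, not ART's mechanism.

    #include <csetjmp>
    #include <csignal>
    #include <cstdio>

    static sigjmp_buf g_env;
    static void* g_fault_addr = nullptr;

    static void Handler(int, siginfo_t* info, void*) {
      g_fault_addr = info->si_addr;  // Address that triggered the fault.
      siglongjmp(g_env, 1);          // Unlike ART, bail out instead of patching the context.
    }

    int main() {
      struct sigaction sa = {};
      sigemptyset(&sa.sa_mask);
      sa.sa_sigaction = Handler;
      sa.sa_flags = SA_SIGINFO;
      sigaction(SIGSEGV, &sa, nullptr);

      if (sigsetjmp(g_env, 1) == 0) {
        volatile int* null_ptr = nullptr;
        *null_ptr = 42;  // Implicit null check: faults at address 0.
      } else {
        std::printf("faulting address: %p\n", g_fault_addr);
      }
      return 0;
    }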
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index dd7063f..d940164 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -88,12 +88,6 @@
 #endif
 .endm
 
-    // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 28 + 4)
-#error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM) size not as expected."
-#endif
-.endm
-
 .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     add sp, #4               @ bottom word holds Method*
     .cfi_adjust_cfa_offset -4
@@ -299,6 +293,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
@@ -310,6 +309,12 @@
 TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_string_bounds, artThrowStringBoundsFromCode
+
+    /*
      * Called by managed code to create and deliver a StackOverflowError.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index 3e9ad0d..f591fcc 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -29,7 +29,7 @@
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void art_quick_implicit_suspend();
 
 //
@@ -84,8 +84,10 @@
   *out_return_pc = sc->pc + 4;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
   // The code that looks for the catch location needs to know the value of the
   // PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.
@@ -95,7 +97,10 @@
 
   sc->regs[30] = sc->pc + 4;      // LR needs to point to gc map location
 
-  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+  sc->regs[0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 1fba09b..10ee63f 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -406,6 +406,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
@@ -417,6 +422,12 @@
 TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_string_bounds, artThrowStringBoundsFromCode
+
+    /*
      * Called by managed code to create and deliver a StackOverflowError.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 45e33a8..833ba1b 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -268,6 +268,8 @@
   static_assert(!IsDirectEntrypoint(kQuickThrowNullPointer), "Non-direct C stub marked direct.");
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
   static_assert(!IsDirectEntrypoint(kQuickThrowStackOverflow), "Non-direct C stub marked direct.");
+  qpoints->pThrowStringBounds = art_quick_throw_string_bounds;
+  static_assert(!IsDirectEntrypoint(kQuickThrowStringBounds), "Non-direct C stub marked direct.");
 
   // Deoptimization from compiled code.
   qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 8ea78eb..7969a8f 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -27,7 +27,7 @@
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 
 //
 // Mips specific fault handler functions.
@@ -44,7 +44,7 @@
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
   struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  *out_sp = static_cast<uintptr_t>(sc->sc_regs[29]);   // SP register
+  *out_sp = static_cast<uintptr_t>(sc->sc_regs[mips::SP]);
   VLOG(signals) << "sp: " << *out_sp;
   if (*out_sp == 0) {
     return;
@@ -56,7 +56,7 @@
   uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
       reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kMips));
   if (overflow_addr == fault_addr) {
-    *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[4]);  // A0 register
+    *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[mips::A0]);
   } else {
     // The method is at the top of the stack.
     *out_method = *reinterpret_cast<ArtMethod**>(*out_sp);
@@ -71,8 +71,10 @@
   *out_return_pc = sc->sc_pc + 4;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
   // The code that looks for the catch location needs to know the value of the
   // PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.
@@ -80,9 +82,12 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
-  sc->sc_regs[31] = sc->sc_pc + 4;      // RA needs to point to gc map location
-  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
-  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  sc->sc_regs[mips::RA] = sc->sc_pc + 4;      // RA needs to point to gc map location
+  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+  sc->sc_regs[mips::T9] = sc->sc_pc;          // make sure T9 points to the function
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+  sc->sc_regs[mips::A0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
@@ -111,7 +116,7 @@
   VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
   VLOG(signals) << "sigcontext: " << std::hex << sc;
 
-  uintptr_t sp = sc->sc_regs[29];  // SP register
+  uintptr_t sp = sc->sc_regs[mips::SP];
   VLOG(signals) << "sp: " << std::hex << sp;
 
   uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);  // BVA addr
@@ -134,7 +139,7 @@
   // caused this fault.  This will be inserted into a callee save frame by
   // the function to which this handler returns (art_quick_throw_stack_overflow).
   sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
-  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  sc->sc_regs[mips::T9] = sc->sc_pc;          // make sure T9 points to the function
 
   // The kernel will now return to the address in sc->arm_pc.
   return true;
diff --git a/runtime/arch/mips/instruction_set_features_mips.cc b/runtime/arch/mips/instruction_set_features_mips.cc
index 93d79b7..b3a9866 100644
--- a/runtime/arch/mips/instruction_set_features_mips.cc
+++ b/runtime/arch/mips/instruction_set_features_mips.cc
@@ -76,21 +76,22 @@
   GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
 
   // Override defaults based on variant string.
-  // Only care if it is R1, R2 or R6 and we assume all CPUs will have a FP unit.
+  // Only care if it is R1, R2, R5 or R6 and we assume all CPUs will have a FP unit.
   constexpr const char* kMips32Prefix = "mips32r";
   const size_t kPrefixLength = strlen(kMips32Prefix);
   if (variant.compare(0, kPrefixLength, kMips32Prefix, kPrefixLength) == 0 &&
       variant.size() > kPrefixLength) {
-    if (variant[kPrefixLength] >= '6') {
-      fpu_32bit = false;
-      r6 = true;
-    }
-    if (variant[kPrefixLength] >= '2') {
-      mips_isa_gte2 = true;
-    }
+    r6 = (variant[kPrefixLength] >= '6');
+    fpu_32bit = (variant[kPrefixLength] < '5');
+    mips_isa_gte2 = (variant[kPrefixLength] >= '2');
   } else if (variant == "default") {
-    // Default variant is: smp = true, has fpu, is gte2, is not r6. This is the traditional
-    // setting.
+    // Default variant is: smp = true, has FPU, is gte2. This is the traditional setting.
+    //
+    // Note, we get FPU bitness and R6-ness from the build (using cpp defines, see above)
+    // and don't override them because many things depend on the "default" variant being
+    // sufficient for most purposes. That is, "default" should work for both R2 and R6.
+    // Use "mips32r#" to get a specific configuration, possibly not matching the runtime
+    // ISA (e.g. for ISA-specific testing of dex2oat internals).
     mips_isa_gte2 = true;
   } else {
     LOG(WARNING) << "Unexpected CPU variant for Mips32 using defaults: " << variant;
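Aside: the tightened parsing above boils down to three comparisons on the revision digit of "mips32r<N>". A standalone sketch (the ParseVariant helper and sample variants are illustrative, not ART code) whose outputs also satisfy the new constructor checks in instruction_set_features_mips.h, namely r6 implies gte2 with a 64-bit FPU, and pre-R2 implies a 32-bit FPU.

    #include <cstdio>
    #include <cstring>
    #include <string>

    struct MipsFeatures {
      bool r6;
      bool fpu_32bit;
      bool mips_isa_gte2;
    };

    static bool ParseVariant(const std::string& variant, MipsFeatures* out) {
      static const char* kPrefix = "mips32r";
      const size_t kPrefixLength = std::strlen(kPrefix);
      if (variant.compare(0, kPrefixLength, kPrefix, kPrefixLength) != 0 ||
          variant.size() <= kPrefixLength) {
        return false;  // "default" and unknown variants keep the build-time flags.
      }
      char rev = variant[kPrefixLength];
      out->r6 = (rev >= '6');
      out->fpu_32bit = (rev < '5');
      out->mips_isa_gte2 = (rev >= '2');
      return true;
    }

    int main() {
      for (const char* v : {"mips32r2", "mips32r5", "mips32r6"}) {
        MipsFeatures f = {};
        ParseVariant(v, &f);
        std::printf("%s: r6=%d fpu_32bit=%d gte2=%d\n", v, f.r6, f.fpu_32bit, f.mips_isa_gte2);
      }
      return 0;
    }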
diff --git a/runtime/arch/mips/instruction_set_features_mips.h b/runtime/arch/mips/instruction_set_features_mips.h
index aac436e..120dc1c 100644
--- a/runtime/arch/mips/instruction_set_features_mips.h
+++ b/runtime/arch/mips/instruction_set_features_mips.h
@@ -81,8 +81,19 @@
 
  private:
   MipsInstructionSetFeatures(bool smp, bool fpu_32bit, bool mips_isa_gte2, bool r6)
-      : InstructionSetFeatures(smp), fpu_32bit_(fpu_32bit),  mips_isa_gte2_(mips_isa_gte2), r6_(r6)
-  {}
+      : InstructionSetFeatures(smp),
+        fpu_32bit_(fpu_32bit),
+        mips_isa_gte2_(mips_isa_gte2),
+        r6_(r6) {
+    // Sanity checks.
+    if (r6) {
+      CHECK(mips_isa_gte2);
+      CHECK(!fpu_32bit);
+    }
+    if (!mips_isa_gte2) {
+      CHECK(fpu_32bit);
+    }
+  }
 
   // Bitmap positions for encoding features as a bitmap.
   enum {
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 3ee26af..c1b8044 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -506,6 +506,18 @@
     move $a0, rSELF                 # pass Thread::Current
 END art_quick_throw_null_pointer_exception
 
+
+    /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+    .extern artThrowNullPointerExceptionFromSignal
+ENTRY art_quick_throw_null_pointer_exception_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    la   $t9, artThrowNullPointerExceptionFromSignal
+    jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uintptr_t, Thread*)
+    move $a1, rSELF                 # pass Thread::Current
+END art_quick_throw_null_pointer_exception_from_signal
+
     /*
      * Called by managed code to create and deliver an ArithmeticException
      */
@@ -529,6 +541,18 @@
 END art_quick_throw_array_bounds
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt().
+     */
+    .extern artThrowStringBoundsFromCode
+ENTRY art_quick_throw_string_bounds
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    la   $t9, artThrowStringBoundsFromCode
+    jalr $zero, $t9                 # artThrowStringBoundsFromCode(index, limit, Thread*)
+    move $a2, rSELF                 # pass Thread::Current
+END art_quick_throw_string_bounds
+
+    /*
      * Called by managed code to create and deliver a StackOverflowError.
      */
     .extern artThrowStackOverflowFromCode
diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc
index 4abfcf1..0bbb6e1 100644
--- a/runtime/arch/mips64/fault_handler_mips64.cc
+++ b/runtime/arch/mips64/fault_handler_mips64.cc
@@ -27,7 +27,7 @@
 #include "thread-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 
 //
 // Mips64 specific fault handler functions.
@@ -44,7 +44,7 @@
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
   struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  *out_sp = static_cast<uintptr_t>(sc->sc_regs[29]);   // SP register
+  *out_sp = static_cast<uintptr_t>(sc->sc_regs[mips64::SP]);
   VLOG(signals) << "sp: " << *out_sp;
   if (*out_sp == 0) {
     return;
@@ -56,7 +56,7 @@
   uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
       reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kMips64));
   if (overflow_addr == fault_addr) {
-    *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[4]);  // A0 register
+    *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[mips64::A0]);
   } else {
     // The method is at the top of the stack.
     *out_method = *reinterpret_cast<ArtMethod**>(*out_sp);
@@ -71,8 +71,11 @@
   *out_return_pc = sc->sc_pc + 4;
 }
 
-bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context) {
+bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  if (!IsValidImplicitCheck(info)) {
+    return false;
+  }
+
   // The code that looks for the catch location needs to know the value of the
   // PC at the point of call.  For Null checks we insert a GC map that is immediately after
   // the load/store instruction that might cause the fault.
@@ -80,9 +83,12 @@
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
 
-  sc->sc_regs[31] = sc->sc_pc + 4;      // RA needs to point to gc map location
-  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
-  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  sc->sc_regs[mips64::RA] = sc->sc_pc + 4;      // RA needs to point to gc map location
+  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+  sc->sc_regs[mips64::T9] = sc->sc_pc;          // make sure T9 points to the function
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+  sc->sc_regs[mips64::A0] = reinterpret_cast<uintptr_t>(info->si_addr);
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
@@ -111,7 +117,7 @@
   VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
   VLOG(signals) << "sigcontext: " << std::hex << sc;
 
-  uintptr_t sp = sc->sc_regs[29];  // SP register
+  uintptr_t sp = sc->sc_regs[mips64::SP];
   VLOG(signals) << "sp: " << std::hex << sp;
 
   uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);  // BVA addr
@@ -134,7 +140,7 @@
   // caused this fault.  This will be inserted into a callee save frame by
   // the function to which this handler returns (art_quick_throw_stack_overflow).
   sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
-  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  sc->sc_regs[mips64::T9] = sc->sc_pc;          // make sure T9 points to the function
 
   // The kernel will now return to the address in sc->arm_pc.
   return true;
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 8f1a35a..ae69620 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -593,6 +593,17 @@
 END art_quick_throw_null_pointer_exception
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException
+     */
+    .extern artThrowNullPointerExceptionFromSignal
+ENTRY art_quick_throw_null_pointer_exception_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    dla  $t9, artThrowNullPointerExceptionFromSignal
+    jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uintptr_t, Thread*)
+    move $a1, rSELF                 # pass Thread::Current
+END art_quick_throw_null_pointer_exception_from_signal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException
      */
     .extern artThrowDivZeroFromCode
@@ -617,6 +628,19 @@
 END art_quick_throw_array_bounds
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt().
+     */
+    .extern artThrowStringBoundsFromCode
+ENTRY art_quick_throw_string_bounds
+.Lart_quick_throw_string_bounds_gp_set:
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+    dla  $t9, artThrowStringBoundsFromCode
+    jalr $zero, $t9                 # artThrowStringBoundsFromCode(index, limit, Thread*)
+    move $a2, rSELF                 # pass Thread::Current
+END art_quick_throw_string_bounds
+
+    /*
      * Called by managed code to create and deliver a StackOverflowError.
      */
     .extern artThrowStackOverflowFromCode
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index d7c4cb1..24e3a0d 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -36,6 +36,7 @@
 #define CTX_EIP uc_mcontext->__ss.__rip
 #define CTX_EAX uc_mcontext->__ss.__rax
 #define CTX_METHOD uc_mcontext->__ss.__rdi
+#define CTX_RDI uc_mcontext->__ss.__rdi
 #define CTX_JMP_BUF uc_mcontext->__ss.__rdi
 #else
 // 32 bit mac build.
@@ -71,12 +72,12 @@
 
 #if defined(__APPLE__) && defined(__x86_64__)
 // mac symbols have a prefix of _ on x86_64
-extern "C" void _art_quick_throw_null_pointer_exception();
+extern "C" void _art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void _art_quick_throw_stack_overflow();
 extern "C" void _art_quick_test_suspend();
 #define EXT_SYM(sym) _ ## sym
 #else
-extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal();
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_test_suspend();
 #define EXT_SYM(sym) sym
@@ -292,7 +293,10 @@
   *out_return_pc = reinterpret_cast<uintptr_t>(pc + instr_size);
 }
 
-bool NullPointerHandler::Action(int, siginfo_t*, void* context) {
+bool NullPointerHandler::Action(int, siginfo_t* sig, void* context) {
+  if (!IsValidImplicitCheck(sig)) {
+    return false;
+  }
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   uint8_t* pc = reinterpret_cast<uint8_t*>(uc->CTX_EIP);
   uint8_t* sp = reinterpret_cast<uint8_t*>(uc->CTX_ESP);
@@ -314,7 +318,15 @@
   *next_sp = retaddr;
   uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
-  uc->CTX_EIP = reinterpret_cast<uintptr_t>(EXT_SYM(art_quick_throw_null_pointer_exception));
+  uc->CTX_EIP = reinterpret_cast<uintptr_t>(
+      EXT_SYM(art_quick_throw_null_pointer_exception_from_signal));
+  // Pass the faulting address as the first argument of
+  // art_quick_throw_null_pointer_exception_from_signal.
+#if defined(__x86_64__)
+  uc->CTX_RDI = reinterpret_cast<uintptr_t>(sig->si_addr);
+#else
+  uc->CTX_EAX = reinterpret_cast<uintptr_t>(sig->si_addr);
+#endif
   VLOG(signals) << "Generating null pointer exception";
   return true;
 }
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 0b6ddc1..6234f0f 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -228,7 +228,7 @@
 MACRO0(DELIVER_PENDING_EXCEPTION)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save callee saves for throw
     // Outgoing argument set up
-    subl MACRO_LITERAL(12), %esp              // Alignment padding
+    subl MACRO_LITERAL(12), %esp               // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
     pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -254,7 +254,7 @@
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx  // save all registers as basis for long jump context
     mov %esp, %ecx
     // Outgoing argument set up
-    subl MACRO_LITERAL(8), %esp               // alignment padding
+    subl MACRO_LITERAL(8), %esp                // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
     pushl %fs:THREAD_SELF_OFFSET               // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
@@ -284,6 +284,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
@@ -311,6 +316,12 @@
 TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_string_bounds, artThrowStringBoundsFromCode
+
+    /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
      * the method_idx.  This wrapper will save arg1-arg3 and call the appropriate C helper.
@@ -1898,10 +1909,12 @@
 END_FUNCTION art_nested_signal_return
 
 DEFINE_FUNCTION art_quick_read_barrier_mark
+    subl LITERAL(8), %esp            // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
     PUSH eax                         // pass arg1 - obj
     call SYMBOL(artReadBarrierMark)  // artReadBarrierMark(obj)
-    addl LITERAL(4), %esp            // pop argument
-    CFI_ADJUST_CFA_OFFSET(-4)
+    addl LITERAL(12), %esp           // pop argument and remove padding
+    CFI_ADJUST_CFA_OFFSET(-12)
     ret
 END_FUNCTION art_quick_read_barrier_mark
 
@@ -1916,10 +1929,12 @@
 END_FUNCTION art_quick_read_barrier_slow
 
 DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
+    subl LITERAL(8), %esp                   // alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
     PUSH eax                                // pass arg1 - root
     call SYMBOL(artReadBarrierForRootSlow)  // artReadBarrierForRootSlow(root)
-    addl LITERAL(4), %esp                   // pop argument
-    CFI_ADJUST_CFA_OFFSET(-4)
+    addl LITERAL(12), %esp                  // pop argument and remove padding
+    CFI_ADJUST_CFA_OFFSET(-12)
     ret
 END_FUNCTION art_quick_read_barrier_for_root_slow
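
The extra subl/addl pair in the two read-barrier thunks above is about call-site stack alignment: the incoming return address (4 bytes), the new padding (8 bytes) and the pushed argument (4 bytes) add up to 16, so ESP is 16-byte aligned again when artReadBarrierMark / artReadBarrierForRootSlow are called, which the native calling convention used by the C++ side appears to require. A compile-time restatement of that arithmetic (constants transcribed from the hunks above, nothing taken from ART headers):

    #include <cstddef>

    // Bytes on the stack relative to the caller's 16-byte aligned frame.
    constexpr size_t kReturnAddress = 4;   // pushed by the 'call' into the thunk
    constexpr size_t kPadding       = 8;   // 'subl LITERAL(8), %esp'
    constexpr size_t kArgument      = 4;   // 'PUSH eax'

    // At 'call SYMBOL(artReadBarrierMark)' the stack pointer must again be
    // 16-byte aligned for the C++ callee.
    static_assert((kReturnAddress + kPadding + kArgument) % 16 == 0,
                  "read barrier thunk must restore 16-byte stack alignment");

    int main() { return 0; }
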
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 8064ed6..e777e6c 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -309,6 +309,11 @@
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
 
     /*
+     * Call installed by a signal handler to create and deliver a NullPointerException.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal
+
+    /*
      * Called by managed code to create and deliver an ArithmeticException.
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
@@ -336,6 +341,12 @@
 TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
 
     /*
+     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
+     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_string_bounds, artThrowStringBoundsFromCode
+
+    /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
      * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 32ae6ff..26450c4 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -120,10 +120,6 @@
   return dex_method_index_;
 }
 
-inline uint32_t ArtMethod::GetImtIndex() {
-  return GetDexMethodIndex() % ImTable::kSize;
-}
-
 inline ArtMethod** ArtMethod::GetDexCacheResolvedMethods(size_t pointer_size) {
   return GetNativePointer<ArtMethod**>(DexCacheResolvedMethodsOffset(pointer_size),
                                        pointer_size);
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 849af97..2b025f8 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -99,22 +99,6 @@
     return GetMethod(index * kMethodCount + kMethodImplementation, pointer_size);
   }
 
-  // Return true if two conflict tables are the same.
-  bool Equals(ImtConflictTable* other, size_t pointer_size) const {
-    size_t num = NumEntries(pointer_size);
-    if (num != other->NumEntries(pointer_size)) {
-      return false;
-    }
-    for (size_t i = 0; i < num; ++i) {
-      if (GetInterfaceMethod(i, pointer_size) != other->GetInterfaceMethod(i, pointer_size) ||
-          GetImplementationMethod(i, pointer_size) !=
-              other->GetImplementationMethod(i, pointer_size)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
   // Visit all of the entries.
   // NO_THREAD_SAFETY_ANALYSIS for calling with held locks. Visitor is passed a pair of ArtMethod*
   // and also returns one. The order is <interface, implementation>.
@@ -419,8 +403,6 @@
 
   ALWAYS_INLINE uint32_t GetDexMethodIndex() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE uint32_t GetImtIndex() SHARED_REQUIRES(Locks::mutator_lock_);
-
   void SetDexMethodIndex(uint32_t new_idx) {
     // Not called within a transaction.
     dex_method_index_ = new_idx;
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 480644a..2d702f6 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -127,32 +127,32 @@
 ADD_TEST_EQ(THREAD_SELF_OFFSET,
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
-// Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 166 * __SIZEOF_POINTER__)
-ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
-            art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_pos.
-#define THREAD_LOCAL_POS_OFFSET (THREAD_LOCAL_OBJECTS_OFFSET + 2 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 168 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_end.
 #define THREAD_LOCAL_END_OFFSET (THREAD_LOCAL_POS_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
             art::Thread::ThreadLocalEndOffset<__SIZEOF_POINTER__>().Int32Value())
+// Offset of field Thread::tlsPtr_.thread_local_objects.
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
+            art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_current_ibase.
-#define THREAD_CURRENT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 2 * __SIZEOF_POINTER__)
+#define THREAD_CURRENT_IBASE_OFFSET (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_CURRENT_IBASE_OFFSET,
             art::Thread::MterpCurrentIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_default_ibase.
-#define THREAD_DEFAULT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 3 * __SIZEOF_POINTER__)
+#define THREAD_DEFAULT_IBASE_OFFSET (THREAD_CURRENT_IBASE_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_DEFAULT_IBASE_OFFSET,
             art::Thread::MterpDefaultIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_alt_ibase.
-#define THREAD_ALT_IBASE_OFFSET (THREAD_LOCAL_POS_OFFSET + 4 * __SIZEOF_POINTER__)
+#define THREAD_ALT_IBASE_OFFSET (THREAD_DEFAULT_IBASE_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_ALT_IBASE_OFFSET,
             art::Thread::MterpAltIBaseOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.rosalloc_runs.
-#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_LOCAL_POS_OFFSET + 5 * __SIZEOF_POINTER__)
+#define THREAD_ROSALLOC_RUNS_OFFSET (THREAD_ALT_IBASE_OFFSET + __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_ROSALLOC_RUNS_OFFSET,
             art::Thread::RosAllocRunsOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_alloc_stack_top.
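
The asm_support.h hunk switches each THREAD_*_OFFSET from an absolute multiple of the pointer size to a chain off the previous field, with ADD_TEST_EQ still pinning every link to the real Thread layout. The pattern can be sketched with a toy struct (illustrative field names and pointer-sized members only; none of ART's real layout is reproduced):

    #include <cstddef>
    #include <cstdint>

    // Toy stand-in for the relevant slice of Thread::tlsPtr_. All members are
    // pointer-sized here (the real thread_local_objects is a size_t), so the
    // chained offsets below are exact and there is no padding.
    struct TlsPtrSketch {
      uint8_t*  thread_local_pos;
      uint8_t*  thread_local_end;
      uintptr_t thread_local_objects;
      void*     mterp_current_ibase;
    };

    // Each offset is written relative to the previous field, the way the hunk
    // above now defines THREAD_LOCAL_END_OFFSET and friends.
    constexpr size_t kThreadLocalPosOffset     = offsetof(TlsPtrSketch, thread_local_pos);
    constexpr size_t kThreadLocalEndOffset     = kThreadLocalPosOffset + sizeof(void*);
    constexpr size_t kThreadLocalObjectsOffset = kThreadLocalEndOffset + sizeof(void*);
    constexpr size_t kMterpCurrentIBaseOffset  = kThreadLocalObjectsOffset + sizeof(void*);

    // The ADD_TEST_EQ counterpart: break the build if the chain drifts from the
    // real layout.
    static_assert(kThreadLocalEndOffset     == offsetof(TlsPtrSketch, thread_local_end),
                  "chained offset drifted");
    static_assert(kThreadLocalObjectsOffset == offsetof(TlsPtrSketch, thread_local_objects),
                  "chained offset drifted");
    static_assert(kMterpCurrentIBaseOffset  == offsetof(TlsPtrSketch, mterp_current_ibase),
                  "chained offset drifted");

    int main() { return 0; }
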
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index bd8de87..1c32024 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -73,6 +73,11 @@
           level == kThreadListLock ||
           // Ignore logging which may or may not have set up thread data structures.
           level == kLoggingLock ||
+          // When transitioning from suspended to runnable, a daemon thread might be in
+          // a situation where the runtime is shutting down. To not crash our debug locking
+          // mechanism we just pass null Thread* to the MutexLock during that transition
+          // (see Thread::TransitionFromSuspendedToRunnable).
+          level == kThreadSuspendCountLock ||
           // Avoid recursive death.
           level == kAbortLock) << level;
   }
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 7c00315..3ec8f21 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -857,13 +857,11 @@
     if (vtable != nullptr) {
       SanityCheckArtMethodPointerArray(vtable, nullptr, pointer_size, image_spaces);
     }
-    if (klass->ShouldHaveImt()) {
-      ImTable* imt = klass->GetImt(pointer_size);
-      for (size_t i = 0; i < ImTable::kSize; ++i) {
-        SanityCheckArtMethod(imt->Get(i, pointer_size), nullptr, image_spaces);
+    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
+      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+        SanityCheckArtMethod(
+            klass->GetEmbeddedImTableEntry(i, pointer_size), nullptr, image_spaces);
       }
-    }
-    if (klass->ShouldHaveEmbeddedVTable()) {
       for (int32_t i = 0; i < klass->GetEmbeddedVTableLength(); ++i) {
         SanityCheckArtMethod(klass->GetEmbeddedVTableEntry(i, pointer_size), nullptr, image_spaces);
       }
@@ -2538,6 +2536,10 @@
   CHECK(h_new_class.Get() != nullptr) << descriptor;
   CHECK(h_new_class->IsResolved()) << descriptor;
 
+  // Update the dex cache of where the class is defined. Inlining depends on having
+  // this filled.
+  h_new_class->GetDexCache()->SetResolvedType(h_new_class->GetDexTypeIndex(), h_new_class.Get());
+
   // Instrumentation may have updated entrypoints for all methods of all
   // classes. However it could not update methods of this class while we
   // were loading it. Now the class is resolved, we can update entrypoints
@@ -3458,11 +3460,16 @@
     new_class->SetClassFlags(mirror::kClassFlagObjectArray);
   }
   mirror::Class::SetStatus(new_class, mirror::Class::kStatusLoaded, self);
-  new_class->PopulateEmbeddedVTable(image_pointer_size_);
+  {
+    ArtMethod* imt[mirror::Class::kImtSize];
+    std::fill_n(imt, arraysize(imt), Runtime::Current()->GetImtUnimplementedMethod());
+    new_class->PopulateEmbeddedImtAndVTable(imt, image_pointer_size_);
+  }
   mirror::Class::SetStatus(new_class, mirror::Class::kStatusInitialized, self);
   // don't need to set new_class->SetObjectSize(..)
   // because Object::SizeOf delegates to Array::SizeOf
 
+
   // All arrays have java/lang/Cloneable and java/io/Serializable as
   // interfaces.  We need to set that up here, so that stuff like
   // "instanceof" works right.
@@ -5033,11 +5040,9 @@
   if (!LinkSuperClass(klass)) {
     return false;
   }
-  ArtMethod* imt_data[ImTable::kSize];
-  // If there are any new conflicts compared to super class.
-  bool new_conflict = false;
-  std::fill_n(imt_data, arraysize(imt_data), Runtime::Current()->GetImtUnimplementedMethod());
-  if (!LinkMethods(self, klass, interfaces, &new_conflict, imt_data)) {
+  ArtMethod* imt[mirror::Class::kImtSize];
+  std::fill_n(imt, arraysize(imt), Runtime::Current()->GetImtUnimplementedMethod());
+  if (!LinkMethods(self, klass, interfaces, imt)) {
     return false;
   }
   if (!LinkInstanceFields(self, klass)) {
@@ -5050,45 +5055,15 @@
   CreateReferenceInstanceOffsets(klass);
   CHECK_EQ(mirror::Class::kStatusLoaded, klass->GetStatus());
 
-  ImTable* imt = nullptr;
-  if (klass->ShouldHaveImt()) {
-    // If there are any new conflicts compared to the super class we can not make a copy. There
-    // can be cases where both will have a conflict method at the same slot without having the same
-    // set of conflicts. In this case, we can not share the IMT since the conflict table slow path
-    // will possibly create a table that is incorrect for either of the classes.
-    // Same IMT with new_conflict does not happen very often.
-    if (!new_conflict && klass->HasSuperClass() && klass->GetSuperClass()->ShouldHaveImt()) {
-      ImTable* super_imt = klass->GetSuperClass()->GetImt(image_pointer_size_);
-      bool imt_equals = true;
-      for (size_t i = 0; i < ImTable::kSize && imt_equals; ++i) {
-        imt_equals = imt_equals && (super_imt->Get(i, image_pointer_size_) == imt_data[i]);
-      }
-      if (imt_equals) {
-        imt = super_imt;
-      }
-    }
-    if (imt == nullptr) {
-      LinearAlloc* allocator = GetAllocatorForClassLoader(klass->GetClassLoader());
-      imt = reinterpret_cast<ImTable*>(
-          allocator->Alloc(self, ImTable::SizeInBytes(image_pointer_size_)));
-      if (imt == nullptr) {
-        return false;
-      }
-      imt->Populate(imt_data, image_pointer_size_);
-    }
-  }
-
   if (!klass->IsTemp() || (!init_done_ && klass->GetClassSize() == class_size)) {
     // We don't need to retire this class as it has no embedded tables or it was created the
     // correct size during class linker initialization.
     CHECK_EQ(klass->GetClassSize(), class_size) << PrettyDescriptor(klass.Get());
 
-    if (klass->ShouldHaveEmbeddedVTable()) {
-      klass->PopulateEmbeddedVTable(image_pointer_size_);
+    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
+      klass->PopulateEmbeddedImtAndVTable(imt, image_pointer_size_);
     }
-    if (klass->ShouldHaveImt()) {
-      klass->SetImt(imt, image_pointer_size_);
-    }
+
     // This will notify waiters on klass that saw the not yet resolved
     // class in the class_table_ during EnsureResolved.
     mirror::Class::SetStatus(klass, mirror::Class::kStatusResolved, self);
@@ -5480,7 +5455,6 @@
 bool ClassLinker::LinkMethods(Thread* self,
                               Handle<mirror::Class> klass,
                               Handle<mirror::ObjectArray<mirror::Class>> interfaces,
-                              bool* out_new_conflict,
                               ArtMethod** out_imt) {
   self->AllowThreadSuspension();
   // A map from vtable indexes to the method they need to be updated to point to. Used because we
@@ -5492,7 +5466,7 @@
   // any vtable entries with new default method implementations.
   return SetupInterfaceLookupTable(self, klass, interfaces)
           && LinkVirtualMethods(self, klass, /*out*/ &default_translations)
-          && LinkInterfaceMethods(self, klass, default_translations, out_new_conflict, out_imt);
+          && LinkInterfaceMethods(self, klass, default_translations, out_imt);
 }
 
 // Comparator for name and signature of a method, used in finding overriding methods. Implementation
@@ -5650,7 +5624,7 @@
     StackHandleScope<2> hs(self);
     Handle<mirror::Class> super_class(hs.NewHandle(klass->GetSuperClass()));
     MutableHandle<mirror::PointerArray> vtable;
-    if (super_class->ShouldHaveEmbeddedVTable()) {
+    if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
       vtable = hs.NewHandle(AllocPointerArray(self, max_count));
       if (UNLIKELY(vtable.Get() == nullptr)) {
         self->AssertPendingOOMException();
@@ -6050,7 +6024,6 @@
 void ClassLinker::SetIMTRef(ArtMethod* unimplemented_method,
                             ArtMethod* imt_conflict_method,
                             ArtMethod* current_method,
-                            /*out*/bool* new_conflict,
                             /*out*/ArtMethod** imt_ref) {
   // Place method in imt if entry is empty, place conflict otherwise.
   if (*imt_ref == unimplemented_method) {
@@ -6067,77 +6040,40 @@
       *imt_ref = current_method;
     } else {
       *imt_ref = imt_conflict_method;
-      *new_conflict = true;
     }
   } else {
     // Place the default conflict method. Note that there may be an existing conflict
     // method in the IMT, but it could be one tailored to the super class, with a
     // specific ImtConflictTable.
     *imt_ref = imt_conflict_method;
-    *new_conflict = true;
   }
 }
 
 void ClassLinker::FillIMTAndConflictTables(mirror::Class* klass) {
-  DCHECK(klass->ShouldHaveImt()) << PrettyClass(klass);
+  DCHECK(klass->ShouldHaveEmbeddedImtAndVTable()) << PrettyClass(klass);
   DCHECK(!klass->IsTemp()) << PrettyClass(klass);
-  ArtMethod* imt_data[ImTable::kSize];
+  ArtMethod* imt[mirror::Class::kImtSize];
   Runtime* const runtime = Runtime::Current();
   ArtMethod* const unimplemented_method = runtime->GetImtUnimplementedMethod();
   ArtMethod* const conflict_method = runtime->GetImtConflictMethod();
-  std::fill_n(imt_data, arraysize(imt_data), unimplemented_method);
+  std::fill_n(imt, arraysize(imt), unimplemented_method);
   if (klass->GetIfTable() != nullptr) {
-    bool new_conflict = false;
     FillIMTFromIfTable(klass->GetIfTable(),
                        unimplemented_method,
                        conflict_method,
                        klass,
-                       /*create_conflict_tables*/true,
-                       /*ignore_copied_methods*/false,
-                       &new_conflict,
-                       &imt_data[0]);
+                       true,
+                       false,
+                       &imt[0]);
   }
-  if (!klass->ShouldHaveImt()) {
-    return;
+  for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+    klass->SetEmbeddedImTableEntry(i, imt[i], image_pointer_size_);
   }
-  // Compare the IMT with the super class including the conflict methods. If they are equivalent,
-  // we can just use the same pointer.
-  ImTable* imt = nullptr;
-  mirror::Class* super_class = klass->GetSuperClass();
-  if (super_class != nullptr && super_class->ShouldHaveImt()) {
-    ImTable* super_imt = super_class->GetImt(image_pointer_size_);
-    bool same = true;
-    for (size_t i = 0; same && i < ImTable::kSize; ++i) {
-      ArtMethod* method = imt_data[i];
-      ArtMethod* super_method = super_imt->Get(i, image_pointer_size_);
-      if (method != super_method) {
-        bool is_conflict_table = method->IsRuntimeMethod() &&
-                                 method != unimplemented_method &&
-                                 method != conflict_method;
-        // Verify conflict contents.
-        bool super_conflict_table = super_method->IsRuntimeMethod() &&
-                                    super_method != unimplemented_method &&
-                                    super_method != conflict_method;
-        if (!is_conflict_table || !super_conflict_table) {
-          same = false;
-        } else {
-          ImtConflictTable* table1 = method->GetImtConflictTable(image_pointer_size_);
-          ImtConflictTable* table2 = super_method->GetImtConflictTable(image_pointer_size_);
-          same = same && table1->Equals(table2, image_pointer_size_);
-        }
-      }
-    }
-    if (same) {
-      imt = super_imt;
-    }
-  }
-  if (imt == nullptr) {
-    imt = klass->GetImt(image_pointer_size_);
-    DCHECK(imt != nullptr);
-    imt->Populate(imt_data, image_pointer_size_);
-  } else {
-    klass->SetImt(imt, image_pointer_size_);
-  }
+}
+
+static inline uint32_t GetIMTIndex(ArtMethod* interface_method)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return interface_method->GetDexMethodIndex() % mirror::Class::kImtSize;
 }
 
 ImtConflictTable* ClassLinker::CreateImtConflictTable(size_t count,
@@ -6159,9 +6095,8 @@
                                      mirror::Class* klass,
                                      bool create_conflict_tables,
                                      bool ignore_copied_methods,
-                                     /*out*/bool* new_conflict,
-                                     /*out*/ArtMethod** imt) {
-  uint32_t conflict_counts[ImTable::kSize] = {};
+                                     ArtMethod** imt) {
+  uint32_t conflict_counts[mirror::Class::kImtSize] = {};
   for (size_t i = 0, length = if_table->Count(); i < length; ++i) {
     mirror::Class* interface = if_table->GetInterface(i);
     const size_t num_virtuals = interface->NumVirtualMethods();
@@ -6191,7 +6126,7 @@
       // or interface methods in the IMT here they will not create extra conflicts since we compare
       // names and signatures in SetIMTRef.
       ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
-      const uint32_t imt_index = interface_method->GetImtIndex();
+      const uint32_t imt_index = GetIMTIndex(interface_method);
 
       // There is only any conflicts if all of the interface methods for an IMT slot don't have
       // the same implementation method, keep track of this to avoid creating a conflict table in
@@ -6203,7 +6138,6 @@
       SetIMTRef(unimplemented_method,
                 imt_conflict_method,
                 implementation_method,
-                /*out*/new_conflict,
                 /*out*/&imt[imt_index]);
     }
   }
@@ -6211,7 +6145,7 @@
   if (create_conflict_tables) {
     // Create the conflict tables.
     LinearAlloc* linear_alloc = GetAllocatorForClassLoader(klass->GetClassLoader());
-    for (size_t i = 0; i < ImTable::kSize; ++i) {
+    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
       size_t conflicts = conflict_counts[i];
       if (imt[i] == imt_conflict_method) {
         ImtConflictTable* new_table = CreateImtConflictTable(conflicts, linear_alloc);
@@ -6245,7 +6179,7 @@
         }
         DCHECK(implementation_method != nullptr);
         ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
-        const uint32_t imt_index = interface_method->GetImtIndex();
+        const uint32_t imt_index = GetIMTIndex(interface_method);
         if (!imt[imt_index]->IsRuntimeMethod() ||
             imt[imt_index] == unimplemented_method ||
             imt[imt_index] == imt_conflict_method) {
@@ -6498,14 +6432,12 @@
 void ClassLinker::FillImtFromSuperClass(Handle<mirror::Class> klass,
                                         ArtMethod* unimplemented_method,
                                         ArtMethod* imt_conflict_method,
-                                        bool* new_conflict,
                                         ArtMethod** imt) {
   DCHECK(klass->HasSuperClass());
   mirror::Class* super_class = klass->GetSuperClass();
-  if (super_class->ShouldHaveImt()) {
-    ImTable* super_imt = super_class->GetImt(image_pointer_size_);
-    for (size_t i = 0; i < ImTable::kSize; ++i) {
-      imt[i] = super_imt->Get(i, image_pointer_size_);
+  if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
+    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+      imt[i] = super_class->GetEmbeddedImTableEntry(i, image_pointer_size_);
     }
   } else {
     // No imt in the super class, need to reconstruct from the iftable.
@@ -6518,7 +6450,6 @@
                          klass.Get(),
                          /*create_conflict_table*/false,
                          /*ignore_copied_methods*/true,
-                         /*out*/new_conflict,
                          /*out*/imt);
     }
   }
@@ -6529,7 +6460,6 @@
     Thread* self,
     Handle<mirror::Class> klass,
     const std::unordered_map<size_t, ClassLinker::MethodTranslation>& default_translations,
-    bool* out_new_conflict,
     ArtMethod** out_imt) {
   StackHandleScope<3> hs(self);
   Runtime* const runtime = Runtime::Current();
@@ -6565,7 +6495,6 @@
     FillImtFromSuperClass(klass,
                           unimplemented_method,
                           imt_conflict_method,
-                          out_new_conflict,
                           out_imt);
   }
   // Allocate method arrays before since we don't want miss visiting miranda method roots due to
@@ -6651,7 +6580,7 @@
         auto* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j, image_pointer_size_);
         MethodNameAndSignatureComparator interface_name_comparator(
             interface_method->GetInterfaceMethodIfProxy(image_pointer_size_));
-        uint32_t imt_index = interface_method->GetImtIndex();
+        uint32_t imt_index = GetIMTIndex(interface_method);
         ArtMethod** imt_ptr = &out_imt[imt_index];
         // For each method listed in the interface's method list, find the
         // matching method in our class's method list.  We want to favor the
@@ -6697,7 +6626,6 @@
                 SetIMTRef(unimplemented_method,
                           imt_conflict_method,
                           vtable_method,
-                          /*out*/out_new_conflict,
                           /*out*/imt_ptr);
               }
               break;
@@ -6840,7 +6768,6 @@
             SetIMTRef(unimplemented_method,
                       imt_conflict_method,
                       current_method,
-                      /*out*/out_new_conflict,
                       /*out*/imt_ptr);
           }
         }
@@ -7040,7 +6967,7 @@
       }
 
       // Fix up IMT next
-      for (size_t i = 0; i < ImTable::kSize; ++i) {
+      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
         auto it = move_table.find(out_imt[i]);
         if (it != move_table.end()) {
           out_imt[i] = it->second;
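
With ArtMethod::GetImtIndex gone, the slot an interface method hashes to is again computed locally as the dex method index modulo mirror::Class::kImtSize, and two methods whose indices collide modulo the table size share a slot, which is what forces an ImtConflictTable there. A toy illustration (the table size below is illustrative, not quoted from ART):

    #include <cstdint>
    #include <iostream>

    // kImtSize stands in for mirror::Class::kImtSize; the value is illustrative.
    constexpr uint32_t kImtSize = 64;

    // The reinstated scheme: an interface method's IMT slot is its dex method
    // index modulo the table size (see GetIMTIndex in the hunk above).
    constexpr uint32_t GetImtSlot(uint32_t dex_method_index) {
      return dex_method_index % kImtSize;
    }

    int main() {
      const uint32_t a = 7;
      const uint32_t b = 7 + kImtSize;  // collides with 'a' modulo the table size
      std::cout << "slots: " << GetImtSlot(a) << " and " << GetImtSlot(b)
                << " (a collision forces an ImtConflictTable for that slot)\n";
      return 0;
    }
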
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index d6822c5..ca5af19 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -833,7 +833,6 @@
   bool LinkMethods(Thread* self,
                    Handle<mirror::Class> klass,
                    Handle<mirror::ObjectArray<mirror::Class>> interfaces,
-                   bool* out_new_conflict,
                    ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -969,20 +968,19 @@
   // * kDefaultConflict - Conflicting method implementations were found when searching for
   //                      target_method. The value of *out_default_method is null.
   DefaultMethodSearchResult FindDefaultMethodImplementation(
-      Thread* self,
-      ArtMethod* target_method,
-      Handle<mirror::Class> klass,
-      /*out*/ArtMethod** out_default_method) const
+          Thread* self,
+          ArtMethod* target_method,
+          Handle<mirror::Class> klass,
+          /*out*/ArtMethod** out_default_method) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Sets the imt entries and fixes up the vtable for the given class by linking all the interface
   // methods. See LinkVirtualMethods for an explanation of what default_translations is.
   bool LinkInterfaceMethods(
-      Thread* self,
-      Handle<mirror::Class> klass,
-      const std::unordered_map<size_t, MethodTranslation>& default_translations,
-      bool* out_new_conflict,
-      ArtMethod** out_imt)
+          Thread* self,
+          Handle<mirror::Class> klass,
+          const std::unordered_map<size_t, MethodTranslation>& default_translations,
+          ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool LinkStaticFields(Thread* self, Handle<mirror::Class> klass, size_t* class_size)
@@ -1098,7 +1096,6 @@
   void SetIMTRef(ArtMethod* unimplemented_method,
                  ArtMethod* imt_conflict_method,
                  ArtMethod* current_method,
-                 /*out*/bool* new_conflict,
                  /*out*/ArtMethod** imt_ref) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FillIMTFromIfTable(mirror::IfTable* if_table,
@@ -1107,13 +1104,11 @@
                           mirror::Class* klass,
                           bool create_conflict_tables,
                           bool ignore_copied_methods,
-                          /*out*/bool* new_conflict,
-                          /*out*/ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
+                          ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FillImtFromSuperClass(Handle<mirror::Class> klass,
                              ArtMethod* unimplemented_method,
                              ArtMethod* imt_conflict_method,
-                             bool* new_conflict,
                              ArtMethod** imt) SHARED_REQUIRES(Locks::mutator_lock_);
 
   std::vector<const DexFile*> boot_class_path_;
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 9b59f2b..488826b 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -148,8 +148,7 @@
     EXPECT_EQ(0U, array->NumInstanceFields());
     EXPECT_EQ(0U, array->NumStaticFields());
     EXPECT_EQ(2U, array->NumDirectInterfaces());
-    EXPECT_FALSE(array->ShouldHaveImt());
-    EXPECT_TRUE(array->ShouldHaveEmbeddedVTable());
+    EXPECT_TRUE(array->ShouldHaveEmbeddedImtAndVTable());
     EXPECT_EQ(2, array->GetIfTableCount());
     ASSERT_TRUE(array->GetIfTable() != nullptr);
     mirror::Class* direct_interface0 = mirror::Class::GetDirectInterface(self, array, 0);
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 5bdb36c..741b682 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -284,7 +284,8 @@
   std::vector<std::unique_ptr<const DexFile>> dex_files;
   std::string error_msg;
   MemMap::Init();
-  if (!DexFile::Open(location, location, &error_msg, &dex_files)) {
+  static constexpr bool kVerifyChecksum = true;
+  if (!DexFile::Open(location, location, kVerifyChecksum, &error_msg, &dex_files)) {
     LOG(FATAL) << "Could not open .dex file '" << location << "': " << error_msg << "\n";
     UNREACHABLE();
   } else {
@@ -462,7 +463,7 @@
 #define ART_TARGET_NATIVETEST_DIR_STRING ""
 #endif
 
-std::string CommonRuntimeTestImpl::GetTestDexFileName(const char* name) {
+std::string CommonRuntimeTestImpl::GetTestDexFileName(const char* name) const {
   CHECK(name != nullptr);
   std::string filename;
   if (IsHost()) {
@@ -480,9 +481,11 @@
 std::vector<std::unique_ptr<const DexFile>> CommonRuntimeTestImpl::OpenTestDexFiles(
     const char* name) {
   std::string filename = GetTestDexFileName(name);
+  static constexpr bool kVerifyChecksum = true;
   std::string error_msg;
   std::vector<std::unique_ptr<const DexFile>> dex_files;
-  bool success = DexFile::Open(filename.c_str(), filename.c_str(), &error_msg, &dex_files);
+  bool success = DexFile::Open(
+      filename.c_str(), filename.c_str(), kVerifyChecksum, &error_msg, &dex_files);
   CHECK(success) << "Failed to open '" << filename << "': " << error_msg;
   for (auto& dex_file : dex_files) {
     CHECK_EQ(PROT_READ, dex_file->GetPermissions());
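
The kVerifyChecksum flag threaded through these call sites controls whether DexFile::Open rejects files whose header checksum does not match. For reference, the dex header stores an adler32 at byte offset 8, computed over everything after the magic and checksum fields; a self-contained sketch of that comparison (toy helper using zlib, not ART's DexFileVerifier):

    #include <zlib.h>

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Toy helper, not ART code: the dex header keeps an adler32 checksum at
    // byte offset 8, computed over everything after the magic and checksum
    // fields (i.e. from offset 12 onwards).
    bool DexChecksumMatches(const std::vector<uint8_t>& dex_bytes) {
      if (dex_bytes.size() < 12) {
        return false;  // cannot even hold magic + checksum
      }
      uint32_t stored;
      std::memcpy(&stored, dex_bytes.data() + 8, sizeof(stored));
      const uLong actual = adler32(adler32(0L, Z_NULL, 0),
                                   reinterpret_cast<const Bytef*>(dex_bytes.data() + 12),
                                   static_cast<uInt>(dex_bytes.size() - 12));
      return stored == static_cast<uint32_t>(actual);
    }

    int main() {
      // A zeroed buffer has a stored checksum of 0, which does not match the
      // adler32 of its payload, so this reports a mismatch (main returns 0).
      std::vector<uint8_t> bogus(0x70, 0);
      return DexChecksumMatches(bogus) ? 1 : 0;
    }
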
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 0ce40e8..b68eb19 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -111,7 +111,7 @@
 
   std::string GetTestAndroidRoot();
 
-  std::string GetTestDexFileName(const char* name);
+  std::string GetTestDexFileName(const char* name) const;
 
   std::vector<std::unique_ptr<const DexFile>> OpenTestDexFiles(const char* name)
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 75cce42..912a74a 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -402,12 +402,117 @@
                                                dex_file, type);
 }
 
-void ThrowNullPointerExceptionFromDexPC() {
+static bool IsValidImplicitCheck(uintptr_t addr, ArtMethod* method, const Instruction& instr)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (!CanDoImplicitNullCheckOn(addr)) {
+    return false;
+  }
+
+  switch (instr.Opcode()) {
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_DIRECT_RANGE:
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+    case Instruction::INVOKE_VIRTUAL_QUICK:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
+      // Without inlining, we could just check that the offset is the class offset.
+      // However, when inlining, the compiler can (validly) merge the null check with a field access
+      // on the same object. Note that the stack map at the NPE will reflect the invoke's location,
+      // which is the caller.
+      return true;
+    }
+
+    case Instruction::IGET:
+    case Instruction::IGET_WIDE:
+    case Instruction::IGET_OBJECT:
+    case Instruction::IGET_BOOLEAN:
+    case Instruction::IGET_BYTE:
+    case Instruction::IGET_CHAR:
+    case Instruction::IGET_SHORT:
+    case Instruction::IPUT:
+    case Instruction::IPUT_WIDE:
+    case Instruction::IPUT_OBJECT:
+    case Instruction::IPUT_BOOLEAN:
+    case Instruction::IPUT_BYTE:
+    case Instruction::IPUT_CHAR:
+    case Instruction::IPUT_SHORT: {
+      ArtField* field =
+          Runtime::Current()->GetClassLinker()->ResolveField(instr.VRegC_22c(), method, false);
+      return (addr == 0) ||
+          (addr == field->GetOffset().Uint32Value()) ||
+          (kEmitCompilerReadBarrier && (addr == mirror::Object::MonitorOffset().Uint32Value()));
+    }
+
+    case Instruction::IGET_QUICK:
+    case Instruction::IGET_BOOLEAN_QUICK:
+    case Instruction::IGET_BYTE_QUICK:
+    case Instruction::IGET_CHAR_QUICK:
+    case Instruction::IGET_SHORT_QUICK:
+    case Instruction::IGET_WIDE_QUICK:
+    case Instruction::IGET_OBJECT_QUICK:
+    case Instruction::IPUT_QUICK:
+    case Instruction::IPUT_BOOLEAN_QUICK:
+    case Instruction::IPUT_BYTE_QUICK:
+    case Instruction::IPUT_CHAR_QUICK:
+    case Instruction::IPUT_SHORT_QUICK:
+    case Instruction::IPUT_WIDE_QUICK:
+    case Instruction::IPUT_OBJECT_QUICK: {
+      return (addr == 0u) ||
+          (addr == instr.VRegC_22c()) ||
+          (kEmitCompilerReadBarrier && (addr == mirror::Object::MonitorOffset().Uint32Value()));
+    }
+
+    case Instruction::AGET:
+    case Instruction::AGET_WIDE:
+    case Instruction::AGET_OBJECT:
+    case Instruction::AGET_BOOLEAN:
+    case Instruction::AGET_BYTE:
+    case Instruction::AGET_CHAR:
+    case Instruction::AGET_SHORT:
+    case Instruction::APUT:
+    case Instruction::APUT_WIDE:
+    case Instruction::APUT_OBJECT:
+    case Instruction::APUT_BOOLEAN:
+    case Instruction::APUT_BYTE:
+    case Instruction::APUT_CHAR:
+    case Instruction::APUT_SHORT:
+    case Instruction::FILL_ARRAY_DATA:
+    case Instruction::ARRAY_LENGTH: {
+      // The length access should crash. We currently do not do implicit checks on
+      // the array access itself.
+      return (addr == 0u) ||
+          (addr == mirror::Array::LengthOffset().Uint32Value()) ||
+          (kEmitCompilerReadBarrier && (addr == mirror::Object::MonitorOffset().Uint32Value()));
+    }
+
+    default: {
+      // We have covered all the cases where an NPE could occur.
+      // Note that this must be kept in sync with the compiler, and adding
+      // any new way to do implicit checks in the compiler should also update
+      // this code.
+      return false;
+    }
+  }
+}
+
+void ThrowNullPointerExceptionFromDexPC(bool check_address, uintptr_t addr) {
   uint32_t throw_dex_pc;
   ArtMethod* method = Thread::Current()->GetCurrentMethod(&throw_dex_pc);
   const DexFile::CodeItem* code = method->GetCodeItem();
   CHECK_LT(throw_dex_pc, code->insns_size_in_code_units_);
   const Instruction* instr = Instruction::At(&code->insns_[throw_dex_pc]);
+  if (check_address && !IsValidImplicitCheck(addr, method, *instr)) {
+    const DexFile* dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
+    LOG(FATAL) << "Invalid address for an implicit NullPointerException check: "
+               << "0x" << std::hex << addr << std::dec
+               << ", at "
+               << instr->DumpString(dex_file)
+               << " in "
+               << PrettyMethod(method);
+  }
+
   switch (instr->Opcode()) {
     case Instruction::INVOKE_DIRECT:
       ThrowNullPointerExceptionForMethodAccess(instr->VRegB_35c(), kDirect);
@@ -530,14 +635,32 @@
       ThrowException("Ljava/lang/NullPointerException;", nullptr,
                      "Attempt to get length of null array");
       break;
+    case Instruction::FILL_ARRAY_DATA: {
+      ThrowException("Ljava/lang/NullPointerException;", nullptr,
+                     "Attempt to write to null array");
+      break;
+    }
+    case Instruction::INVOKE_LAMBDA:
+    case Instruction::BOX_LAMBDA:
+    case Instruction::UNBOX_LAMBDA:
+    case Instruction::LIBERATE_VARIABLE: {
+      ThrowException("Ljava/lang/NullPointerException;", nullptr,
+                     "Using a null lambda");
+      break;
+    }
+    case Instruction::MONITOR_ENTER:
+    case Instruction::MONITOR_EXIT: {
+      ThrowException("Ljava/lang/NullPointerException;", nullptr,
+                     "Attempt to do a synchronize operation on a null object");
+      break;
+    }
     default: {
-      // TODO: We should have covered all the cases where we expect a NPE above, this
-      //       message/logging is so we can improve any cases we've missed in the future.
       const DexFile* dex_file =
           method->GetDeclaringClass()->GetDexCache()->GetDexFile();
-      ThrowException("Ljava/lang/NullPointerException;", nullptr,
-                     StringPrintf("Null pointer exception during instruction '%s'",
-                                  instr->DumpString(dex_file).c_str()).c_str());
+      LOG(FATAL) << "NullPointerException at an unexpected instruction: "
+                 << instr->DumpString(dex_file)
+                 << " in "
+                 << PrettyMethod(method);
       break;
     }
   }
@@ -654,6 +777,13 @@
   }
 }
 
+// StringIndexOutOfBoundsException
+
+void ThrowStringIndexOutOfBoundsException(int index, int length) {
+  ThrowException("Ljava/lang/StringIndexOutOfBoundsException;", nullptr,
+                 StringPrintf("length=%d; index=%d", length, index).c_str());
+}
+
 // VerifyError
 
 void ThrowVerifyError(mirror::Class* referrer, const char* fmt, ...) {
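
The core of IsValidImplicitCheck above is an address filter: for a compiler-emitted implicit null check the fault must land on address 0, on the offset the instruction actually accesses, or on the lock-word offset when read barriers are compiled in; anything else is treated as a real crash. A reduced sketch of that filter with the expected offset passed in directly (all constants and names below are illustrative, not ART's):

    #include <cstdint>

    // Reduced model: the real code switches over the dex opcode and resolves
    // the accessed field to find 'expected_offset'; here it is passed in.
    constexpr uintptr_t kMonitorOffset = 4;        // stand-in for the lock-word offset
    constexpr bool kEmitCompilerReadBarrier = true;

    constexpr bool IsPlausibleImplicitNullCheck(uintptr_t fault_addr, uintptr_t expected_offset) {
      // A null dereference itself, the exact offset the instruction accesses, or
      // the lock-word read inserted by a read barrier are the only addresses a
      // compiler-generated implicit null check should fault on.
      return fault_addr == 0u ||
             fault_addr == expected_offset ||
             (kEmitCompilerReadBarrier && fault_addr == kMonitorOffset);
    }

    int main() {
      // An iget on a field at offset 16 faulting at 16 is accepted; a wild fault
      // at 0x1000 is rejected (ART would LOG(FATAL) in that case).
      return (IsPlausibleImplicitNullCheck(16, 16) &&
              !IsPlausibleImplicitNullCheck(0x1000, 16)) ? 0 : 1;
    }
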
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index c3a1f09..cbd338d 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -195,7 +195,7 @@
                                               InvokeType type)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowNullPointerExceptionFromDexPC()
+void ThrowNullPointerExceptionFromDexPC(bool check_address = false, uintptr_t addr = 0)
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowNullPointerException(const char* msg)
@@ -211,6 +211,11 @@
 
 void ThrowStackOverflowError(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
+// StringIndexOutOfBoundsException
+
+void ThrowStringIndexOutOfBoundsException(int index, int length)
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
+
 // VerifyError
 
 void ThrowVerifyError(mirror::Class* referrer, const char* fmt, ...)
diff --git a/runtime/dex2oat_environment_test.h b/runtime/dex2oat_environment_test.h
new file mode 100644
index 0000000..743fbb9
--- /dev/null
+++ b/runtime/dex2oat_environment_test.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_DEX2OAT_ENVIRONMENT_TEST_H_
+#define ART_RUNTIME_DEX2OAT_ENVIRONMENT_TEST_H_
+
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "common_runtime_test.h"
+#include "compiler_callbacks.h"
+#include "gc/heap.h"
+#include "gc/space/image_space.h"
+#include "oat_file_assistant.h"
+#include "os.h"
+#include "runtime.h"
+#include "utils.h"
+
+namespace art {
+
+// Test class that provides some helpers to set a test up for compilation using dex2oat.
+class Dex2oatEnvironmentTest : public CommonRuntimeTest {
+ public:
+  virtual void SetUp() OVERRIDE {
+    CommonRuntimeTest::SetUp();
+
+    // Create a scratch directory to work from.
+    scratch_dir_ = android_data_ + "/Dex2oatEnvironmentTest";
+    ASSERT_EQ(0, mkdir(scratch_dir_.c_str(), 0700));
+
+    // Create a subdirectory in scratch for odex files.
+    odex_oat_dir_ = scratch_dir_ + "/oat";
+    ASSERT_EQ(0, mkdir(odex_oat_dir_.c_str(), 0700));
+
+    odex_dir_ = odex_oat_dir_ + "/" + std::string(GetInstructionSetString(kRuntimeISA));
+    ASSERT_EQ(0, mkdir(odex_dir_.c_str(), 0700));
+
+    // Verify the environment is as we expect
+    uint32_t checksum;
+    std::string error_msg;
+    ASSERT_TRUE(OS::FileExists(GetSystemImageFile().c_str()))
+      << "Expected pre-compiled boot image to be at: " << GetSystemImageFile();
+    ASSERT_TRUE(OS::FileExists(GetDexSrc1().c_str()))
+      << "Expected dex file to be at: " << GetDexSrc1();
+    ASSERT_TRUE(OS::FileExists(GetStrippedDexSrc1().c_str()))
+      << "Expected stripped dex file to be at: " << GetStrippedDexSrc1();
+    ASSERT_FALSE(DexFile::GetChecksum(GetStrippedDexSrc1().c_str(), &checksum, &error_msg))
+      << "Expected stripped dex file to be stripped: " << GetStrippedDexSrc1();
+    ASSERT_TRUE(OS::FileExists(GetDexSrc2().c_str()))
+      << "Expected dex file to be at: " << GetDexSrc2();
+
+    // GetMultiDexSrc2 should have the same primary dex checksum as
+    // GetMultiDexSrc1, but a different secondary dex checksum.
+    static constexpr bool kVerifyChecksum = true;
+    std::vector<std::unique_ptr<const DexFile>> multi1;
+    ASSERT_TRUE(DexFile::Open(GetMultiDexSrc1().c_str(),
+          GetMultiDexSrc1().c_str(), kVerifyChecksum, &error_msg, &multi1)) << error_msg;
+    ASSERT_GT(multi1.size(), 1u);
+
+    std::vector<std::unique_ptr<const DexFile>> multi2;
+    ASSERT_TRUE(DexFile::Open(GetMultiDexSrc2().c_str(),
+          GetMultiDexSrc2().c_str(), kVerifyChecksum, &error_msg, &multi2)) << error_msg;
+    ASSERT_GT(multi2.size(), 1u);
+
+    ASSERT_EQ(multi1[0]->GetLocationChecksum(), multi2[0]->GetLocationChecksum());
+    ASSERT_NE(multi1[1]->GetLocationChecksum(), multi2[1]->GetLocationChecksum());
+  }
+
+  virtual void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
+    // options->push_back(std::make_pair("-verbose:oat", nullptr));
+
+    // Set up the image location.
+    options->push_back(std::make_pair("-Ximage:" + GetImageLocation(),
+          nullptr));
+    // Make sure compiler callbacks are not set so that relocation will be
+    // enabled.
+    callbacks_.reset();
+  }
+
+  virtual void TearDown() OVERRIDE {
+    ClearDirectory(odex_dir_.c_str());
+    ASSERT_EQ(0, rmdir(odex_dir_.c_str()));
+
+    ClearDirectory(odex_oat_dir_.c_str());
+    ASSERT_EQ(0, rmdir(odex_oat_dir_.c_str()));
+
+    ClearDirectory(scratch_dir_.c_str());
+    ASSERT_EQ(0, rmdir(scratch_dir_.c_str()));
+
+    CommonRuntimeTest::TearDown();
+  }
+
+  static void Copy(const std::string& src, const std::string& dst) {
+    std::ifstream  src_stream(src, std::ios::binary);
+    std::ofstream  dst_stream(dst, std::ios::binary);
+
+    dst_stream << src_stream.rdbuf();
+  }
+
+  // Returns the directory where the pre-compiled core.art can be found.
+  // TODO: We should factor out this into common tests somewhere rather than
+  // re-hardcoding it here (This was copied originally from the elf writer
+  // test).
+  std::string GetImageDirectory() const {
+    if (IsHost()) {
+      const char* host_dir = getenv("ANDROID_HOST_OUT");
+      CHECK(host_dir != nullptr);
+      return std::string(host_dir) + "/framework";
+    } else {
+      return std::string("/data/art-test");
+    }
+  }
+
+  std::string GetImageLocation() const {
+    return GetImageDirectory() + "/core.art";
+  }
+
+  std::string GetSystemImageFile() const {
+    return GetImageDirectory() + "/" + GetInstructionSetString(kRuntimeISA)
+      + "/core.art";
+  }
+
+  bool GetCachedImageFile(/*out*/std::string* image, std::string* error_msg) const {
+    std::string cache = GetDalvikCache(GetInstructionSetString(kRuntimeISA), true);
+    return GetDalvikCacheFilename(GetImageLocation().c_str(), cache.c_str(), image, error_msg);
+  }
+
+  std::string GetDexSrc1() const {
+    return GetTestDexFileName("Main");
+  }
+
+  // Returns the path to a dex file equivalent to GetDexSrc1, but with the dex
+  // file stripped.
+  std::string GetStrippedDexSrc1() const {
+    return GetTestDexFileName("MainStripped");
+  }
+
+  std::string GetMultiDexSrc1() const {
+    return GetTestDexFileName("MultiDex");
+  }
+
+  // Returns the path to a multidex file equivalent to GetMultiDexSrc1, but
+  // with the contents of the secondary dex file changed.
+  std::string GetMultiDexSrc2() const {
+    return GetTestDexFileName("MultiDexModifiedSecondary");
+  }
+
+  std::string GetDexSrc2() const {
+    return GetTestDexFileName("Nested");
+  }
+
+  // Scratch directory, for dex and odex files (oat files will go in the
+  // dalvik cache).
+  const std::string& GetScratchDir() const {
+    return scratch_dir_;
+  }
+
+  // Odex directory is the subdirectory in the scratch directory where odex
+  // files should be located.
+  const std::string& GetOdexDir() const {
+    return odex_dir_;
+  }
+
+ private:
+  std::string scratch_dir_;
+  std::string odex_oat_dir_;
+  std::string odex_dir_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_DEX2OAT_ENVIRONMENT_TEST_H_
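
A minimal sketch of how a test might build on this fixture, using only the helpers declared above (the test name, copied file name and include path are illustrative; a real test would be wired into ART's gtest targets):

    #include <string>

    #include <gtest/gtest.h>

    #include "dex2oat_environment_test.h"

    namespace art {

    class Dex2oatEnvironmentUsageTest : public Dex2oatEnvironmentTest {};

    TEST_F(Dex2oatEnvironmentUsageTest, StagesDexInScratchDir) {
      // Stage a copy of the Main dex file in the per-test scratch directory.
      const std::string dex_location = GetScratchDir() + "/CopyOfMain.jar";
      Copy(GetDexSrc1(), dex_location);
      EXPECT_TRUE(OS::FileExists(dex_location.c_str()));
      // Odex output for this test would be written under GetOdexDir().
      EXPECT_FALSE(GetOdexDir().empty());
    }

    }  // namespace art
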
diff --git a/runtime/dex_file-inl.h b/runtime/dex_file-inl.h
index 4e6c3ca..108a5af 100644
--- a/runtime/dex_file-inl.h
+++ b/runtime/dex_file-inl.h
@@ -38,10 +38,88 @@
   return reinterpret_cast<const char*>(ptr);
 }
 
+inline const char* DexFile::GetStringData(const StringId& string_id) const {
+  uint32_t ignored;
+  return GetStringDataAndUtf16Length(string_id, &ignored);
+}
+
+inline const char* DexFile::StringDataAndUtf16LengthByIdx(uint32_t idx,
+                                                          uint32_t* utf16_length) const {
+  if (idx == kDexNoIndex) {
+    *utf16_length = 0;
+    return nullptr;
+  }
+  const StringId& string_id = GetStringId(idx);
+  return GetStringDataAndUtf16Length(string_id, utf16_length);
+}
+
+inline const char* DexFile::StringDataByIdx(uint32_t idx) const {
+  uint32_t unicode_length;
+  return StringDataAndUtf16LengthByIdx(idx, &unicode_length);
+}
+
+inline const char* DexFile::StringByTypeIdx(uint32_t idx, uint32_t* unicode_length) const {
+  const TypeId& type_id = GetTypeId(idx);
+  return StringDataAndUtf16LengthByIdx(type_id.descriptor_idx_, unicode_length);
+}
+
+inline const char* DexFile::StringByTypeIdx(uint32_t idx) const {
+  const TypeId& type_id = GetTypeId(idx);
+  return StringDataByIdx(type_id.descriptor_idx_);
+}
+
+inline const char* DexFile::GetTypeDescriptor(const TypeId& type_id) const {
+  return StringDataByIdx(type_id.descriptor_idx_);
+}
+
+inline const char* DexFile::GetFieldTypeDescriptor(const FieldId& field_id) const {
+  const DexFile::TypeId& type_id = GetTypeId(field_id.type_idx_);
+  return GetTypeDescriptor(type_id);
+}
+
+inline const char* DexFile::GetFieldName(const FieldId& field_id) const {
+  return StringDataByIdx(field_id.name_idx_);
+}
+
+inline const char* DexFile::GetMethodDeclaringClassDescriptor(const MethodId& method_id) const {
+  const DexFile::TypeId& type_id = GetTypeId(method_id.class_idx_);
+  return GetTypeDescriptor(type_id);
+}
+
 inline const Signature DexFile::GetMethodSignature(const MethodId& method_id) const {
   return Signature(this, GetProtoId(method_id.proto_idx_));
 }
 
+inline const char* DexFile::GetMethodName(const MethodId& method_id) const {
+  return StringDataByIdx(method_id.name_idx_);
+}
+
+inline const char* DexFile::GetMethodShorty(uint32_t idx) const {
+  return StringDataByIdx(GetProtoId(GetMethodId(idx).proto_idx_).shorty_idx_);
+}
+
+inline const char* DexFile::GetMethodShorty(const MethodId& method_id) const {
+  return StringDataByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_);
+}
+
+inline const char* DexFile::GetMethodShorty(const MethodId& method_id, uint32_t* length) const {
+  // Using the UTF16 length is safe here as shorties are guaranteed to be ASCII characters.
+  return StringDataAndUtf16LengthByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_, length);
+}
+
+inline const char* DexFile::GetClassDescriptor(const ClassDef& class_def) const {
+  return StringByTypeIdx(class_def.class_idx_);
+}
+
+inline const char* DexFile::GetReturnTypeDescriptor(const ProtoId& proto_id) const {
+  return StringByTypeIdx(proto_id.return_type_idx_);
+}
+
+inline const char* DexFile::GetShorty(uint32_t proto_idx) const {
+  const ProtoId& proto_id = GetProtoId(proto_idx);
+  return StringDataByIdx(proto_id.shorty_idx_);
+}
+
 inline const DexFile::TryItem* DexFile::GetTryItems(const CodeItem& code_item, uint32_t offset) {
   const uint16_t* insns_end_ = &code_item.insns_[code_item.insns_size_in_code_units_];
   return reinterpret_cast<const TryItem*>
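
The accessors moved into dex_file-inl.h all bottom out in one string-data lookup: a MethodId carries string and proto indices, a ProtoId carries a shorty string index, and so on. The chaining can be shown with toy tables (nothing below mirrors ART's real dex structures):

    #include <cstdint>
    #include <string>
    #include <vector>

    // Toy tables only; illustrative layouts, not the dex format.
    struct ToyStringId { std::string data; };
    struct ToyProtoId  { uint32_t shorty_idx; };
    struct ToyMethodId { uint32_t name_idx; uint32_t proto_idx; };

    struct ToyDexFile {
      std::vector<ToyStringId> strings;
      std::vector<ToyProtoId>  protos;
      std::vector<ToyMethodId> methods;

      // Every higher-level getter bottoms out in one string-data lookup.
      const char* StringDataByIdx(uint32_t idx) const { return strings[idx].data.c_str(); }
      // MethodId -> name string index -> string data.
      const char* GetMethodName(const ToyMethodId& m) const { return StringDataByIdx(m.name_idx); }
      // MethodId -> ProtoId -> shorty string index -> string data.
      const char* GetMethodShorty(const ToyMethodId& m) const {
        return StringDataByIdx(protos[m.proto_idx].shorty_idx);
      }
    };

    int main() {
      ToyDexFile dex{{{"toString"}, {"L"}}, {{1u}}, {{0u, 0u}}};
      const bool ok = std::string(dex.GetMethodName(dex.methods[0])) == "toString" &&
                      std::string(dex.GetMethodShorty(dex.methods[0])) == "L";
      return ok ? 0 : 1;
    }
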
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 05c95e0..5a203af 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -109,7 +109,7 @@
   }
   if (IsDexMagic(magic)) {
     std::unique_ptr<const DexFile> dex_file(
-        DexFile::OpenFile(fd.release(), filename, false, error_msg));
+        DexFile::OpenFile(fd.release(), filename, false, false, error_msg));
     if (dex_file.get() == nullptr) {
       return false;
     }
@@ -120,7 +120,10 @@
   return false;
 }
 
-bool DexFile::Open(const char* filename, const char* location, std::string* error_msg,
+bool DexFile::Open(const char* filename,
+                   const char* location,
+                   bool verify_checksum,
+                   std::string* error_msg,
                    std::vector<std::unique_ptr<const DexFile>>* dex_files) {
   ScopedTrace trace(std::string("Open dex file ") + location);
   DCHECK(dex_files != nullptr) << "DexFile::Open: out-param is nullptr";
@@ -131,10 +134,13 @@
     return false;
   }
   if (IsZipMagic(magic)) {
-    return DexFile::OpenZip(fd.release(), location, error_msg, dex_files);
+    return DexFile::OpenZip(fd.release(), location, verify_checksum, error_msg, dex_files);
   }
   if (IsDexMagic(magic)) {
-    std::unique_ptr<const DexFile> dex_file(DexFile::OpenFile(fd.release(), location, true,
+    std::unique_ptr<const DexFile> dex_file(DexFile::OpenFile(fd.release(),
+                                                              location,
+                                                              /* verify */ true,
+                                                              verify_checksum,
                                                               error_msg));
     if (dex_file.get() != nullptr) {
       dex_files->push_back(std::move(dex_file));
@@ -207,6 +213,7 @@
                                              uint32_t location_checksum,
                                              const OatDexFile* oat_dex_file,
                                              bool verify,
+                                             bool verify_checksum,
                                              std::string* error_msg) {
   ScopedTrace trace(std::string("Open dex file from RAM ") + location);
   std::unique_ptr<const DexFile> dex_file = OpenMemory(base,
@@ -220,6 +227,7 @@
                                          dex_file->Begin(),
                                          dex_file->Size(),
                                          location.c_str(),
+                                         verify_checksum,
                                          error_msg)) {
     return nullptr;
   }
@@ -227,7 +235,10 @@
   return dex_file;
 }
 
-std::unique_ptr<const DexFile> DexFile::OpenFile(int fd, const char* location, bool verify,
+std::unique_ptr<const DexFile> DexFile::OpenFile(int fd,
+                                                 const char* location,
+                                                 bool verify,
+                                                 bool verify_checksum,
                                                  std::string* error_msg) {
   ScopedTrace trace(std::string("Open dex file ") + location);
   CHECK(location != nullptr);
@@ -276,7 +287,9 @@
   }
 
   if (verify && !DexFileVerifier::Verify(dex_file.get(), dex_file->Begin(), dex_file->Size(),
-                                         location, error_msg)) {
+                                         location,
+                                         verify_checksum,
+                                         error_msg)) {
     return nullptr;
   }
 
@@ -285,7 +298,10 @@
 
 const char* DexFile::kClassesDex = "classes.dex";
 
-bool DexFile::OpenZip(int fd, const std::string& location, std::string* error_msg,
+bool DexFile::OpenZip(int fd,
+                      const std::string& location,
+                      bool verify_checksum,
+                      std::string* error_msg,
                       std::vector<std::unique_ptr<const DexFile>>* dex_files) {
   ScopedTrace trace("Dex file open Zip " + std::string(location));
   DCHECK(dex_files != nullptr) << "DexFile::OpenZip: out-param is nullptr";
@@ -294,7 +310,7 @@
     DCHECK(!error_msg->empty());
     return false;
   }
-  return DexFile::OpenFromZip(*zip_archive, location, error_msg, dex_files);
+  return DexFile::OpenFromZip(*zip_archive, location, verify_checksum, error_msg, dex_files);
 }
 
 std::unique_ptr<const DexFile> DexFile::OpenMemory(const std::string& location,
@@ -310,8 +326,11 @@
                     error_msg);
 }
 
-std::unique_ptr<const DexFile> DexFile::Open(const ZipArchive& zip_archive, const char* entry_name,
-                                             const std::string& location, std::string* error_msg,
+std::unique_ptr<const DexFile> DexFile::Open(const ZipArchive& zip_archive,
+                                             const char* entry_name,
+                                             const std::string& location,
+                                             bool verify_checksum,
+                                             std::string* error_msg,
                                              ZipOpenErrorCode* error_code) {
   ScopedTrace trace("Dex file open from Zip Archive " + std::string(location));
   CHECK(!location.empty());
@@ -342,7 +361,9 @@
   }
   CHECK(dex_file->IsReadOnly()) << location;
   if (!DexFileVerifier::Verify(dex_file.get(), dex_file->Begin(), dex_file->Size(),
-                               location.c_str(), error_msg)) {
+                               location.c_str(),
+                               verify_checksum,
+                               error_msg)) {
     *error_code = ZipOpenErrorCode::kVerifyError;
     return nullptr;
   }
@@ -356,14 +377,16 @@
 // seems an excessive number.
 static constexpr size_t kWarnOnManyDexFilesThreshold = 100;
 
-bool DexFile::OpenFromZip(const ZipArchive& zip_archive, const std::string& location,
+bool DexFile::OpenFromZip(const ZipArchive& zip_archive,
+                          const std::string& location,
+                          bool verify_checksum,
                           std::string* error_msg,
                           std::vector<std::unique_ptr<const DexFile>>* dex_files) {
   ScopedTrace trace("Dex file open from Zip " + std::string(location));
   DCHECK(dex_files != nullptr) << "DexFile::OpenFromZip: out-param is nullptr";
   ZipOpenErrorCode error_code;
-  std::unique_ptr<const DexFile> dex_file(Open(zip_archive, kClassesDex, location, error_msg,
-                                               &error_code));
+  std::unique_ptr<const DexFile> dex_file(
+      Open(zip_archive, kClassesDex, location, verify_checksum, error_msg, &error_code));
   if (dex_file.get() == nullptr) {
     return false;
   } else {
@@ -378,8 +401,8 @@
     for (size_t i = 1; ; ++i) {
       std::string name = GetMultiDexClassesDexName(i);
       std::string fake_location = GetMultiDexLocation(i, location.c_str());
-      std::unique_ptr<const DexFile> next_dex_file(Open(zip_archive, name.c_str(), fake_location,
-                                                        error_msg, &error_code));
+      std::unique_ptr<const DexFile> next_dex_file(
+          Open(zip_archive, name.c_str(), fake_location, verify_checksum, error_msg, &error_code));
       if (next_dex_file.get() == nullptr) {
         if (error_code != ZipOpenErrorCode::kEntryNotFound) {
           LOG(WARNING) << error_msg;
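As the hunks above show, DexFile::Open now threads a verify_checksum flag through both the zip and raw-dex paths after sniffing the leading magic bytes. A simplified, self-contained sketch of that magic-based dispatch follows; the real code reads the header through a scoped file descriptor and uses ART's own IsZipMagic/IsDexMagic helpers.

#include <cstdint>
#include <cstring>

bool IsZipMagicSketch(const uint8_t* magic) {
  return std::memcmp(magic, "PK\x03\x04", 4) == 0;  // zip local file header
}

bool IsDexMagicSketch(const uint8_t* magic) {
  return std::memcmp(magic, "dex\n", 4) == 0;       // first half of the dex magic
}

// Dispatch mirroring DexFile::Open: zip containers take the OpenZip path, raw dex
// files take OpenFile, and the new verify_checksum flag is forwarded either way.
enum class Container { kZip, kRawDex, kUnknown };

Container ClassifySketch(const uint8_t* magic) {
  if (IsZipMagicSketch(magic)) return Container::kZip;
  if (IsDexMagicSketch(magic)) return Container::kRawDex;
  return Container::kUnknown;
}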
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 638821b..3dffe4b 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -416,7 +416,10 @@
   static bool GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg);
 
   // Opens .dex files found in the container, guessing the container format based on file extension.
-  static bool Open(const char* filename, const char* location, std::string* error_msg,
+  static bool Open(const char* filename,
+                   const char* location,
+                   bool verify_checksum,
+                   std::string* error_msg,
                    std::vector<std::unique_ptr<const DexFile>>* dex_files);
 
   // Checks whether the given file has the dex magic, or is a zip file with a classes.dex entry.
@@ -429,10 +432,13 @@
                                              uint32_t location_checksum,
                                              const OatDexFile* oat_dex_file,
                                              bool verify,
+                                             bool verify_checksum,
                                              std::string* error_msg);
 
   // Open all classesXXX.dex files from a zip archive.
-  static bool OpenFromZip(const ZipArchive& zip_archive, const std::string& location,
+  static bool OpenFromZip(const ZipArchive& zip_archive,
+                          const std::string& location,
+                          bool verify_checksum,
                           std::string* error_msg,
                           std::vector<std::unique_ptr<const DexFile>>* dex_files);
 
@@ -522,25 +528,12 @@
   // as the string length of the string data.
   const char* GetStringDataAndUtf16Length(const StringId& string_id, uint32_t* utf16_length) const;
 
-  const char* GetStringData(const StringId& string_id) const {
-    uint32_t ignored;
-    return GetStringDataAndUtf16Length(string_id, &ignored);
-  }
+  const char* GetStringData(const StringId& string_id) const;
 
   // Index version of GetStringDataAndUtf16Length.
-  const char* StringDataAndUtf16LengthByIdx(uint32_t idx, uint32_t* utf16_length) const {
-    if (idx == kDexNoIndex) {
-      *utf16_length = 0;
-      return nullptr;
-    }
-    const StringId& string_id = GetStringId(idx);
-    return GetStringDataAndUtf16Length(string_id, utf16_length);
-  }
+  const char* StringDataAndUtf16LengthByIdx(uint32_t idx, uint32_t* utf16_length) const;
 
-  const char* StringDataByIdx(uint32_t idx) const {
-    uint32_t unicode_length;
-    return StringDataAndUtf16LengthByIdx(idx, &unicode_length);
-  }
+  const char* StringDataByIdx(uint32_t idx) const;
 
   // Looks up a string id for a given modified utf8 string.
   const StringId* FindStringId(const char* string) const;
@@ -571,20 +564,12 @@
   }
 
   // Get the descriptor string associated with a given type index.
-  const char* StringByTypeIdx(uint32_t idx, uint32_t* unicode_length) const {
-    const TypeId& type_id = GetTypeId(idx);
-    return StringDataAndUtf16LengthByIdx(type_id.descriptor_idx_, unicode_length);
-  }
+  const char* StringByTypeIdx(uint32_t idx, uint32_t* unicode_length) const;
 
-  const char* StringByTypeIdx(uint32_t idx) const {
-    const TypeId& type_id = GetTypeId(idx);
-    return StringDataByIdx(type_id.descriptor_idx_);
-  }
+  const char* StringByTypeIdx(uint32_t idx) const;
 
   // Returns the type descriptor string of a type id.
-  const char* GetTypeDescriptor(const TypeId& type_id) const {
-    return StringDataByIdx(type_id.descriptor_idx_);
-  }
+  const char* GetTypeDescriptor(const TypeId& type_id) const;
 
   // Looks up a type for the given string index
   const TypeId* FindTypeId(uint32_t string_idx) const;
@@ -619,15 +604,10 @@
   }
 
   // Returns the class descriptor string of a field id.
-  const char* GetFieldTypeDescriptor(const FieldId& field_id) const {
-    const DexFile::TypeId& type_id = GetTypeId(field_id.type_idx_);
-    return GetTypeDescriptor(type_id);
-  }
+  const char* GetFieldTypeDescriptor(const FieldId& field_id) const;
 
   // Returns the name of a field id.
-  const char* GetFieldName(const FieldId& field_id) const {
-    return StringDataByIdx(field_id.name_idx_);
-  }
+  const char* GetFieldName(const FieldId& field_id) const;
 
   // Returns the number of method identifiers in the .dex file.
   size_t NumMethodIds() const {
@@ -653,10 +633,7 @@
                                const DexFile::ProtoId& signature) const;
 
   // Returns the declaring class descriptor string of a method id.
-  const char* GetMethodDeclaringClassDescriptor(const MethodId& method_id) const {
-    const DexFile::TypeId& type_id = GetTypeId(method_id.class_idx_);
-    return GetTypeDescriptor(type_id);
-  }
+  const char* GetMethodDeclaringClassDescriptor(const MethodId& method_id) const;
 
   // Returns the prototype of a method id.
   const ProtoId& GetMethodPrototype(const MethodId& method_id) const {
@@ -667,23 +644,15 @@
   const Signature GetMethodSignature(const MethodId& method_id) const;
 
   // Returns the name of a method id.
-  const char* GetMethodName(const MethodId& method_id) const {
-    return StringDataByIdx(method_id.name_idx_);
-  }
+  const char* GetMethodName(const MethodId& method_id) const;
 
   // Returns the shorty of a method by its index.
-  const char* GetMethodShorty(uint32_t idx) const {
-    return StringDataByIdx(GetProtoId(GetMethodId(idx).proto_idx_).shorty_idx_);
-  }
+  const char* GetMethodShorty(uint32_t idx) const;
 
   // Returns the shorty of a method id.
-  const char* GetMethodShorty(const MethodId& method_id) const {
-    return StringDataByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_);
-  }
-  const char* GetMethodShorty(const MethodId& method_id, uint32_t* length) const {
-    // Using the UTF16 length is safe here as shorties are guaranteed to be ASCII characters.
-    return StringDataAndUtf16LengthByIdx(GetProtoId(method_id.proto_idx_).shorty_idx_, length);
-  }
+  const char* GetMethodShorty(const MethodId& method_id) const;
+  const char* GetMethodShorty(const MethodId& method_id, uint32_t* length) const;
+
   // Returns the number of class definitions in the .dex file.
   uint32_t NumClassDefs() const {
     DCHECK(header_ != nullptr) << GetLocation();
@@ -703,9 +672,7 @@
   }
 
   // Returns the class descriptor string of a class definition.
-  const char* GetClassDescriptor(const ClassDef& class_def) const {
-    return StringByTypeIdx(class_def.class_idx_);
-  }
+  const char* GetClassDescriptor(const ClassDef& class_def) const;
 
   // Looks up a class definition by its class descriptor. Hash must be
   // ComputeModifiedUtf8Hash(descriptor).
@@ -743,9 +710,7 @@
     }
   }
 
-  const char* GetReturnTypeDescriptor(const ProtoId& proto_id) const {
-    return StringByTypeIdx(proto_id.return_type_idx_);
-  }
+  const char* GetReturnTypeDescriptor(const ProtoId& proto_id) const;
 
   // Returns the number of prototype identifiers in the .dex file.
   size_t NumProtoIds() const {
@@ -782,10 +747,7 @@
   const Signature CreateSignature(const StringPiece& signature) const;
 
   // Returns the short form method descriptor for the given prototype.
-  const char* GetShorty(uint32_t proto_idx) const {
-    const ProtoId& proto_id = GetProtoId(proto_idx);
-    return StringDataByIdx(proto_id.shorty_idx_);
-  }
+  const char* GetShorty(uint32_t proto_idx) const;
 
   const TypeList* GetProtoParameters(const ProtoId& proto_id) const {
     if (proto_id.parameters_off_ == 0) {
@@ -1177,11 +1139,17 @@
 
  private:
   // Opens a .dex file
-  static std::unique_ptr<const DexFile> OpenFile(int fd, const char* location,
-                                                 bool verify, std::string* error_msg);
+  static std::unique_ptr<const DexFile> OpenFile(int fd,
+                                                 const char* location,
+                                                 bool verify,
+                                                 bool verify_checksum,
+                                                 std::string* error_msg);
 
   // Opens dex files from within a .jar, .zip, or .apk file
-  static bool OpenZip(int fd, const std::string& location, std::string* error_msg,
+  static bool OpenZip(int fd,
+                      const std::string& location,
+                      bool verify_checksum,
+                      std::string* error_msg,
                       std::vector<std::unique_ptr<const DexFile>>* dex_files);
 
   enum class ZipOpenErrorCode {  // private
@@ -1195,8 +1163,11 @@
 
   // Opens .dex file from the entry_name in a zip archive. error_code is undefined when non-null
   // return.
-  static std::unique_ptr<const DexFile> Open(const ZipArchive& zip_archive, const char* entry_name,
-                                             const std::string& location, std::string* error_msg,
+  static std::unique_ptr<const DexFile> Open(const ZipArchive& zip_archive,
+                                             const char* entry_name,
+                                             const std::string& location,
+                                             bool verify_checksum,
+                                             std::string* error_msg,
                                              ZipOpenErrorCode* error_code);
 
   // Opens a .dex file at the given address backed by a MemMap
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 796701d..4f8e6f1 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -154,9 +154,10 @@
 
   // read dex file
   ScopedObjectAccess soa(Thread::Current());
+  static constexpr bool kVerifyChecksum = true;
   std::string error_msg;
   std::vector<std::unique_ptr<const DexFile>> tmp;
-  bool success = DexFile::Open(location, location, &error_msg, &tmp);
+  bool success = DexFile::Open(location, location, kVerifyChecksum, &error_msg, &tmp);
   CHECK(success) << error_msg;
   EXPECT_EQ(1U, tmp.size());
   std::unique_ptr<const DexFile> dex_file = std::move(tmp[0]);
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 1d24349..5132efc 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -128,9 +128,14 @@
     error_stmt;                                               \
   }
 
-bool DexFileVerifier::Verify(const DexFile* dex_file, const uint8_t* begin, size_t size,
-                             const char* location, std::string* error_msg) {
-  std::unique_ptr<DexFileVerifier> verifier(new DexFileVerifier(dex_file, begin, size, location));
+bool DexFileVerifier::Verify(const DexFile* dex_file,
+                             const uint8_t* begin,
+                             size_t size,
+                             const char* location,
+                             bool verify_checksum,
+                             std::string* error_msg) {
+  std::unique_ptr<DexFileVerifier> verifier(
+      new DexFileVerifier(dex_file, begin, size, location, verify_checksum));
   if (!verifier->Verify()) {
     *error_msg = verifier->FailureReason();
     return false;
@@ -273,8 +278,13 @@
   const uint8_t* non_sum_ptr = reinterpret_cast<const uint8_t*>(header_) + non_sum;
   adler_checksum = adler32(adler_checksum, non_sum_ptr, expected_size - non_sum);
   if (adler_checksum != header_->checksum_) {
-    ErrorStringPrintf("Bad checksum (%08x, expected %08x)", adler_checksum, header_->checksum_);
-    return false;
+    if (verify_checksum_) {
+      ErrorStringPrintf("Bad checksum (%08x, expected %08x)", adler_checksum, header_->checksum_);
+      return false;
+    } else {
+      LOG(WARNING) << StringPrintf(
+          "Ignoring bad checksum (%08x, expected %08x)", adler_checksum, header_->checksum_);
+    }
   }
 
   // Check the contents of the header.
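The verifier change above downgrades a checksum mismatch to a warning when verify_checksum is false. The dex checksum is an Adler-32 over everything after the 8-byte magic and the 4-byte checksum field; the snippet below recomputes it with zlib as a standalone sketch of the policy, not the verifier's actual code.

#include <zlib.h>

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

bool CheckDexChecksumSketch(const uint8_t* data, size_t size, bool verify_checksum) {
  constexpr size_t kNonSum = 8 + 4;  // bytes not covered: magic_ + checksum_
  if (size < kNonSum) {
    return false;
  }
  uint32_t stored;
  std::memcpy(&stored, data + 8, sizeof(stored));  // checksum_ sits right after magic_
  uint32_t actual = static_cast<uint32_t>(
      adler32(adler32(0L, Z_NULL, 0), data + kNonSum, static_cast<uInt>(size - kNonSum)));
  if (actual != stored) {
    if (verify_checksum) {
      std::fprintf(stderr, "Bad checksum (%08x, expected %08x)\n", actual, stored);
      return false;
    }
    std::fprintf(stderr, "Ignoring bad checksum (%08x, expected %08x)\n", actual, stored);
  }
  return true;
}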
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index 90409db..133e432 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -26,17 +26,31 @@
 
 class DexFileVerifier {
  public:
-  static bool Verify(const DexFile* dex_file, const uint8_t* begin, size_t size,
-                     const char* location, std::string* error_msg);
+  static bool Verify(const DexFile* dex_file,
+                     const uint8_t* begin,
+                     size_t size,
+                     const char* location,
+                     bool verify_checksum,
+                     std::string* error_msg);
 
   const std::string& FailureReason() const {
     return failure_reason_;
   }
 
  private:
-  DexFileVerifier(const DexFile* dex_file, const uint8_t* begin, size_t size, const char* location)
-      : dex_file_(dex_file), begin_(begin), size_(size), location_(location),
-        header_(&dex_file->GetHeader()), ptr_(nullptr), previous_item_(nullptr)  {
+  DexFileVerifier(const DexFile* dex_file,
+                  const uint8_t* begin,
+                  size_t size,
+                  const char* location,
+                  bool verify_checksum)
+      : dex_file_(dex_file),
+        begin_(begin),
+        size_(size),
+        location_(location),
+        verify_checksum_(verify_checksum),
+        header_(&dex_file->GetHeader()),
+        ptr_(nullptr),
+        previous_item_(nullptr)  {
   }
 
   bool Verify();
@@ -176,6 +190,7 @@
   const uint8_t* const begin_;
   const size_t size_;
   const char* const location_;
+  const bool verify_checksum_;
   const DexFile::Header* const header_;
 
   struct OffsetTypeMapEmptyFn {
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 4e53914..71c0ad9 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -122,6 +122,10 @@
 
 class DexFileVerifierTest : public CommonRuntimeTest {
  protected:
+  DexFile* GetDexFile(const uint8_t* dex_bytes, size_t length) {
+    return new DexFile(dex_bytes, length, "tmp", 0, nullptr, nullptr);
+  }
+
   void VerifyModification(const char* dex_file_base64_content,
                           const char* location,
                           std::function<void(DexFile*)> f,
@@ -130,16 +134,17 @@
     std::unique_ptr<uint8_t[]> dex_bytes = DecodeBase64(dex_file_base64_content, &length);
     CHECK(dex_bytes != nullptr);
     // Note: `dex_file` will be destroyed before `dex_bytes`.
-    std::unique_ptr<DexFile> dex_file(
-        new DexFile(dex_bytes.get(), length, "tmp", 0, nullptr, nullptr));
+    std::unique_ptr<DexFile> dex_file(GetDexFile(dex_bytes.get(), length));
     f(dex_file.get());
     FixUpChecksum(const_cast<uint8_t*>(dex_file->Begin()));
 
+    static constexpr bool kVerifyChecksum = true;
     std::string error_msg;
     bool success = DexFileVerifier::Verify(dex_file.get(),
                                            dex_file->Begin(),
                                            dex_file->Size(),
                                            location,
+                                           kVerifyChecksum,
                                            &error_msg);
     if (expected_error == nullptr) {
       EXPECT_TRUE(success) << error_msg;
@@ -175,7 +180,7 @@
   // read dex file
   ScopedObjectAccess soa(Thread::Current());
   std::vector<std::unique_ptr<const DexFile>> tmp;
-  bool success = DexFile::Open(location, location, error_msg, &tmp);
+  bool success = DexFile::Open(location, location, true, error_msg, &tmp);
   CHECK(success) << error_msg;
   EXPECT_EQ(1U, tmp.size());
   std::unique_ptr<const DexFile> dex_file = std::move(tmp[0]);
@@ -1697,4 +1702,45 @@
       " implemented interface with type idx: '0'");
 }
 
+TEST_F(DexFileVerifierTest, Checksum) {
+  size_t length;
+  std::unique_ptr<uint8_t[]> dex_bytes = DecodeBase64(kGoodTestDex, &length);
+  CHECK(dex_bytes != nullptr);
+  // Note: `dex_file` will be destroyed before `dex_bytes`.
+  std::unique_ptr<DexFile> dex_file(GetDexFile(dex_bytes.get(), length));
+  std::string error_msg;
+
+  // Good checksum: all pass.
+  EXPECT_TRUE(DexFileVerifier::Verify(dex_file.get(),
+                                      dex_file->Begin(),
+                                      dex_file->Size(),
+                                      "good checksum, no verify",
+                                      /*verify_checksum*/ false,
+                                      &error_msg));
+  EXPECT_TRUE(DexFileVerifier::Verify(dex_file.get(),
+                                      dex_file->Begin(),
+                                      dex_file->Size(),
+                                      "good checksum, verify",
+                                      /*verify_checksum*/ true,
+                                      &error_msg));
+
+  // Bad checksum: passes when verify_checksum is false, fails when it is true.
+  DexFile::Header* header = reinterpret_cast<DexFile::Header*>(
+      const_cast<uint8_t*>(dex_file->Begin()));
+  header->checksum_ = 0;
+  EXPECT_TRUE(DexFileVerifier::Verify(dex_file.get(),
+                                      dex_file->Begin(),
+                                      dex_file->Size(),
+                                      "bad checksum, no verify",
+                                      /*verify_checksum*/ false,
+                                      &error_msg));
+  EXPECT_FALSE(DexFileVerifier::Verify(dex_file.get(),
+                                       dex_file->Begin(),
+                                       dex_file->Size(),
+                                       "bad checksum, verify",
+                                       /*verify_checksum*/ true,
+                                       &error_msg));
+  EXPECT_NE(error_msg.find("Bad checksum"), std::string::npos) << error_msg;
+}
+
 }  // namespace art
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index db3f88f..d61d0aa 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -19,7 +19,7 @@
 
 #include "entrypoint_utils.h"
 
-#include "art_method-inl.h"
+#include "art_method.h"
 #include "class_linker-inl.h"
 #include "common_throws.h"
 #include "dex_file.h"
@@ -39,45 +39,83 @@
 
 namespace art {
 
-template <bool kResolve = true>
 inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method,
                                     const InlineInfo& inline_info,
                                     const InlineInfoEncoding& encoding,
                                     uint8_t inlining_depth)
-  SHARED_REQUIRES(Locks::mutator_lock_) {
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  // This method is used by artQuickResolutionTrampoline before it sets up the passed
+  // parameters in a GC-friendly way. Therefore, we must never be suspended while
+  // executing it.
+  ScopedAssertNoThreadSuspension sants(Thread::Current(), __FUNCTION__);
+
   uint32_t method_index = inline_info.GetMethodIndexAtDepth(encoding, inlining_depth);
   InvokeType invoke_type = static_cast<InvokeType>(
         inline_info.GetInvokeTypeAtDepth(encoding, inlining_depth));
-  ArtMethod* caller = outer_method->GetDexCacheResolvedMethod(method_index, sizeof(void*));
-  if (!caller->IsRuntimeMethod()) {
-    return caller;
-  }
-  if (!kResolve) {
-    return nullptr;
+  ArtMethod* inlined_method = outer_method->GetDexCacheResolvedMethod(method_index, sizeof(void*));
+  if (!inlined_method->IsRuntimeMethod()) {
+    return inlined_method;
   }
 
-  // The method in the dex cache can be the runtime method responsible for invoking
+  // The method in the dex cache is the runtime method responsible for invoking
   // the stub that will then update the dex cache. Therefore, we need to do the
   // resolution ourselves.
 
-  // We first find the class loader of our caller. If it is the outer method, we can directly
-  // use its class loader. Otherwise, we also need to resolve our caller.
-  StackHandleScope<2> hs(Thread::Current());
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  MutableHandle<mirror::ClassLoader> class_loader(hs.NewHandle<mirror::Class>(nullptr));
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(outer_method->GetDexCache()));
-  if (inlining_depth == 0) {
-    class_loader.Assign(outer_method->GetClassLoader());
+  // We first find the dex cache of our caller. If it is the outer method, we can directly
+  // use its dex cache. Otherwise, we also need to resolve our caller.
+  ArtMethod* caller = outer_method;
+  if (inlining_depth != 0) {
+    caller = GetResolvedMethod(outer_method,
+                               inline_info,
+                               encoding,
+                               inlining_depth - 1);
+  }
+  DCHECK_EQ(caller->GetDexCache(), outer_method->GetDexCache())
+      << "Compiler only supports inlining calls within the same dex cache";
+  const DexFile* dex_file = outer_method->GetDexFile();
+  const DexFile::MethodId& method_id = dex_file->GetMethodId(method_index);
+
+  if (inline_info.GetDexPcAtDepth(encoding, inlining_depth) == static_cast<uint32_t>(-1)) {
+    // "charAt" special case. It is the only non-leaf method we inline across dex files.
+    if (kIsDebugBuild) {
+      const char* name = dex_file->StringDataByIdx(method_id.name_idx_);
+      DCHECK_EQ(std::string(name), "charAt");
+      DCHECK_EQ(std::string(dex_file->GetMethodShorty(method_id)), "CI")
+          << std::string(dex_file->GetMethodShorty(method_id));
+      DCHECK_EQ(std::string(dex_file->StringByTypeIdx(method_id.class_idx_)), "Ljava/lang/String;")
+          << std::string(dex_file->StringByTypeIdx(method_id.class_idx_));
+    }
+    mirror::Class* cls =
+        Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kJavaLangString);
+    // Update the dex cache for future lookups.
+    caller->GetDexCache()->SetResolvedType(method_id.class_idx_, cls);
+    inlined_method = cls->FindVirtualMethod("charAt", "(I)C", sizeof(void*));
   } else {
-    caller = GetResolvedMethod<kResolve>(outer_method,
-                                         inline_info,
-                                         encoding,
-                                         inlining_depth - 1);
-    class_loader.Assign(caller->GetClassLoader());
+    mirror::Class* klass = caller->GetDexCache()->GetResolvedType(method_id.class_idx_);
+    DCHECK_EQ(klass->GetDexCache(), caller->GetDexCache())
+        << "Compiler only supports inlining calls within the same dex cache";
+    switch (invoke_type) {
+      case kDirect:
+      case kStatic:
+        inlined_method =
+            klass->FindDirectMethod(klass->GetDexCache(), method_index, sizeof(void*));
+        break;
+      case kSuper:
+      case kVirtual:
+        inlined_method =
+            klass->FindVirtualMethod(klass->GetDexCache(), method_index, sizeof(void*));
+        break;
+      default:
+        LOG(FATAL) << "Unimplemented inlined invocation type: " << invoke_type;
+        UNREACHABLE();
+    }
   }
 
-  return class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
-      *outer_method->GetDexFile(), method_index, dex_cache, class_loader, nullptr, invoke_type);
+  // Update the dex cache for future lookups. Note that for static methods, this is safe
+  // when the class is being initialized, as the entrypoint for the ArtMethod is at
+  // this point still the resolution trampoline.
+  outer_method->SetDexCacheResolvedMethod(method_index, inlined_method, sizeof(void*));
+  return inlined_method;
 }
 
 inline ArtMethod* GetCalleeSaveMethodCaller(Thread* self, Runtime::CalleeSaveType type)
@@ -448,23 +486,10 @@
                      : ClassLinker::kNoICCECheckForCache;
     resolved_method = class_linker->ResolveMethod<resolve_mode>(self, method_idx, referrer, type);
   }
+  // Resolution and access check.
   if (UNLIKELY(resolved_method == nullptr)) {
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
     return nullptr;  // Failure.
-  } else if (UNLIKELY(*this_object == nullptr && type != kStatic)) {
-    if (UNLIKELY(resolved_method->GetDeclaringClass()->IsStringClass() &&
-                 resolved_method->IsConstructor())) {
-      // Hack for String init:
-      //
-      // We assume that the input of String.<init> in verified code is always
-      // an unitialized reference. If it is a null constant, it must have been
-      // optimized out by the compiler. Do not throw NullPointerException.
-    } else {
-      // Maintain interpreter-like semantics where NullPointerException is thrown
-      // after potential NoSuchMethodError from class linker.
-      ThrowNullPointerExceptionForMethodAccess(method_idx, type);
-      return nullptr;  // Failure.
-    }
   } else if (access_check) {
     mirror::Class* methods_class = resolved_method->GetDeclaringClass();
     bool can_access_resolved_method =
@@ -482,6 +507,22 @@
       return nullptr;  // Failure.
     }
   }
+  // Next, null pointer check.
+  if (UNLIKELY(*this_object == nullptr && type != kStatic)) {
+    if (UNLIKELY(resolved_method->GetDeclaringClass()->IsStringClass() &&
+                 resolved_method->IsConstructor())) {
+      // Hack for String init:
+      //
+      // We assume that the input of String.<init> in verified code is always
+      // an uninitialized reference. If it is a null constant, it must have been
+      // optimized out by the compiler. Do not throw NullPointerException.
+    } else {
+      // Maintain interpreter-like semantics where NullPointerException is thrown
+      // after potential NoSuchMethodError from class linker.
+      ThrowNullPointerExceptionForMethodAccess(method_idx, type);
+      return nullptr;  // Failure.
+    }
+  }
   switch (type) {
     case kStatic:
     case kDirect:
@@ -559,10 +600,9 @@
       }
     }
     case kInterface: {
-      uint32_t imt_index = resolved_method->GetImtIndex();
-      size_t pointer_size = class_linker->GetImagePointerSize();
-      ArtMethod* imt_method = (*this_object)->GetClass()->GetImt(pointer_size)->
-          Get(imt_index, pointer_size);
+      uint32_t imt_index = resolved_method->GetDexMethodIndex() % mirror::Class::kImtSize;
+      ArtMethod* imt_method = (*this_object)->GetClass()->GetEmbeddedImTableEntry(
+          imt_index, class_linker->GetImagePointerSize());
       if (!imt_method->IsRuntimeMethod()) {
         if (kIsDebugBuild) {
           mirror::Class* klass = (*this_object)->GetClass();
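The interface-dispatch hunks above revert to deriving the IMT slot directly from the interface method's dex method index, modulo the fixed embedded table size. A tiny sketch of that mapping is shown below; kImtSize = 64 is an assumption standing in for mirror::Class::kImtSize.

#include <cstdint>
#include <cstdio>

constexpr uint32_t kImtSize = 64;

constexpr uint32_t ImtSlot(uint32_t dex_method_index) {
  return dex_method_index % kImtSize;
}

int main() {
  // Methods whose indices differ by a multiple of kImtSize collide in one slot and
  // are then disambiguated through the ImtConflictTable, as in the trampoline above.
  std::printf("%u %u\n", ImtSlot(5), ImtSlot(5 + kImtSize));  // prints "5 5"
  return 0;
}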
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index f3a0d2f..d0dad34 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -120,6 +120,8 @@
 extern "C" void art_quick_throw_div_zero();
 extern "C" void art_quick_throw_no_such_method(int32_t method_idx);
 extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_null_pointer_exception_from_signal(uintptr_t address);
 extern "C" void art_quick_throw_stack_overflow(void*);
+extern "C" void art_quick_throw_string_bounds(int32_t index, int32_t limit);
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_DEFAULT_EXTERNS_H_
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 5dafa8b..f98de95 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -114,6 +114,7 @@
   qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
   qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
+  qpoints->pThrowStringBounds = art_quick_throw_string_bounds;
 
   // Deoptimize
   qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 79d1c13..30b639e 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -140,6 +140,7 @@
   V(ThrowNoSuchMethod, void, int32_t) \
   V(ThrowNullPointer, void, void) \
   V(ThrowStackOverflow, void, void*) \
+  V(ThrowStringBounds, void, int32_t, int32_t) \
   V(Deoptimize, void, void) \
 \
   V(A64Load, int64_t, volatile const int64_t *) \
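The entrypoint list above is an X-macro: each V(name, return-type, arg-types...) row expands into one slot of the QuickEntryPoints structure, so adding the ThrowStringBounds row adds a function-pointer field and keeps the assembly offsets in lockstep. A reduced sketch of the expansion pattern, with invented list and struct names, follows:

#include <cstdint>

#define SKETCH_ENTRYPOINT_LIST(V)               \
  V(ThrowArrayBounds, void, int32_t, int32_t)   \
  V(ThrowStringBounds, void, int32_t, int32_t)  \
  V(ThrowNullPointer, void, void)

struct SketchEntryPoints {
  // Each V(name, rettype, ...) row becomes a `rettype (*pName)(...)` member, so a
  // new list entry adds exactly one pointer-sized slot to the struct.
#define ENTRYPOINT_MEMBER(name, rettype, ...) rettype (*p##name)(__VA_ARGS__);
  SKETCH_ENTRYPOINT_LIST(ENTRYPOINT_MEMBER)
#undef ENTRYPOINT_MEMBER
};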
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index 5256fea..ea9f7b0 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -29,7 +29,7 @@
   self->QuickDeliverException();
 }
 
-// Called by generated call to throw an exception.
+// Called by generated code to throw an exception.
 extern "C" NO_RETURN void artDeliverExceptionFromCode(mirror::Throwable* exception, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   /*
@@ -48,17 +48,27 @@
   self->QuickDeliverException();
 }
 
-// Called by generated call to throw a NPE exception.
+// Called by generated code to throw an NPE exception.
 extern "C" NO_RETURN void artThrowNullPointerExceptionFromCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
+  // We come from an explicit check in the generated code. This path is triggered
+  // only if the object is indeed null.
+  ThrowNullPointerExceptionFromDexPC(/* check_address */ false, 0U);
+  self->QuickDeliverException();
+}
+
+// Installed by a signal handler to throw an NPE exception.
+extern "C" NO_RETURN void artThrowNullPointerExceptionFromSignal(uintptr_t addr, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
   self->NoteSignalBeingHandled();
-  ThrowNullPointerExceptionFromDexPC();
+  ThrowNullPointerExceptionFromDexPC(/* check_address */ true, addr);
   self->NoteSignalHandlerDone();
   self->QuickDeliverException();
 }
 
-// Called by generated call to throw an arithmetic divide by zero exception.
+// Called by generated code to throw an arithmetic divide by zero exception.
 extern "C" NO_RETURN void artThrowDivZeroFromCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
@@ -66,7 +76,7 @@
   self->QuickDeliverException();
 }
 
-// Called by generated call to throw an array index out of bounds exception.
+// Called by generated code to throw an array index out of bounds exception.
 extern "C" NO_RETURN void artThrowArrayBoundsFromCode(int index, int length, Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
@@ -74,6 +84,14 @@
   self->QuickDeliverException();
 }
 
+// Called by generated code to throw a string index out of bounds exception.
+extern "C" NO_RETURN void artThrowStringBoundsFromCode(int index, int length, Thread* self)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  ThrowStringIndexOutOfBoundsException(index, length);
+  self->QuickDeliverException();
+}
+
 extern "C" NO_RETURN void artThrowStackOverflowFromCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 0a70be1..03771aa 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2169,12 +2169,13 @@
       dex_method_idx, sizeof(void*));
   DCHECK(interface_method != nullptr) << dex_method_idx << " " << PrettyMethod(caller_method);
   ArtMethod* method = nullptr;
-  ImTable* imt = cls->GetImt(sizeof(void*));
 
   if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
     // If the dex cache already resolved the interface method, look whether we have
     // a match in the ImtConflictTable.
-    ArtMethod* conflict_method = imt->Get(interface_method->GetImtIndex(), sizeof(void*));
+    uint32_t imt_index = interface_method->GetDexMethodIndex();
+    ArtMethod* conflict_method = cls->GetEmbeddedImTableEntry(
+        imt_index % mirror::Class::kImtSize, sizeof(void*));
     if (LIKELY(conflict_method->IsRuntimeMethod())) {
       ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
       DCHECK(current_table != nullptr);
@@ -2225,8 +2226,9 @@
 
   // We arrive here if we have found an implementation, and it is not in the ImtConflictTable.
   // We create a new table with the new pair { interface_method, method }.
-  uint32_t imt_index = interface_method->GetImtIndex();
-  ArtMethod* conflict_method = imt->Get(imt_index, sizeof(void*));
+  uint32_t imt_index = interface_method->GetDexMethodIndex();
+  ArtMethod* conflict_method = cls->GetEmbeddedImTableEntry(
+      imt_index % mirror::Class::kImtSize, sizeof(void*));
   if (conflict_method->IsRuntimeMethod()) {
     ArtMethod* new_conflict_method = Runtime::Current()->GetClassLinker()->AddMethodToConflictTable(
         cls.Get(),
@@ -2237,9 +2239,9 @@
     if (new_conflict_method != conflict_method) {
       // Update the IMT if we create a new conflict method. No fence needed here, as the
       // data is consistent.
-      imt->Set(imt_index,
-               new_conflict_method,
-               sizeof(void*));
+      cls->SetEmbeddedImTableEntry(imt_index % mirror::Class::kImtSize,
+                                   new_conflict_method,
+                                   sizeof(void*));
     }
   }
 
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 91deea0..7a624b2 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -121,10 +121,10 @@
 
     // Skip across the entrypoints structures.
 
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, thread_local_start, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, mterp_current_ibase, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, mterp_current_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_alt_ibase, rosalloc_runs, sizeof(void*));
@@ -287,7 +287,8 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowDivZero, pThrowNoSuchMethod, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowNoSuchMethod, pThrowNullPointer, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowNullPointer, pThrowStackOverflow, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowStackOverflow, pDeoptimize, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowStackOverflow, pThrowStringBounds, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pThrowStringBounds, pDeoptimize, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pDeoptimize, pA64Load, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pA64Load, pA64Store, sizeof(void*));
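The reordered offset checks above pin down the Thread::tlsPtr_ and QuickEntryPoints layouts that hand-written assembly indexes by constant offsets; thread_local_objects now sits between thread_local_end and mterp_current_ibase. The static_assert sketch below shows the same kind of invariant on an invented struct, not the real Thread type.

#include <cstddef>

struct TlsPtrSketch {
  void* thread_local_pos;
  void* thread_local_end;
  void* thread_local_objects;  // moved to sit after thread_local_end, as in the diff
  void* mterp_current_ibase;
};

static_assert(offsetof(TlsPtrSketch, thread_local_objects) -
                  offsetof(TlsPtrSketch, thread_local_end) == sizeof(void*),
              "thread_local_objects must directly follow thread_local_end");
static_assert(offsetof(TlsPtrSketch, mterp_current_ibase) -
                  offsetof(TlsPtrSketch, thread_local_objects) == sizeof(void*),
              "mterp_current_ibase must directly follow thread_local_objects");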
 
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 625b1e8..56e0fb7 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -96,6 +96,14 @@
 
   bool Action(int sig, siginfo_t* siginfo, void* context) OVERRIDE;
 
+  static bool IsValidImplicitCheck(siginfo_t* siginfo) {
+    // Our implicit NPE checks always limit the range to a page.
+    // Note that the runtime will do more exhaustive checks (that we cannot
+    // reasonably do in signal processing code) based on the dex instruction
+    // faulting.
+    return CanDoImplicitNullCheckOn(reinterpret_cast<uintptr_t>(siginfo->si_addr));
+  }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(NullPointerHandler);
 };
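IsValidImplicitCheck above accepts a fault only if the address falls in the range an implicit null check can legitimately touch, i.e. within the first page. A standalone sketch of that predicate follows; kPageSize = 4096 is assumed here, and the function mirrors but is not ART's CanDoImplicitNullCheckOn.

#include <cstdint>

constexpr uintptr_t kPageSize = 4096;

inline bool CanDoImplicitNullCheckOnSketch(uintptr_t fault_address) {
  // A load or store through a null reference only adds a small field offset, so the
  // faulting address must land inside the first, never-mapped page. Anything beyond
  // that must be reported as a real crash rather than turned into an NPE.
  return fault_address < kPageSize;
}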
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index d9f1507..6489a39 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -187,7 +187,7 @@
  public:
   AllocRecordStackVisitor(Thread* thread, size_t max_depth, AllocRecordStackTrace* trace_out)
       SHARED_REQUIRES(Locks::mutator_lock_)
-      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFramesNoResolve),
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         max_depth_(max_depth),
         trace_(trace_out) {}
 
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 64fa434..3011112 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -28,7 +28,7 @@
 namespace gc {
 namespace collector {
 
-inline mirror::Object* ConcurrentCopying::MarkUnevacFromSpaceRegionOrImmuneSpace(
+inline mirror::Object* ConcurrentCopying::MarkUnevacFromSpaceRegion(
     mirror::Object* ref, accounting::ContinuousSpaceBitmap* bitmap) {
   // For the Baker-style RB, in a rare case, we could incorrectly change the object from white
   // to gray even though the object has already been marked through. This happens if a mutator
@@ -69,6 +69,37 @@
   return ref;
 }
 
+template<bool kGrayImmuneObject>
+inline mirror::Object* ConcurrentCopying::MarkImmuneSpace(mirror::Object* ref) {
+  if (kUseBakerReadBarrier) {
+    // The GC-running thread doesn't (need to) gray immune objects except when updating thread roots
+    // in the thread flip on behalf of suspended threads (when gc_grays_immune_objects_ is
+    // true). Also, a mutator doesn't (need to) gray an immune object after GC has updated all
+    // immune space objects (when updated_all_immune_objects_ is true).
+    if (kIsDebugBuild) {
+      if (Thread::Current() == thread_running_gc_) {
+        DCHECK(!kGrayImmuneObject ||
+               updated_all_immune_objects_.LoadRelaxed() ||
+               gc_grays_immune_objects_);
+      } else {
+        DCHECK(kGrayImmuneObject);
+      }
+    }
+    if (!kGrayImmuneObject || updated_all_immune_objects_.LoadRelaxed()) {
+      return ref;
+    }
+    // This may or may not succeed, which is ok because the object may already be gray.
+    bool success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
+                                                    ReadBarrier::GrayPtr());
+    if (success) {
+      MutexLock mu(Thread::Current(), immune_gray_stack_lock_);
+      immune_gray_stack_.push_back(ref);
+    }
+  }
+  return ref;
+}
+
+template<bool kGrayImmuneObject>
 inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) {
   if (from_ref == nullptr) {
     return nullptr;
@@ -109,10 +140,14 @@
       return to_ref;
     }
     case space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace: {
-      return MarkUnevacFromSpaceRegionOrImmuneSpace(from_ref, region_space_bitmap_);
+      return MarkUnevacFromSpaceRegion(from_ref, region_space_bitmap_);
     }
     case space::RegionSpace::RegionType::kRegionTypeNone:
-      return MarkNonMoving(from_ref);
+      if (immune_spaces_.ContainsObject(from_ref)) {
+        return MarkImmuneSpace<kGrayImmuneObject>(from_ref);
+      } else {
+        return MarkNonMoving(from_ref);
+      }
     default:
       UNREACHABLE();
   }
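MarkImmuneSpace above grays an immune object with a white-to-gray CAS and records it on immune_gray_stack_ so the marking phase can whiten it later; a failed CAS simply means another thread grayed the object first. The self-contained sketch below mimics that step with std::atomic and a mutex-protected vector; all names are stand-ins, not ART types.

#include <atomic>
#include <cstdint>
#include <mutex>
#include <vector>

enum class RbState : uint8_t { kWhite, kGray };

struct FakeObject {
  std::atomic<RbState> rb_state{RbState::kWhite};
};

std::mutex immune_gray_stack_lock;
std::vector<FakeObject*> immune_gray_stack;

void MarkImmuneSketch(FakeObject* ref) {
  RbState expected = RbState::kWhite;
  // The CAS may fail because another thread got there first; that is fine, the
  // object only needs to be pushed onto the gray stack once.
  if (ref->rb_state.compare_exchange_strong(expected, RbState::kGray)) {
    std::lock_guard<std::mutex> lock(immune_gray_stack_lock);
    immune_gray_stack.push_back(ref);
  }
}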
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index dd75006..b7b5aa0 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -50,14 +50,16 @@
       mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock),
       thread_running_gc_(nullptr),
       is_marking_(false), is_active_(false), is_asserting_to_space_invariant_(false),
+      region_space_bitmap_(nullptr),
       heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0), mark_stack_mode_(kMarkStackModeOff),
       weak_ref_access_enabled_(true),
       skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock),
       rb_table_(heap_->GetReadBarrierTable()),
-      force_evacuate_all_(false) {
+      force_evacuate_all_(false),
+      immune_gray_stack_lock_("concurrent copying immune gray stack lock",
+                              kMarkSweepMarkStackLock) {
   static_assert(space::RegionSpace::kRegionSize == accounting::ReadBarrierTable::kRegionSize,
                 "The region space size and the read barrier table region size must match");
-  cc_heap_bitmap_.reset(new accounting::HeapBitmap(heap));
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
@@ -139,19 +141,10 @@
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
       CHECK(space->IsZygoteSpace() || space->IsImageSpace());
       immune_spaces_.AddSpace(space);
-      const char* bitmap_name = space->IsImageSpace() ? "cc image space bitmap" :
-          "cc zygote space bitmap";
-      // TODO: try avoiding using bitmaps for image/zygote to save space.
-      accounting::ContinuousSpaceBitmap* bitmap =
-          accounting::ContinuousSpaceBitmap::Create(bitmap_name, space->Begin(), space->Capacity());
-      cc_heap_bitmap_->AddContinuousSpaceBitmap(bitmap);
-      cc_bitmaps_.push_back(bitmap);
     } else if (space == region_space_) {
       accounting::ContinuousSpaceBitmap* bitmap =
           accounting::ContinuousSpaceBitmap::Create("cc region space bitmap",
                                                     space->Begin(), space->Capacity());
-      cc_heap_bitmap_->AddContinuousSpaceBitmap(bitmap);
-      cc_bitmaps_.push_back(bitmap);
       region_space_bitmap_ = bitmap;
     }
   }
@@ -179,6 +172,15 @@
   } else {
     force_evacuate_all_ = false;
   }
+  if (kUseBakerReadBarrier) {
+    updated_all_immune_objects_.StoreRelaxed(false);
+    // GC may gray immune objects in the thread flip.
+    gc_grays_immune_objects_ = true;
+    if (kIsDebugBuild) {
+      MutexLock mu(Thread::Current(), immune_gray_stack_lock_);
+      DCHECK(immune_gray_stack_.empty());
+    }
+  }
   BindBitmaps();
   if (kVerboseMode) {
     LOG(INFO) << "force_evacuate_all=" << force_evacuate_all_;
@@ -303,30 +305,6 @@
   live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
 }
 
-// Used to visit objects in the immune spaces.
-class ConcurrentCopying::ImmuneSpaceObjVisitor {
- public:
-  explicit ImmuneSpaceObjVisitor(ConcurrentCopying* cc) : collector_(cc) {}
-
-  void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_)
-      SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
-    DCHECK(obj != nullptr);
-    DCHECK(collector_->immune_spaces_.ContainsObject(obj));
-    accounting::ContinuousSpaceBitmap* cc_bitmap =
-        collector_->cc_heap_bitmap_->GetContinuousSpaceBitmap(obj);
-    DCHECK(cc_bitmap != nullptr)
-        << "An immune space object must have a bitmap";
-    if (kIsDebugBuild) {
-      DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj))
-          << "Immune space object must be already marked";
-    }
-    collector_->MarkUnevacFromSpaceRegionOrImmuneSpace(obj, cc_bitmap);
-  }
-
- private:
-  ConcurrentCopying* const collector_;
-};
-
 class EmptyCheckpoint : public Closure {
  public:
   explicit EmptyCheckpoint(ConcurrentCopying* concurrent_copying)
@@ -347,6 +325,27 @@
   ConcurrentCopying* const concurrent_copying_;
 };
 
+// Used to visit objects in the immune spaces.
+inline void ConcurrentCopying::ScanImmuneObject(mirror::Object* obj) {
+  DCHECK(obj != nullptr);
+  DCHECK(immune_spaces_.ContainsObject(obj));
+  // Update the fields without graying it or pushing it onto the mark stack.
+  Scan(obj);
+}
+
+class ConcurrentCopying::ImmuneSpaceScanObjVisitor {
+ public:
+  explicit ImmuneSpaceScanObjVisitor(ConcurrentCopying* cc)
+      : collector_(cc) {}
+
+  void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    collector_->ScanImmuneObject(obj);
+  }
+
+ private:
+  ConcurrentCopying* const collector_;
+};
+
 // Concurrently mark roots that are guarded by read barriers and process the mark stack.
 void ConcurrentCopying::MarkingPhase() {
   TimingLogger::ScopedTiming split("MarkingPhase", GetTimings());
@@ -354,25 +353,46 @@
     LOG(INFO) << "GC MarkingPhase";
   }
   CHECK(weak_ref_access_enabled_);
-  {
-    // Mark the image root. The WB-based collectors do not need to
-    // scan the image objects from roots by relying on the card table,
-    // but it's necessary for the RB to-space invariant to hold.
-    TimingLogger::ScopedTiming split1("VisitImageRoots", GetTimings());
-    for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) {
-      if (space->IsImageSpace()) {
-        gc::space::ImageSpace* image = space->AsImageSpace();
-        if (image != nullptr) {
-          mirror::ObjectArray<mirror::Object>* image_root = image->GetImageHeader().GetImageRoots();
-          mirror::Object* marked_image_root = Mark(image_root);
-          CHECK_EQ(image_root, marked_image_root) << "An image object does not move";
-          if (ReadBarrier::kEnableToSpaceInvariantChecks) {
-            AssertToSpaceInvariant(nullptr, MemberOffset(0), marked_image_root);
-          }
-        }
-      }
-    }
+
+  // Scan immune spaces.
+  // Update all the fields in the immune spaces first without graying the objects so that we
+  // minimize dirty pages in the immune spaces. Note mutators can concurrently access and gray some
+  // of the objects.
+  if (kUseBakerReadBarrier) {
+    gc_grays_immune_objects_ = false;
   }
+  for (auto& space : immune_spaces_.GetSpaces()) {
+    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    ImmuneSpaceScanObjVisitor visitor(this);
+    live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                  reinterpret_cast<uintptr_t>(space->Limit()),
+                                  visitor);
+  }
+  if (kUseBakerReadBarrier) {
+    // This release fence makes the field updates in the above loop visible before allowing mutators
+    // to access immune objects without graying them first.
+    updated_all_immune_objects_.StoreRelease(true);
+    // Now whiten immune objects concurrently accessed and grayed by mutators. We can't do this in
+    // the above loop because we would incorrectly disable the read barrier by whitening an object
+    // which may point to an unscanned, white object, breaking the to-space invariant.
+    //
+    // Make sure no mutators are in the middle of marking an immune object before whitening immune
+    // objects.
+    IssueEmptyCheckpoint();
+    MutexLock mu(Thread::Current(), immune_gray_stack_lock_);
+    if (kVerboseMode) {
+      LOG(INFO) << "immune gray stack size=" << immune_gray_stack_.size();
+    }
+    for (mirror::Object* obj : immune_gray_stack_) {
+      DCHECK(obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
+      bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
+                                                      ReadBarrier::WhitePtr());
+      DCHECK(success);
+    }
+    immune_gray_stack_.clear();
+  }
+
   {
     TimingLogger::ScopedTiming split2("VisitConcurrentRoots", GetTimings());
     Runtime::Current()->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
@@ -383,16 +403,6 @@
     Runtime::Current()->VisitNonThreadRoots(this);
   }
 
-  // Immune spaces.
-  for (auto& space : immune_spaces_.GetSpaces()) {
-    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
-    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
-    ImmuneSpaceObjVisitor visitor(this);
-    live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                  reinterpret_cast<uintptr_t>(space->Limit()),
-                                  visitor);
-  }
-
   Thread* self = Thread::Current();
   {
     TimingLogger::ScopedTiming split7("ProcessMarkStack", GetTimings());
@@ -1239,6 +1249,9 @@
     IssueEmptyCheckpoint();
     // Disable the check.
     is_mark_stack_push_disallowed_.StoreSequentiallyConsistent(0);
+    if (kUseBakerReadBarrier) {
+      updated_all_immune_objects_.StoreSequentiallyConsistent(false);
+    }
     CheckEmptyMarkStack();
   }
 
@@ -1288,13 +1301,9 @@
     SwapBitmaps();
     heap_->UnBindBitmaps();
 
-    // Remove bitmaps for the immune spaces.
-    while (!cc_bitmaps_.empty()) {
-      accounting::ContinuousSpaceBitmap* cc_bitmap = cc_bitmaps_.back();
-      cc_heap_bitmap_->RemoveContinuousSpaceBitmap(cc_bitmap);
-      delete cc_bitmap;
-      cc_bitmaps_.pop_back();
-    }
+    // Delete the region bitmap.
+    DCHECK(region_space_bitmap_ != nullptr);
+    delete region_space_bitmap_;
     region_space_bitmap_ = nullptr;
   }
 
@@ -1410,15 +1419,6 @@
     // In a non-moving space.
     if (immune_spaces_.ContainsObject(obj)) {
       LOG(INFO) << "holder is in an immune image or the zygote space.";
-      accounting::ContinuousSpaceBitmap* cc_bitmap =
-          cc_heap_bitmap_->GetContinuousSpaceBitmap(obj);
-      CHECK(cc_bitmap != nullptr)
-          << "An immune space object must have a bitmap.";
-      if (cc_bitmap->Test(obj)) {
-        LOG(INFO) << "holder is marked in the bit map.";
-      } else {
-        LOG(INFO) << "holder is NOT marked in the bit map.";
-      }
     } else {
       LOG(INFO) << "holder is in a non-immune, non-moving (or main) space.";
       accounting::ContinuousSpaceBitmap* mark_bitmap =
@@ -1449,17 +1449,17 @@
                                                                mirror::Object* ref) {
   // In a non-moving spaces. Check that the ref is marked.
   if (immune_spaces_.ContainsObject(ref)) {
-    accounting::ContinuousSpaceBitmap* cc_bitmap =
-        cc_heap_bitmap_->GetContinuousSpaceBitmap(ref);
-    CHECK(cc_bitmap != nullptr)
-        << "An immune space ref must have a bitmap. " << ref;
     if (kUseBakerReadBarrier) {
-      CHECK(cc_bitmap->Test(ref))
+      // Immune object may not be gray if called from the GC.
+      if (Thread::Current() == thread_running_gc_ && !gc_grays_immune_objects_) {
+        return;
+      }
+      bool updated_all_immune_objects = updated_all_immune_objects_.LoadSequentiallyConsistent();
+      CHECK(updated_all_immune_objects || ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
           << "Unmarked immune space ref. obj=" << obj << " rb_ptr="
-          << obj->GetReadBarrierPointer() << " ref=" << ref;
-    } else {
-      CHECK(cc_bitmap->Test(ref))
-          << "Unmarked immune space ref. obj=" << obj << " ref=" << ref;
+          << (obj != nullptr ? obj->GetReadBarrierPointer() : nullptr)
+          << " ref=" << ref << " ref rb_ptr=" << ref->GetReadBarrierPointer()
+          << " updated_all_immune_objects=" << updated_all_immune_objects;
     }
   } else {
     accounting::ContinuousSpaceBitmap* mark_bitmap =
@@ -1510,7 +1510,7 @@
   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
       ALWAYS_INLINE
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    collector_->MarkRoot(root);
+    collector_->MarkRoot</*kGrayImmuneObject*/false>(root);
   }
 
  private:
@@ -1520,6 +1520,7 @@
 // Scan ref fields of an object.
 inline void ConcurrentCopying::Scan(mirror::Object* to_ref) {
   DCHECK(!region_space_->IsInFromSpace(to_ref));
+  DCHECK_EQ(Thread::Current(), thread_running_gc_);
   RefFieldsVisitor visitor(this);
   // Disable the read barrier for a performance reason.
   to_ref->VisitReferences</*kVisitNativeRoots*/true, kDefaultVerifyFlags, kWithoutReadBarrier>(
@@ -1528,9 +1529,10 @@
 
 // Process a field.
 inline void ConcurrentCopying::Process(mirror::Object* obj, MemberOffset offset) {
+  DCHECK_EQ(Thread::Current(), thread_running_gc_);
   mirror::Object* ref = obj->GetFieldObject<
       mirror::Object, kVerifyNone, kWithoutReadBarrier, false>(offset);
-  mirror::Object* to_ref = Mark(ref);
+  mirror::Object* to_ref = Mark</*kGrayImmuneObject*/false>(ref);
   if (to_ref == ref) {
     return;
   }
@@ -1569,10 +1571,11 @@
   }
 }
 
+template<bool kGrayImmuneObject>
 inline void ConcurrentCopying::MarkRoot(mirror::CompressedReference<mirror::Object>* root) {
   DCHECK(!root->IsNull());
   mirror::Object* const ref = root->AsMirrorPtr();
-  mirror::Object* to_ref = Mark(ref);
+  mirror::Object* to_ref = Mark<kGrayImmuneObject>(ref);
   if (to_ref != ref) {
     auto* addr = reinterpret_cast<Atomic<mirror::CompressedReference<mirror::Object>>*>(root);
     auto expected_ref = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(ref);
@@ -1593,14 +1596,46 @@
   for (size_t i = 0; i < count; ++i) {
     mirror::CompressedReference<mirror::Object>* const root = roots[i];
     if (!root->IsNull()) {
-      MarkRoot(root);
+      // kGrayImmuneObject is true because this is used for the thread flip.
+      MarkRoot</*kGrayImmuneObject*/true>(root);
     }
   }
 }
 
+// Temporarily set gc_grays_immune_objects_ to true in a scope if the current thread is the GC.
+class ConcurrentCopying::ScopedGcGraysImmuneObjects {
+ public:
+  explicit ScopedGcGraysImmuneObjects(ConcurrentCopying* collector)
+      : collector_(collector), enabled_(false) {
+    if (kUseBakerReadBarrier &&
+        collector_->thread_running_gc_ == Thread::Current() &&
+        !collector_->gc_grays_immune_objects_) {
+      collector_->gc_grays_immune_objects_ = true;
+      enabled_ = true;
+    }
+  }
+
+  ~ScopedGcGraysImmuneObjects() {
+    if (kUseBakerReadBarrier &&
+        collector_->thread_running_gc_ == Thread::Current() &&
+        enabled_) {
+      DCHECK(collector_->gc_grays_immune_objects_);
+      collector_->gc_grays_immune_objects_ = false;
+    }
+  }
+
+ private:
+  ConcurrentCopying* const collector_;
+  bool enabled_;
+};
+
 // Fill the given memory block with a dummy object. Used to fill in a
 // copy of an object that was lost in a race.
 void ConcurrentCopying::FillWithDummyObject(mirror::Object* dummy_obj, size_t byte_size) {
+  // The GC does not gray immune objects while scanning them, but we still need to trigger read
+  // barriers here because we need the updated reference to the int array class, etc. Temporarily
+  // set gc_grays_immune_objects_ to true so that we do not trip the DCHECK in MarkImmuneSpace().
+  ScopedGcGraysImmuneObjects scoped_gc_gray_immune_objects(this);
   CHECK_ALIGNED(byte_size, kObjectAlignment);
   memset(dummy_obj, 0, byte_size);
   mirror::Class* int_array_class = mirror::IntArray::GetArrayClass();
@@ -1836,21 +1871,8 @@
   } else {
     // from_ref is in a non-moving space.
     if (immune_spaces_.ContainsObject(from_ref)) {
-      accounting::ContinuousSpaceBitmap* cc_bitmap =
-          cc_heap_bitmap_->GetContinuousSpaceBitmap(from_ref);
-      DCHECK(cc_bitmap != nullptr)
-          << "An immune space object must have a bitmap";
-      if (kIsDebugBuild) {
-        DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref)->Test(from_ref))
-            << "Immune space object must be already marked";
-      }
-      if (cc_bitmap->Test(from_ref)) {
-        // Already marked.
-        to_ref = from_ref;
-      } else {
-        // Newly marked.
-        to_ref = nullptr;
-      }
+      // An immune object is alive.
+      to_ref = from_ref;
     } else {
       // Non-immune non-moving space. Use the mark bitmap.
       accounting::ContinuousSpaceBitmap* mark_bitmap =
@@ -1889,85 +1911,74 @@
 mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref) {
   // ref is in a non-moving space (from_ref == to_ref).
   DCHECK(!region_space_->HasAddress(ref)) << ref;
-  if (immune_spaces_.ContainsObject(ref)) {
-    accounting::ContinuousSpaceBitmap* cc_bitmap =
-        cc_heap_bitmap_->GetContinuousSpaceBitmap(ref);
-    DCHECK(cc_bitmap != nullptr)
-        << "An immune space object must have a bitmap";
-    if (kIsDebugBuild) {
-      DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(ref)->Test(ref))
-          << "Immune space object must be already marked";
+  DCHECK(!immune_spaces_.ContainsObject(ref));
+  // Use the mark bitmap.
+  accounting::ContinuousSpaceBitmap* mark_bitmap =
+      heap_mark_bitmap_->GetContinuousSpaceBitmap(ref);
+  accounting::LargeObjectBitmap* los_bitmap =
+      heap_mark_bitmap_->GetLargeObjectBitmap(ref);
+  CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
+  bool is_los = mark_bitmap == nullptr;
+  if (!is_los && mark_bitmap->Test(ref)) {
+    // Already marked.
+    if (kUseBakerReadBarrier) {
+      DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
+             ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
     }
-    MarkUnevacFromSpaceRegionOrImmuneSpace(ref, cc_bitmap);
+  } else if (is_los && los_bitmap->Test(ref)) {
+    // Already marked in LOS.
+    if (kUseBakerReadBarrier) {
+      DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
+             ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
+    }
   } else {
-    // Use the mark bitmap.
-    accounting::ContinuousSpaceBitmap* mark_bitmap =
-        heap_mark_bitmap_->GetContinuousSpaceBitmap(ref);
-    accounting::LargeObjectBitmap* los_bitmap =
-        heap_mark_bitmap_->GetLargeObjectBitmap(ref);
-    CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range";
-    bool is_los = mark_bitmap == nullptr;
-    if (!is_los && mark_bitmap->Test(ref)) {
-      // Already marked.
-      if (kUseBakerReadBarrier) {
-        DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-               ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
+    // Not marked.
+    if (IsOnAllocStack(ref)) {
+      // If it's on the allocation stack, it's considered marked. Keep it white.
+      // Objects on the allocation stack need not be marked.
+      if (!is_los) {
+        DCHECK(!mark_bitmap->Test(ref));
+      } else {
+        DCHECK(!los_bitmap->Test(ref));
       }
-    } else if (is_los && los_bitmap->Test(ref)) {
-      // Already marked in LOS.
       if (kUseBakerReadBarrier) {
-        DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-               ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
+        DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
       }
     } else {
-      // Not marked.
-      if (IsOnAllocStack(ref)) {
-        // If it's on the allocation stack, it's considered marked. Keep it white.
-        // Objects on the allocation stack need not be marked.
-        if (!is_los) {
-          DCHECK(!mark_bitmap->Test(ref));
-        } else {
-          DCHECK(!los_bitmap->Test(ref));
+      // For the baker-style RB, we need to handle 'false-gray' cases. See the
+      // kRegionTypeUnevacFromSpace-case comment in Mark().
+      if (kUseBakerReadBarrier) {
+        // Test the bitmap first to reduce the chance of false gray cases.
+        if ((!is_los && mark_bitmap->Test(ref)) ||
+            (is_los && los_bitmap->Test(ref))) {
+          return ref;
         }
-        if (kUseBakerReadBarrier) {
-          DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
+      }
+      // Not marked or on the allocation stack. Try to mark it.
+      // This may or may not succeed, which is ok.
+      bool cas_success = false;
+      if (kUseBakerReadBarrier) {
+        cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
+                                                       ReadBarrier::GrayPtr());
+      }
+      if (!is_los && mark_bitmap->AtomicTestAndSet(ref)) {
+        // Already marked.
+        if (kUseBakerReadBarrier && cas_success &&
+            ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+          PushOntoFalseGrayStack(ref);
+        }
+      } else if (is_los && los_bitmap->AtomicTestAndSet(ref)) {
+        // Already marked in LOS.
+        if (kUseBakerReadBarrier && cas_success &&
+            ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+          PushOntoFalseGrayStack(ref);
         }
       } else {
-        // For the baker-style RB, we need to handle 'false-gray' cases. See the
-        // kRegionTypeUnevacFromSpace-case comment in Mark().
+        // Newly marked.
         if (kUseBakerReadBarrier) {
-          // Test the bitmap first to reduce the chance of false gray cases.
-          if ((!is_los && mark_bitmap->Test(ref)) ||
-              (is_los && los_bitmap->Test(ref))) {
-            return ref;
-          }
+          DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
         }
-        // Not marked or on the allocation stack. Try to mark it.
-        // This may or may not succeed, which is ok.
-        bool cas_success = false;
-        if (kUseBakerReadBarrier) {
-          cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
-                                                         ReadBarrier::GrayPtr());
-        }
-        if (!is_los && mark_bitmap->AtomicTestAndSet(ref)) {
-          // Already marked.
-          if (kUseBakerReadBarrier && cas_success &&
-              ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
-            PushOntoFalseGrayStack(ref);
-          }
-        } else if (is_los && los_bitmap->AtomicTestAndSet(ref)) {
-          // Already marked in LOS.
-          if (kUseBakerReadBarrier && cas_success &&
-              ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
-            PushOntoFalseGrayStack(ref);
-          }
-        } else {
-          // Newly marked.
-          if (kUseBakerReadBarrier) {
-            DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
-          }
-          PushOntoMarkStack(ref);
-        }
+        PushOntoMarkStack(ref);
       }
     }
   }
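
The concurrent_copying.cc hunks above drop the per-immune-space mark bitmaps and instead rely on graying immune objects (or skipping the graying entirely when the GC thread itself scans them), with ScopedGcGraysImmuneObjects temporarily re-enabling graying around code that must take read barriers. Below is a minimal standalone sketch of that RAII scoped-flag idiom; the Collector type and all names are invented for illustration, and only the enable-in-constructor / restore-in-destructor shape mirrors the real class.

#include <cassert>

// Invented stand-in for the collector state touched by ScopedGcGraysImmuneObjects.
struct Collector {
  bool gc_grays_immune_objects = false;
};

class ScopedGraysImmune {
 public:
  explicit ScopedGraysImmune(Collector* collector)
      : collector_(collector), enabled_(false) {
    if (!collector_->gc_grays_immune_objects) {
      collector_->gc_grays_immune_objects = true;  // Only flip the flag if it was off.
      enabled_ = true;
    }
  }
  ~ScopedGraysImmune() {
    if (enabled_) {
      assert(collector_->gc_grays_immune_objects);
      collector_->gc_grays_immune_objects = false;  // Restore the previous state.
    }
  }
 private:
  Collector* const collector_;
  bool enabled_;
};

int main() {
  Collector c;
  {
    ScopedGraysImmune scope(&c);
    assert(c.gc_grays_immune_objects);  // Graying is forced on inside the scope.
  }
  assert(!c.gc_grays_immune_objects);   // And restored afterwards.
  return 0;
}
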
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index a986a7a..166a1f0 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -61,10 +61,12 @@
   ConcurrentCopying(Heap* heap, const std::string& name_prefix = "");
   ~ConcurrentCopying();
 
-  virtual void RunPhases() OVERRIDE REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
-  void InitializePhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
+  virtual void RunPhases() OVERRIDE
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+  void InitializePhase() SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !immune_gray_stack_lock_);
   void MarkingPhase() SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   void ReclaimPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
   void FinishPhase() REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
 
@@ -92,8 +94,9 @@
     DCHECK(ref != nullptr);
     return IsMarked(ref) == ref;
   }
+  template<bool kGrayImmuneObject = true>
   ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   bool IsMarking() const {
     return is_marking_;
   }
@@ -117,16 +120,19 @@
   void Scan(mirror::Object* to_ref) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
   void Process(mirror::Object* obj, MemberOffset offset)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_ , !skipped_blocks_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_ , !skipped_blocks_lock_, !immune_gray_stack_lock_);
   virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
+  template<bool kGrayImmuneObject>
   void MarkRoot(mirror::CompressedReference<mirror::Object>* root)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
                           const RootInfo& info)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   void VerifyNoFromSpaceReferences() REQUIRES(Locks::mutator_lock_);
   accounting::ObjectStack* GetAllocationStack();
   accounting::ObjectStack* GetLiveStack();
@@ -146,9 +152,11 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   void ProcessReferences(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
   virtual mirror::Object* MarkObject(mirror::Object* from_ref) OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* from_ref) OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_);
   virtual mirror::Object* IsMarked(mirror::Object* from_ref) OVERRIDE
       SHARED_REQUIRES(Locks::mutator_lock_);
   virtual bool IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* field) OVERRIDE
@@ -182,14 +190,19 @@
   void ExpandGcMarkStack() SHARED_REQUIRES(Locks::mutator_lock_);
   mirror::Object* MarkNonMoving(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
-  ALWAYS_INLINE mirror::Object* MarkUnevacFromSpaceRegionOrImmuneSpace(mirror::Object* from_ref,
+  ALWAYS_INLINE mirror::Object* MarkUnevacFromSpaceRegion(mirror::Object* from_ref,
       accounting::SpaceBitmap<kObjectAlignment>* bitmap)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
+  template<bool kGrayImmuneObject>
+  ALWAYS_INLINE mirror::Object* MarkImmuneSpace(mirror::Object* from_ref)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!immune_gray_stack_lock_);
   void PushOntoFalseGrayStack(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
   void ProcessFalseGrayStack() SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
+  void ScanImmuneObject(mirror::Object* obj)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_);
 
   space::RegionSpace* region_space_;      // The underlying region space.
   std::unique_ptr<Barrier> gc_barrier_;
@@ -207,8 +220,6 @@
   bool is_active_;                        // True while the collection is ongoing.
   bool is_asserting_to_space_invariant_;  // True while asserting the to-space invariant.
   ImmuneSpaces immune_spaces_;
-  std::unique_ptr<accounting::HeapBitmap> cc_heap_bitmap_;
-  std::vector<accounting::SpaceBitmap<kObjectAlignment>*> cc_bitmaps_;
   accounting::SpaceBitmap<kObjectAlignment>* region_space_bitmap_;
   // A cache of Heap::GetMarkBitmap().
   accounting::HeapBitmap* heap_mark_bitmap_;
@@ -242,6 +253,10 @@
 
   accounting::ReadBarrierTable* rb_table_;
   bool force_evacuate_all_;  // True if all regions are evacuated.
+  Atomic<bool> updated_all_immune_objects_;
+  bool gc_grays_immune_objects_;
+  Mutex immune_gray_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  std::vector<mirror::Object*> immune_gray_stack_ GUARDED_BY(immune_gray_stack_lock_);
 
   class AssertToSpaceInvariantFieldVisitor;
   class AssertToSpaceInvariantObjectVisitor;
@@ -250,14 +265,15 @@
   class ComputeUnevacFromSpaceLiveRatioVisitor;
   class DisableMarkingCheckpoint;
   class FlipCallback;
-  class ImmuneSpaceObjVisitor;
+  class ImmuneSpaceScanObjVisitor;
   class LostCopyVisitor;
   class RefFieldsVisitor;
   class RevokeThreadLocalMarkStackCheckpoint;
+  class ScopedGcGraysImmuneObjects;
+  class ThreadFlipVisitor;
   class VerifyNoFromSpaceRefsFieldVisitor;
   class VerifyNoFromSpaceRefsObjectVisitor;
   class VerifyNoFromSpaceRefsVisitor;
-  class ThreadFlipVisitor;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(ConcurrentCopying);
 };
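
Most of the concurrent_copying.h changes above simply thread REQUIRES(!immune_gray_stack_lock_) through every path that may push onto the new immune gray stack. For readers unfamiliar with these annotations, here is a small self-contained sketch of Clang's -Wthread-safety attributes in the same style; the macro spellings follow Clang's documentation and are assumptions, not ART's own base/mutex.h definitions.

#include <mutex>
#include <vector>

// Macro spellings as shown in the Clang thread-safety documentation (assumed, not ART's).
#define CAPABILITY(x) __attribute__((capability(x)))
#define ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__)))
#define RELEASE(...) __attribute__((release_capability(__VA_ARGS__)))
#define REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__)))
#define GUARDED_BY(x) __attribute__((guarded_by(x)))

class CAPABILITY("mutex") Mutex {
 public:
  void Lock() ACQUIRE() { mu_.lock(); }
  void Unlock() RELEASE() { mu_.unlock(); }
 private:
  std::mutex mu_;
};

class GrayStack {
 public:
  // REQUIRES(!lock_): callers must not already hold lock_, because Push acquires it.
  void Push(void* ref) REQUIRES(!lock_) {
    lock_.Lock();
    stack_.push_back(ref);
    lock_.Unlock();
  }
 private:
  Mutex lock_;
  std::vector<void*> stack_ GUARDED_BY(lock_);
};
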
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 8cadc2e..e896c7a 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -1130,10 +1130,6 @@
       image_header.VisitPackedArtFields(&field_visitor, target_base);
     }
     {
-      TimingLogger::ScopedTiming timing("Fixup imt", &logger);
-      image_header.VisitPackedImTables(fixup_adapter, target_base, pointer_size);
-    }
-    {
       TimingLogger::ScopedTiming timing("Fixup conflict tables", &logger);
       image_header.VisitPackedImtConflictTables(fixup_adapter, target_base, pointer_size);
     }
diff --git a/runtime/globals.h b/runtime/globals.h
index 477cbdf..0b44c47 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -40,6 +40,12 @@
 // compile-time constant so the compiler can generate better code.
 static constexpr int kPageSize = 4096;
 
+// Returns whether the given memory offset can be used for generating
+// an implicit null check.
+static inline bool CanDoImplicitNullCheckOn(uintptr_t offset) {
+  return offset < kPageSize;
+}
+
 // Required object alignment
 static constexpr size_t kObjectAlignment = 8;
 static constexpr size_t kLargeObjectAlignment = kPageSize;
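
The CanDoImplicitNullCheckOn() helper added above encodes the usual implicit-null-check trick: a field access whose offset is smaller than the page size will fault inside the unmapped first page when the base pointer is null, so the fault handler can report the null dereference and the compiler can skip the explicit compare. A tiny standalone illustration (not ART code) follows.

#include <cstdint>
#include <cstdio>

static constexpr int kPageSize = 4096;

// Same predicate as the helper added above: offsets inside the first page may rely on
// the fault handler, larger offsets need an explicit null test in generated code.
static inline bool CanDoImplicitNullCheckOn(uintptr_t offset) {
  return offset < kPageSize;
}

int main() {
  std::printf("offset 64    -> implicit check ok: %d\n", CanDoImplicitNullCheckOn(64));
  std::printf("offset 8192  -> implicit check ok: %d\n", CanDoImplicitNullCheckOn(8192));
  return 0;
}
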
diff --git a/runtime/image-inl.h b/runtime/image-inl.h
index cd0557a..ea75a62 100644
--- a/runtime/image-inl.h
+++ b/runtime/image-inl.h
@@ -20,7 +20,6 @@
 #include "image.h"
 
 #include "art_method.h"
-#include "imtable.h"
 
 namespace art {
 
@@ -46,24 +45,6 @@
 }
 
 template <typename Visitor>
-inline void ImageHeader::VisitPackedImTables(const Visitor& visitor,
-                                             uint8_t* base,
-                                             size_t pointer_size) const {
-  const ImageSection& section = GetImageSection(kSectionImTables);
-  for (size_t pos = 0; pos < section.Size();) {
-    ImTable* imt = reinterpret_cast<ImTable*>(base + section.Offset() + pos);
-    for (size_t i = 0; i < ImTable::kSize; ++i) {
-      ArtMethod* orig = imt->Get(i, pointer_size);
-      ArtMethod* updated = visitor(orig);
-      if (updated != orig) {
-        imt->Set(i, updated, pointer_size);
-      }
-    }
-    pos += ImTable::SizeInBytes(pointer_size);
-  }
-}
-
-template <typename Visitor>
 inline void ImageHeader::VisitPackedImtConflictTables(const Visitor& visitor,
                                                       uint8_t* base,
                                                       size_t pointer_size) const {
diff --git a/runtime/image.cc b/runtime/image.cc
index 2362a92..a9552c2 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '0', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '9', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/image.h b/runtime/image.h
index 06f06ee..2ea9af7 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -195,7 +195,6 @@
     kSectionArtFields,
     kSectionArtMethods,
     kSectionRuntimeMethods,
-    kSectionImTables,
     kSectionIMTConflictTables,
     kSectionDexCacheArrays,
     kSectionInternedStrings,
@@ -280,11 +279,6 @@
   void VisitPackedArtFields(ArtFieldVisitor* visitor, uint8_t* base) const;
 
   template <typename Visitor>
-  void VisitPackedImTables(const Visitor& visitor,
-                           uint8_t* base,
-                           size_t pointer_size) const;
-
-  template <typename Visitor>
   void VisitPackedImtConflictTables(const Visitor& visitor,
                                     uint8_t* base,
                                     size_t pointer_size) const;
diff --git a/runtime/imtable.h b/runtime/imtable.h
deleted file mode 100644
index 51faf70..0000000
--- a/runtime/imtable.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_IMTABLE_H_
-#define ART_RUNTIME_IMTABLE_H_
-
-#ifndef IMT_SIZE
-#error IMT_SIZE not defined
-#endif
-
-namespace art {
-
-class ArtMethod;
-
-class ImTable {
- public:
-  // Interface method table size. Increasing this value reduces the chance of two interface methods
-  // colliding in the interface method table but increases the size of classes that implement
-  // (non-marker) interfaces.
-  static constexpr size_t kSize = IMT_SIZE;
-
-  ArtMethod* Get(size_t index, size_t pointer_size) {
-    DCHECK_LT(index, kSize);
-    uint8_t* ptr = reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
-    if (pointer_size == 4) {
-      uint32_t value = *reinterpret_cast<uint32_t*>(ptr);
-      return reinterpret_cast<ArtMethod*>(value);
-    } else {
-      uint64_t value = *reinterpret_cast<uint64_t*>(ptr);
-      return reinterpret_cast<ArtMethod*>(value);
-    }
-  }
-
-  void Set(size_t index, ArtMethod* method, size_t pointer_size) {
-    DCHECK_LT(index, kSize);
-    uint8_t* ptr = reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
-    if (pointer_size == 4) {
-      uintptr_t value = reinterpret_cast<uintptr_t>(method);
-      DCHECK_EQ(static_cast<uint32_t>(value), value);  // Check that we dont lose any non 0 bits.
-      *reinterpret_cast<uint32_t*>(ptr) = static_cast<uint32_t>(value);
-    } else {
-      *reinterpret_cast<uint64_t*>(ptr) = reinterpret_cast<uint64_t>(method);
-    }
-  }
-
-  static size_t OffsetOfElement(size_t index, size_t pointer_size) {
-    return index * pointer_size;
-  }
-
-  void Populate(ArtMethod** data, size_t pointer_size) {
-    for (size_t i = 0; i < kSize; ++i) {
-      Set(i, data[i], pointer_size);
-    }
-  }
-
-  constexpr static size_t SizeInBytes(size_t pointer_size) {
-    return kSize * pointer_size;
-  }
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_IMTABLE_H_
-
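
The deleted ImTable above stored its entries as raw 4- or 8-byte slots depending on the target pointer size, which is why Get() and Set() branch on pointer_size. For reference, here is a standalone sketch of that packed-pointer access pattern; the function name is invented and memcpy replaces the raw casts of the original.

#include <cstdint>
#include <cstring>

// Reads slot `index` from a table whose entries are pointer_size bytes wide,
// mirroring what the removed ImTable::Get() did for 32-bit and 64-bit images.
void* GetPackedPointer(const uint8_t* table, size_t index, size_t pointer_size) {
  const uint8_t* slot = table + index * pointer_size;
  if (pointer_size == 4) {
    uint32_t value;
    std::memcpy(&value, slot, sizeof(value));
    return reinterpret_cast<void*>(static_cast<uintptr_t>(value));
  }
  uint64_t value;
  std::memcpy(&value, slot, sizeof(value));
  return reinterpret_cast<void*>(static_cast<uintptr_t>(value));
}
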
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 3750b7a..cc470f3 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -679,7 +679,7 @@
     return false;
   }
   const uint32_t vtable_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
-  CHECK(receiver->GetClass()->ShouldHaveEmbeddedVTable());
+  CHECK(receiver->GetClass()->ShouldHaveEmbeddedImtAndVTable());
   ArtMethod* const called_method = receiver->GetClass()->GetEmbeddedVTableEntry(
       vtable_idx, sizeof(void*));
   if (UNLIKELY(called_method == nullptr)) {
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index ae5a0f6..cfe6cd1 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -59,8 +59,8 @@
       options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheMaxCapacity);
   jit_options->dump_info_on_shutdown_ =
       options.Exists(RuntimeArgumentMap::DumpJITInfoOnShutdown);
-  jit_options->save_profiling_info_ =
-      options.GetOrDefault(RuntimeArgumentMap::JITSaveProfilingInfo);
+  jit_options->profile_saver_options_ =
+      options.GetOrDefault(RuntimeArgumentMap::ProfileSaverOpts);
 
   jit_options->compile_threshold_ = options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold);
   if (jit_options->compile_threshold_ > std::numeric_limits<uint16_t>::max()) {
@@ -144,11 +144,10 @@
              cumulative_timings_("JIT timings"),
              memory_use_("Memory used for compilation", 16),
              lock_("JIT memory use lock"),
-             use_jit_compilation_(true),
-             save_profiling_info_(false) {}
+             use_jit_compilation_(true) {}
 
 Jit* Jit::Create(JitOptions* options, std::string* error_msg) {
-  DCHECK(options->UseJitCompilation() || options->GetSaveProfilingInfo());
+  DCHECK(options->UseJitCompilation() || options->GetProfileSaverOptions().IsEnabled());
   std::unique_ptr<Jit> jit(new Jit);
   jit->dump_info_on_shutdown_ = options->DumpJitInfoOnShutdown();
   if (jit_compiler_handle_ == nullptr && !LoadCompiler(error_msg)) {
@@ -163,12 +162,12 @@
     return nullptr;
   }
   jit->use_jit_compilation_ = options->UseJitCompilation();
-  jit->save_profiling_info_ = options->GetSaveProfilingInfo();
+  jit->profile_saver_options_ = options->GetProfileSaverOptions();
   VLOG(jit) << "JIT created with initial_capacity="
       << PrettySize(options->GetCodeCacheInitialCapacity())
       << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity())
       << ", compile_threshold=" << options->GetCompileThreshold()
-      << ", save_profiling_info=" << options->GetSaveProfilingInfo();
+      << ", profile_saver_options=" << options->GetProfileSaverOptions();
 
 
   jit->hot_method_threshold_ = options->GetCompileThreshold();
@@ -310,13 +309,18 @@
                             const std::vector<std::string>& code_paths,
                             const std::string& foreign_dex_profile_path,
                             const std::string& app_dir) {
-  if (save_profiling_info_) {
-    ProfileSaver::Start(filename, code_cache_.get(), code_paths, foreign_dex_profile_path, app_dir);
+  if (profile_saver_options_.IsEnabled()) {
+    ProfileSaver::Start(profile_saver_options_,
+                        filename,
+                        code_cache_.get(),
+                        code_paths,
+                        foreign_dex_profile_path,
+                        app_dir);
   }
 }
 
 void Jit::StopProfileSaver() {
-  if (save_profiling_info_ && ProfileSaver::IsStarted()) {
+  if (profile_saver_options_.IsEnabled() && ProfileSaver::IsStarted()) {
     ProfileSaver::Stop(dump_info_on_shutdown_);
   }
 }
@@ -330,7 +334,7 @@
 }
 
 Jit::~Jit() {
-  DCHECK(!save_profiling_info_ || !ProfileSaver::IsStarted());
+  DCHECK(!profile_saver_options_.IsEnabled() || !ProfileSaver::IsStarted());
   if (dump_info_on_shutdown_) {
     DumpInfo(LOG(INFO));
   }
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index f3a6240..2aa6f3d 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -24,6 +24,7 @@
 #include "base/timing_logger.h"
 #include "object_callbacks.h"
 #include "offline_profiling_info.h"
+#include "jit/profile_saver_options.h"
 #include "thread_pool.h"
 
 namespace art {
@@ -92,8 +93,8 @@
     return use_jit_compilation_;
   }
 
-  bool SaveProfilingInfo() const {
-    return save_profiling_info_;
+  bool GetSaveProfilingInfo() const {
+    return profile_saver_options_.IsEnabled();
   }
 
   // Wait until there is no more pending compilation tasks.
@@ -189,7 +190,7 @@
   std::unique_ptr<jit::JitCodeCache> code_cache_;
 
   bool use_jit_compilation_;
-  bool save_profiling_info_;
+  ProfileSaverOptions profile_saver_options_;
   static bool generate_debug_info_;
   uint16_t hot_method_threshold_;
   uint16_t warm_method_threshold_;
@@ -228,8 +229,11 @@
   bool DumpJitInfoOnShutdown() const {
     return dump_info_on_shutdown_;
   }
+  const ProfileSaverOptions& GetProfileSaverOptions() const {
+    return profile_saver_options_;
+  }
   bool GetSaveProfilingInfo() const {
-    return save_profiling_info_;
+    return profile_saver_options_.IsEnabled();
   }
   bool UseJitCompilation() const {
     return use_jit_compilation_;
@@ -237,8 +241,8 @@
   void SetUseJitCompilation(bool b) {
     use_jit_compilation_ = b;
   }
-  void SetSaveProfilingInfo(bool b) {
-    save_profiling_info_ = b;
+  void SetSaveProfilingInfo(bool save_profiling_info) {
+    profile_saver_options_.SetEnabled(save_profiling_info);
   }
   void SetJitAtFirstUse() {
     use_jit_compilation_ = true;
@@ -255,15 +259,14 @@
   uint16_t priority_thread_weight_;
   size_t invoke_transition_weight_;
   bool dump_info_on_shutdown_;
-  bool save_profiling_info_;
+  ProfileSaverOptions profile_saver_options_;
 
   JitOptions()
       : use_jit_compilation_(false),
         code_cache_initial_capacity_(0),
         code_cache_max_capacity_(0),
         compile_threshold_(0),
-        dump_info_on_shutdown_(false),
-        save_profiling_info_(false) { }
+        dump_info_on_shutdown_(false) {}
 
   DISALLOW_COPY_AND_ASSIGN(JitOptions);
 };
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index c99d363..5039d2d 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -19,6 +19,7 @@
 #include "errno.h"
 #include <limits.h>
 #include <vector>
+#include <stdlib.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
@@ -40,6 +41,11 @@
 
 static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
 
+// Debug flag to ignore checksums when testing if a method or a class is present in the profile.
+// Used to facilitate testing profile guided compilation across a large number of apps
+// using the same test profile.
+static constexpr bool kDebugIgnoreChecksum = false;
+
 // Transform the actual dex location into relative paths.
 // Note: this is OK because we don't store profiles of different apps into the same file.
 // Apps with split apks don't cause trouble because each split has a different name and will not
@@ -547,10 +553,14 @@
   return true;
 }
 
+static bool ChecksumMatch(const DexFile& dex_file, uint32_t checksum) {
+  return kDebugIgnoreChecksum || dex_file.GetLocationChecksum() == checksum;
+}
+
 bool ProfileCompilationInfo::ContainsMethod(const MethodReference& method_ref) const {
   auto info_it = info_.find(GetProfileDexFileKey(method_ref.dex_file->GetLocation()));
   if (info_it != info_.end()) {
-    if (method_ref.dex_file->GetLocationChecksum() != info_it->second.checksum) {
+    if (!ChecksumMatch(*method_ref.dex_file, info_it->second.checksum)) {
       return false;
     }
     const std::set<uint16_t>& methods = info_it->second.method_set;
@@ -562,7 +572,7 @@
 bool ProfileCompilationInfo::ContainsClass(const DexFile& dex_file, uint16_t class_def_idx) const {
   auto info_it = info_.find(GetProfileDexFileKey(dex_file.GetLocation()));
   if (info_it != info_.end()) {
-    if (dex_file.GetLocationChecksum() != info_it->second.checksum) {
+    if (!ChecksumMatch(dex_file, info_it->second.checksum)) {
       return false;
     }
     const std::set<uint16_t>& classes = info_it->second.class_set;
@@ -659,4 +669,47 @@
   }
 }
 
+// Naive implementation to generate a random profile file suitable for testing.
+bool ProfileCompilationInfo::GenerateTestProfile(int fd,
+                                                 uint16_t number_of_dex_files,
+                                                 uint16_t method_ratio,
+                                                 uint16_t class_ratio) {
+  const std::string base_dex_location = "base.apk";
+  ProfileCompilationInfo info;
+  // The limits are defined by the dex specification.
+  uint16_t max_method = std::numeric_limits<uint16_t>::max();
+  uint16_t max_classes = std::numeric_limits<uint16_t>::max();
+  uint16_t number_of_methods = max_method * method_ratio / 100;
+  uint16_t number_of_classes = max_classes * class_ratio / 100;
+
+  srand(MicroTime());
+
+  // Make sure we generate more samples with a low index value.
+  // This makes it more likely to hit valid method/class indices in small apps.
+  const uint16_t kFavorFirstN = 10000;
+  const uint16_t kFavorSplit = 2;
+
+  for (uint16_t i = 0; i < number_of_dex_files; i++) {
+    std::string dex_location = DexFile::GetMultiDexLocation(i, base_dex_location.c_str());
+    std::string profile_key = GetProfileDexFileKey(dex_location);
+
+    for (uint16_t m = 0; m < number_of_methods; m++) {
+      uint16_t method_idx = rand() % max_method;
+      if (m < (number_of_methods / kFavorSplit)) {
+        method_idx %= kFavorFirstN;
+      }
+      info.AddMethodIndex(profile_key, 0, method_idx);
+    }
+
+    for (uint16_t c = 0; c < number_of_classes; c++) {
+      uint16_t class_idx = rand() % max_classes;
+      if (c < (number_of_classes / kFavorSplit)) {
+        class_idx %= kFavorFirstN;
+      }
+      info.AddClassIndex(profile_key, 0, class_idx);
+    }
+  }
+  return info.Save(fd);
+}
+
 }  // namespace art
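
GenerateTestProfile() above deliberately skews its random method and class indices: the first portion of the samples (controlled by kFavorSplit) is folded into the low range kFavorFirstN, so the generated profile still hits valid indices in small dex files. The folding step in isolation, as a small sketch with illustrative names:

#include <cstdint>
#include <cstdlib>

// Returns a random index in [0, max_index), folding the first (total / favor_split)
// samples into [0, favor_first_n), as GenerateTestProfile does above.
uint16_t BiasedIndex(uint16_t sample, uint16_t total, uint16_t max_index,
                     uint16_t favor_first_n, uint16_t favor_split) {
  uint16_t idx = static_cast<uint16_t>(std::rand() % max_index);
  if (sample < total / favor_split) {
    idx = static_cast<uint16_t>(idx % favor_first_n);
  }
  return idx;
}
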
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index 5a07da7..0b26f9b 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -87,6 +87,11 @@
   // Clears the resolved classes from the current object.
   void ClearResolvedClasses();
 
+  static bool GenerateTestProfile(int fd,
+                                  uint16_t number_of_dex_files,
+                                  uint16_t method_ratio,
+                                  uint16_t class_ratio);
+
  private:
   enum ProfileLoadSatus {
     kProfileLoadIOError,
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 9822f6e..4d4d1ea 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -30,25 +30,11 @@
 
 namespace art {
 
-// TODO: read the constants from ProfileOptions,
-// Add a random delay each time we go to sleep so that we don't hammer the CPU
-// with all profile savers running at the same time.
-static constexpr const uint64_t kMinSavePeriodNs = MsToNs(20 * 1000);  // 20 seconds
-static constexpr const uint64_t kSaveResolvedClassesDelayMs = 2 * 1000;  // 2 seconds
-// Minimum number of JIT samples during launch to include a method into the profile.
-static constexpr const size_t kStartupMethodSamples = 1;
-
-static constexpr const uint32_t kMinimumNumberOfMethodsToSave = 10;
-static constexpr const uint32_t kMinimumNumberOfClassesToSave = 10;
-static constexpr const uint32_t kMinimumNumberOfNotificationBeforeWake =
-    kMinimumNumberOfMethodsToSave;
-static constexpr const uint32_t kMaximumNumberOfNotificationBeforeWake = 50;
-
-
 ProfileSaver* ProfileSaver::instance_ = nullptr;
 pthread_t ProfileSaver::profiler_pthread_ = 0U;
 
-ProfileSaver::ProfileSaver(const std::string& output_filename,
+ProfileSaver::ProfileSaver(const ProfileSaverOptions& options,
+                           const std::string& output_filename,
                            jit::JitCodeCache* jit_code_cache,
                            const std::vector<std::string>& code_paths,
                            const std::string& foreign_dex_profile_path,
@@ -72,7 +58,9 @@
       total_number_of_foreign_dex_marks_(0),
       max_number_of_profile_entries_cached_(0),
       total_number_of_hot_spikes_(0),
-      total_number_of_wake_ups_(0) {
+      total_number_of_wake_ups_(0),
+      options_(options) {
+  DCHECK(options_.IsEnabled());
   AddTrackedLocations(output_filename, app_data_dir, code_paths);
   if (!app_data_dir.empty()) {
     // The application directory is used to determine which dex files are owned by app.
@@ -93,14 +81,13 @@
   Thread* self = Thread::Current();
 
   // Fetch the resolved classes for the app images after sleeping for
-  // kSaveResolvedClassesDelayMs.
+  // options_.GetSaveResolvedClassesDelayMs().
   // TODO(calin) This only considers the case of the primary profile file.
   // Anything that gets loaded in the same VM will not have its resolved
   // classes saved (unless it started before the initial saving was done).
   {
     MutexLock mu(self, wait_lock_);
-    constexpr uint64_t kSleepTime = kSaveResolvedClassesDelayMs;
-    const uint64_t end_time = NanoTime() + MsToNs(kSleepTime);
+    const uint64_t end_time = NanoTime() + MsToNs(options_.GetSaveResolvedClassesDelayMs());
     while (true) {
       const uint64_t current_time = NanoTime();
       if (current_time >= end_time) {
@@ -108,7 +95,7 @@
       }
       period_condition_.TimedWait(self, NsToMs(end_time - current_time), 0);
     }
-    total_ms_of_sleep_ += kSaveResolvedClassesDelayMs;
+    total_ms_of_sleep_ += options_.GetSaveResolvedClassesDelayMs();
   }
   FetchAndCacheResolvedClassesAndMethods();
 
@@ -130,10 +117,11 @@
       // We might have been woken up by a huge number of notifications to guarantee saving.
       // If we didn't meet the minimum saving period go back to sleep (only if missed by
       // a reasonable margin).
-      while (kMinSavePeriodNs * 0.9 > sleep_time) {
+      uint64_t min_save_period_ns = MsToNs(options_.GetMinSavePeriodMs());
+      while (min_save_period_ns * 0.9 > sleep_time) {
         {
           MutexLock mu(self, wait_lock_);
-          period_condition_.TimedWait(self, NsToMs(kMinSavePeriodNs - sleep_time), 0);
+          period_condition_.TimedWait(self, NsToMs(min_save_period_ns - sleep_time), 0);
           sleep_time = NanoTime() - sleep_start;
         }
         // Check if the thread was woken up for shutdown.
@@ -183,12 +171,12 @@
   jit_activity_notifications_++;
   // Note that we are not as precise as we could be here but we don't want to wake the saver
   // every time we see a hot method.
-  if (jit_activity_notifications_ > kMinimumNumberOfNotificationBeforeWake) {
+  if (jit_activity_notifications_ > options_.GetMinNotificationBeforeWake()) {
     MutexLock wait_mutex(Thread::Current(), wait_lock_);
-    if ((NanoTime() - last_time_ns_saver_woke_up_) > kMinSavePeriodNs) {
+    if ((NanoTime() - last_time_ns_saver_woke_up_) > MsToNs(options_.GetMinSavePeriodMs())) {
       WakeUpSaver();
     }
-  } else if (jit_activity_notifications_ > kMaximumNumberOfNotificationBeforeWake) {
+  } else if (jit_activity_notifications_ > options_.GetMaxNotificationBeforeWake()) {
     // Make sure to wake up the saver if we see a spike in the number of notifications.
     // This is a precaution to avoid "losing" a big number of methods in case
     // this is a spike with no jit after.
@@ -210,7 +198,9 @@
 // Excludes native methods and classes in the boot image.
 class GetMethodsVisitor : public ClassVisitor {
  public:
-  explicit GetMethodsVisitor(std::vector<MethodReference>* methods) : methods_(methods) {}
+  GetMethodsVisitor(std::vector<MethodReference>* methods, uint32_t startup_method_samples)
+    : methods_(methods),
+      startup_method_samples_(startup_method_samples) {}
 
   virtual bool operator()(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
     if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
@@ -218,7 +208,7 @@
     }
     for (ArtMethod& method : klass->GetMethods(sizeof(void*))) {
       if (!method.IsNative()) {
-        if (method.GetCounter() >= kStartupMethodSamples ||
+        if (method.GetCounter() >= startup_method_samples_ ||
             method.GetProfilingInfo(sizeof(void*)) != nullptr) {
           // Have samples, add to profile.
           const DexFile* dex_file = method.GetInterfaceMethodIfProxy(sizeof(void*))->GetDexFile();
@@ -231,6 +221,7 @@
 
  private:
   std::vector<MethodReference>* const methods_;
+  uint32_t startup_method_samples_;
 };
 
 void ProfileSaver::FetchAndCacheResolvedClassesAndMethods() {
@@ -242,11 +233,11 @@
   std::vector<MethodReference> methods;
   {
     ScopedTrace trace2("Get hot methods");
-    GetMethodsVisitor visitor(&methods);
+    GetMethodsVisitor visitor(&methods, options_.GetStartupMethodSamples());
     ScopedObjectAccess soa(Thread::Current());
     class_linker->VisitClasses(&visitor);
     VLOG(profiler) << "Methods with samples greater than "
-                   << kStartupMethodSamples << " = " << methods.size();
+                   << options_.GetStartupMethodSamples() << " = " << methods.size();
   }
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
   uint64_t total_number_of_profile_entries_cached = 0;
@@ -315,11 +306,11 @@
         cached_info->GetNumberOfResolvedClasses() -
         static_cast<int64_t>(last_save_number_of_classes_);
 
-    if (delta_number_of_methods < kMinimumNumberOfMethodsToSave &&
-        delta_number_of_classes < kMinimumNumberOfClassesToSave) {
+    if (delta_number_of_methods < options_.GetMinMethodsToSave() &&
+        delta_number_of_classes < options_.GetMinClassesToSave()) {
       VLOG(profiler) << "Not enough information to save to: " << filename
-          << " Nr of methods: " << delta_number_of_methods
-          << " Nr of classes: " << delta_number_of_classes;
+          << " Number of methods: " << delta_number_of_methods
+          << " Number of classes: " << delta_number_of_classes;
       total_number_of_skipped_writes_++;
       continue;
     }
@@ -398,12 +389,14 @@
   return true;
 }
 
-void ProfileSaver::Start(const std::string& output_filename,
+void ProfileSaver::Start(const ProfileSaverOptions& options,
+                         const std::string& output_filename,
                          jit::JitCodeCache* jit_code_cache,
                          const std::vector<std::string>& code_paths,
                          const std::string& foreign_dex_profile_path,
                          const std::string& app_data_dir) {
-  DCHECK(Runtime::Current()->SaveProfileInfo());
+  DCHECK(options.IsEnabled());
+  DCHECK(Runtime::Current()->GetJit() != nullptr);
   DCHECK(!output_filename.empty());
   DCHECK(jit_code_cache != nullptr);
 
@@ -433,7 +426,8 @@
   VLOG(profiler) << "Starting profile saver using output file: " << output_filename
       << ". Tracking: " << Join(code_paths_to_profile, ':');
 
-  instance_ = new ProfileSaver(output_filename,
+  instance_ = new ProfileSaver(options,
+                               output_filename,
                                jit_code_cache,
                                code_paths_to_profile,
                                foreign_dex_profile_path,
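
The notification code above now reads its thresholds from options_ instead of the removed file-scope constants. Its intent: wake the saver only once enough JIT activity notifications have accumulated and the minimum save period has elapsed, but wake it immediately on a large spike so a batch of hot methods is not lost. A condensed helper expressing that intent (our own sketch, not the literal branch structure used in the saver):

#include <cstdint>

// Decision-only sketch of the wake-up policy; thresholds correspond to
// GetMinNotificationBeforeWake(), GetMaxNotificationBeforeWake() and GetMinSavePeriodMs().
bool ShouldWakeSaver(uint64_t notifications,
                     uint64_t ns_since_last_wake_up,
                     uint32_t min_notifications_before_wake,
                     uint32_t max_notifications_before_wake,
                     uint64_t min_save_period_ns) {
  if (notifications > max_notifications_before_wake) {
    return true;  // Spike: wake regardless of the period.
  }
  return notifications > min_notifications_before_wake &&
         ns_since_last_wake_up > min_save_period_ns;
}
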
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index 9c6d0fa..59e2c94 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -20,6 +20,7 @@
 #include "base/mutex.h"
 #include "jit_code_cache.h"
 #include "offline_profiling_info.h"
+#include "profile_saver_options.h"
 #include "safe_map.h"
 
 namespace art {
@@ -28,7 +29,8 @@
  public:
   // Starts the profile saver thread if not already started.
   // If the saver is already running it adds (output_filename, code_paths) to its tracked locations.
-  static void Start(const std::string& output_filename,
+  static void Start(const ProfileSaverOptions& options,
+                    const std::string& output_filename,
                     jit::JitCodeCache* jit_code_cache,
                     const std::vector<std::string>& code_paths,
                     const std::string& foreign_dex_profile_path,
@@ -61,7 +63,8 @@
                             uint16_t method_idx);
 
  private:
-  ProfileSaver(const std::string& output_filename,
+  ProfileSaver(const ProfileSaverOptions& options,
+               const std::string& output_filename,
                jit::JitCodeCache* jit_code_cache,
                const std::vector<std::string>& code_paths,
                const std::string& foreign_dex_profile_path,
@@ -155,6 +158,7 @@
   uint64_t total_number_of_hot_spikes_;
   uint64_t total_number_of_wake_ups_;
 
+  const ProfileSaverOptions options_;
   DISALLOW_COPY_AND_ASSIGN(ProfileSaver);
 };
 
diff --git a/runtime/jit/profile_saver_options.h b/runtime/jit/profile_saver_options.h
new file mode 100644
index 0000000..a6385d7
--- /dev/null
+++ b/runtime/jit/profile_saver_options.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_PROFILE_SAVER_OPTIONS_H_
+#define ART_RUNTIME_JIT_PROFILE_SAVER_OPTIONS_H_
+
+#include <string>
+#include <ostream>
+
+namespace art {
+
+struct ProfileSaverOptions {
+ public:
+  static constexpr uint32_t kMinSavePeriodMs = 20 * 1000;  // 20 seconds
+  static constexpr uint32_t kSaveResolvedClassesDelayMs = 2 * 1000;  // 2 seconds
+  // Minimum number of JIT samples during launch to include a method into the profile.
+  static constexpr uint32_t kStartupMethodSamples = 1;
+  static constexpr uint32_t kMinMethodsToSave = 10;
+  static constexpr uint32_t kMinClassesToSave = 10;
+  static constexpr uint32_t kMinNotificationBeforeWake = 10;
+  static constexpr uint32_t kMaxNotificationBeforeWake = 50;
+
+  ProfileSaverOptions() :
+    enabled_(false),
+    min_save_period_ms_(kMinSavePeriodMs),
+    save_resolved_classes_delay_ms_(kSaveResolvedClassesDelayMs),
+    startup_method_samples_(kStartupMethodSamples),
+    min_methods_to_save_(kMinMethodsToSave),
+    min_classes_to_save_(kMinClassesToSave),
+    min_notification_before_wake_(kMinNotificationBeforeWake),
+    max_notification_before_wake_(kMaxNotificationBeforeWake) {}
+
+  ProfileSaverOptions(
+      bool enabled,
+      uint32_t min_save_period_ms,
+      uint32_t save_resolved_classes_delay_ms,
+      uint32_t startup_method_samples,
+      uint32_t min_methods_to_save,
+      uint32_t min_classes_to_save,
+      uint32_t min_notification_before_wake,
+      uint32_t max_notification_before_wake):
+    enabled_(enabled),
+    min_save_period_ms_(min_save_period_ms),
+    save_resolved_classes_delay_ms_(save_resolved_classes_delay_ms),
+    startup_method_samples_(startup_method_samples),
+    min_methods_to_save_(min_methods_to_save),
+    min_classes_to_save_(min_classes_to_save),
+    min_notification_before_wake_(min_notification_before_wake),
+    max_notification_before_wake_(max_notification_before_wake) {}
+
+  bool IsEnabled() const {
+    return enabled_;
+  }
+  void SetEnabled(bool enabled) {
+    enabled_ = enabled;
+  }
+
+  uint32_t GetMinSavePeriodMs() const {
+    return min_save_period_ms_;
+  }
+  uint32_t GetSaveResolvedClassesDelayMs() const {
+    return save_resolved_classes_delay_ms_;
+  }
+  uint32_t GetStartupMethodSamples() const {
+    return startup_method_samples_;
+  }
+  uint32_t GetMinMethodsToSave() const {
+    return min_methods_to_save_;
+  }
+  uint32_t GetMinClassesToSave() const {
+    return min_classes_to_save_;
+  }
+  uint32_t GetMinNotificationBeforeWake() const {
+    return min_notification_before_wake_;
+  }
+  uint32_t GetMaxNotificationBeforeWake() const {
+    return max_notification_before_wake_;
+  }
+
+  friend std::ostream & operator<<(std::ostream &os, const ProfileSaverOptions& pso) {
+    os << "enabled_" << pso.enabled_
+        << ", min_save_period_ms_" << pso.min_save_period_ms_
+        << ", save_resolved_classes_delay_ms_" << pso.save_resolved_classes_delay_ms_
+        << ", startup_method_samples_" << pso.startup_method_samples_
+        << ", min_methods_to_save_" << pso.min_methods_to_save_
+        << ", min_classes_to_save_" << pso.min_classes_to_save_
+        << ", min_notification_before_wake_" << pso.min_notification_before_wake_
+        << ", max_notification_before_wake_" << pso.max_notification_before_wake_;
+    return os;
+  }
+
+  bool enabled_;
+  uint32_t min_save_period_ms_;
+  uint32_t save_resolved_classes_delay_ms_;
+  uint32_t startup_method_samples_;
+  uint32_t min_methods_to_save_;
+  uint32_t min_classes_to_save_;
+  uint32_t min_notification_before_wake_;
+  uint32_t max_notification_before_wake_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_PROFILE_SAVER_OPTIONS_H_
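
The new ProfileSaverOptions bundles what used to be file-local constants in profile_saver.cc so that tests and the runtime flag parser can override them. A hypothetical usage sketch, assuming it is compiled inside the ART tree; the include path, the eight-argument constructor and operator<< come from the header above, everything else is illustrative.

#include <iostream>

#include "jit/profile_saver_options.h"

int main() {
  // Tighten the saver for a test: shorter save period, flush even tiny deltas.
  art::ProfileSaverOptions options(/*enabled=*/true,
                                   /*min_save_period_ms=*/5 * 1000,
                                   /*save_resolved_classes_delay_ms=*/1000,
                                   /*startup_method_samples=*/1,
                                   /*min_methods_to_save=*/1,
                                   /*min_classes_to_save=*/1,
                                   /*min_notification_before_wake=*/10,
                                   /*max_notification_before_wake=*/50);
  std::cout << options << std::endl;  // Streaming operator defined in the header.
  return 0;
}
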
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index b783a01..cefd9f0 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -247,19 +247,38 @@
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), new_vtable);
 }
 
+inline MemberOffset Class::EmbeddedImTableEntryOffset(uint32_t i, size_t pointer_size) {
+  DCHECK_LT(i, kImtSize);
+  return MemberOffset(
+      EmbeddedImTableOffset(pointer_size).Uint32Value() + i * ImTableEntrySize(pointer_size));
+}
+
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline ArtMethod* Class::GetEmbeddedImTableEntry(uint32_t i, size_t pointer_size) {
+  DCHECK((ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()));
+  return GetFieldPtrWithSize<ArtMethod*>(
+      EmbeddedImTableEntryOffset(i, pointer_size), pointer_size);
+}
+
+template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline void Class::SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size) {
+  DCHECK((ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()));
+  SetFieldPtrWithSize<false>(EmbeddedImTableEntryOffset(i, pointer_size), method, pointer_size);
+}
+
 inline bool Class::HasVTable() {
-  return GetVTable() != nullptr || ShouldHaveEmbeddedVTable();
+  return GetVTable() != nullptr || ShouldHaveEmbeddedImtAndVTable();
 }
 
 inline int32_t Class::GetVTableLength() {
-  if (ShouldHaveEmbeddedVTable()) {
+  if (ShouldHaveEmbeddedImtAndVTable()) {
     return GetEmbeddedVTableLength();
   }
   return GetVTable() != nullptr ? GetVTable()->GetLength() : 0;
 }
 
 inline ArtMethod* Class::GetVTableEntry(uint32_t i, size_t pointer_size) {
-  if (ShouldHaveEmbeddedVTable()) {
+  if (ShouldHaveEmbeddedImtAndVTable()) {
     return GetEmbeddedVTableEntry(i, pointer_size);
   }
   auto* vtable = GetVTable();
@@ -275,14 +294,6 @@
   SetField32<false>(MemberOffset(EmbeddedVTableLengthOffset()), len);
 }
 
-inline ImTable* Class::GetImt(size_t pointer_size) {
-  return GetFieldPtrWithSize<ImTable*>(MemberOffset(ImtPtrOffset(pointer_size)), pointer_size);
-}
-
-inline void Class::SetImt(ImTable* imt, size_t pointer_size) {
-  return SetFieldPtrWithSize<false>(MemberOffset(ImtPtrOffset(pointer_size)), imt, pointer_size);
-}
-
 inline MemberOffset Class::EmbeddedVTableEntryOffset(uint32_t i, size_t pointer_size) {
   return MemberOffset(
       EmbeddedVTableOffset(pointer_size).Uint32Value() + i * VTableEntrySize(pointer_size));
@@ -530,7 +541,7 @@
 inline MemberOffset Class::GetFirstReferenceStaticFieldOffset(size_t pointer_size) {
   DCHECK(IsResolved());
   uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
-  if (ShouldHaveEmbeddedVTable<kVerifyFlags, kReadBarrierOption>()) {
+  if (ShouldHaveEmbeddedImtAndVTable<kVerifyFlags, kReadBarrierOption>()) {
     // Static fields come after the embedded tables.
     base = mirror::Class::ComputeClassSize(
         true, GetEmbeddedVTableLength(), 0, 0, 0, 0, 0, pointer_size);
@@ -541,7 +552,7 @@
 inline MemberOffset Class::GetFirstReferenceStaticFieldOffsetDuringLinking(size_t pointer_size) {
   DCHECK(IsLoaded());
   uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
-  if (ShouldHaveEmbeddedVTable()) {
+  if (ShouldHaveEmbeddedImtAndVTable()) {
     // Static fields come after the embedded tables.
     base = mirror::Class::ComputeClassSize(true, GetVTableDuringLinking()->GetLength(),
                                            0, 0, 0, 0, 0, pointer_size);
@@ -700,7 +711,7 @@
   return Alloc<true>(self, Runtime::Current()->GetHeap()->GetCurrentNonMovingAllocator());
 }
 
-inline uint32_t Class::ComputeClassSize(bool has_embedded_vtable,
+inline uint32_t Class::ComputeClassSize(bool has_embedded_tables,
                                         uint32_t num_vtable_entries,
                                         uint32_t num_8bit_static_fields,
                                         uint32_t num_16bit_static_fields,
@@ -711,10 +722,11 @@
   // Space used by java.lang.Class and its instance fields.
   uint32_t size = sizeof(Class);
   // Space used by embedded tables.
-  if (has_embedded_vtable) {
-    size = RoundUp(size + sizeof(uint32_t), pointer_size);
-    size += pointer_size;  // size of pointer to IMT
-    size += num_vtable_entries * VTableEntrySize(pointer_size);
+  if (has_embedded_tables) {
+    const uint32_t embedded_imt_size = kImtSize * ImTableEntrySize(pointer_size);
+    const uint32_t embedded_vtable_size = num_vtable_entries * VTableEntrySize(pointer_size);
+    size = RoundUp(size + sizeof(uint32_t) /* embedded vtable len */, pointer_size) +
+        embedded_imt_size + embedded_vtable_size;
   }
 
   // Space used by reference statics.
@@ -978,9 +990,18 @@
   return MakeIterationRangeFromLengthPrefixedArray(GetSFieldsPtrUnchecked());
 }
 
+inline MemberOffset Class::EmbeddedImTableOffset(size_t pointer_size) {
+  CheckPointerSize(pointer_size);
+  // Round up so the embedded imt and vtable are pointer-size aligned in the 64-bit case.
+  // Add 32 bits for embedded vtable length.
+  return MemberOffset(
+      RoundUp(EmbeddedVTableLengthOffset().Uint32Value() + sizeof(uint32_t), pointer_size));
+}
+
 inline MemberOffset Class::EmbeddedVTableOffset(size_t pointer_size) {
   CheckPointerSize(pointer_size);
-  return MemberOffset(ImtPtrOffset(pointer_size).Uint32Value() + pointer_size);
+  return MemberOffset(EmbeddedImTableOffset(pointer_size).Uint32Value() +
+                      kImtSize * ImTableEntrySize(pointer_size));
 }
 
 inline void Class::CheckPointerSize(size_t pointer_size) {
@@ -1065,7 +1086,7 @@
     dest->SetDexCacheStrings(new_strings);
   }
   // Fix up embedded tables.
-  if (!IsTemp() && ShouldHaveEmbeddedVTable<kVerifyNone, kReadBarrierOption>()) {
+  if (!IsTemp() && ShouldHaveEmbeddedImtAndVTable<kVerifyNone, kReadBarrierOption>()) {
     for (int32_t i = 0, count = GetEmbeddedVTableLength(); i < count; ++i) {
       ArtMethod* method = GetEmbeddedVTableEntry(i, pointer_size);
       ArtMethod* new_method = visitor(method);
@@ -1073,9 +1094,16 @@
         dest->SetEmbeddedVTableEntryUnchecked(i, new_method, pointer_size);
       }
     }
-  }
-  if (!IsTemp() && ShouldHaveImt<kVerifyNone, kReadBarrierOption>()) {
-    dest->SetImt(visitor(GetImt(pointer_size)), pointer_size);
+    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+      ArtMethod* method = GetEmbeddedImTableEntry<kVerifyFlags, kReadBarrierOption>(i,
+                                                                                    pointer_size);
+      ArtMethod* new_method = visitor(method);
+      if (method != new_method) {
+        dest->SetEmbeddedImTableEntry<kVerifyFlags, kReadBarrierOption>(i,
+                                                                        new_method,
+                                                                        pointer_size);
+      }
+    }
   }
 }
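
The restored layout places the embedded IMT immediately after the 32-bit embedded vtable length (rounded up to pointer alignment) and the embedded vtable after the IMT's kImtSize slots, which is what EmbeddedImTableOffset() and EmbeddedVTableOffset() above compute. A small worked example with stand-in numbers; the class size, IMT size and pointer size used here are illustrative, not ART's actual values.

#include <cstdint>
#include <cstdio>

constexpr uint32_t RoundUp(uint32_t x, uint32_t n) { return (x + n - 1) / n * n; }

int main() {
  const uint32_t class_fields_size = 120;  // Stand-in for sizeof(mirror::Class).
  const uint32_t imt_size = 64;            // Stand-in for kImtSize (IMT_SIZE is set at build time).
  const uint32_t pointer_size = 8;

  const uint32_t vtable_length_offset = class_fields_size;  // 32-bit embedded vtable length.
  const uint32_t imt_offset = RoundUp(vtable_length_offset + 4u, pointer_size);
  const uint32_t vtable_offset = imt_offset + imt_size * pointer_size;

  // With these numbers: imt at offset 128, vtable at offset 640.
  std::printf("imt offset = %u, vtable offset = %u\n", imt_offset, vtable_offset);
  return 0;
}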
 
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 9c77d38..b4a23ba 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -914,7 +914,13 @@
   return GetDexFile().GetInterfacesList(*class_def);
 }
 
-void Class::PopulateEmbeddedVTable(size_t pointer_size) {
+void Class::PopulateEmbeddedImtAndVTable(ArtMethod* const (&methods)[kImtSize],
+                                         size_t pointer_size) {
+  for (size_t i = 0; i < kImtSize; i++) {
+    auto method = methods[i];
+    DCHECK(method != nullptr);
+    SetEmbeddedImTableEntry(i, method, pointer_size);
+  }
   PointerArray* table = GetVTableDuringLinking();
   CHECK(table != nullptr) << PrettyClass(this);
   const size_t table_length = table->GetLength();
@@ -961,7 +967,7 @@
 class CopyClassVisitor {
  public:
   CopyClassVisitor(Thread* self, Handle<mirror::Class>* orig, size_t new_length,
-                   size_t copy_bytes, ImTable* imt,
+                   size_t copy_bytes, ArtMethod* const (&imt)[mirror::Class::kImtSize],
                    size_t pointer_size)
       : self_(self), orig_(orig), new_length_(new_length),
         copy_bytes_(copy_bytes), imt_(imt), pointer_size_(pointer_size) {
@@ -973,8 +979,7 @@
     Handle<mirror::Class> h_new_class_obj(hs.NewHandle(obj->AsClass()));
     mirror::Object::CopyObject(self_, h_new_class_obj.Get(), orig_->Get(), copy_bytes_);
     mirror::Class::SetStatus(h_new_class_obj, Class::kStatusResolving, self_);
-    h_new_class_obj->PopulateEmbeddedVTable(pointer_size_);
-    h_new_class_obj->SetImt(imt_, pointer_size_);
+    h_new_class_obj->PopulateEmbeddedImtAndVTable(imt_, pointer_size_);
     h_new_class_obj->SetClassSize(new_length_);
     // Visit all of the references to make sure there is no from space references in the native
     // roots.
@@ -987,13 +992,13 @@
   Handle<mirror::Class>* const orig_;
   const size_t new_length_;
   const size_t copy_bytes_;
-  ImTable* imt_;
+  ArtMethod* const (&imt_)[mirror::Class::kImtSize];
   const size_t pointer_size_;
   DISALLOW_COPY_AND_ASSIGN(CopyClassVisitor);
 };
 
 Class* Class::CopyOf(Thread* self, int32_t new_length,
-                     ImTable* imt, size_t pointer_size) {
+                     ArtMethod* const (&imt)[mirror::Class::kImtSize], size_t pointer_size) {
   DCHECK_GE(new_length, static_cast<int32_t>(sizeof(Class)));
   // We may get copied by a compacting GC.
   StackHandleScope<1> hs(self);
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 2adf54a..5235a3e 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -22,7 +22,6 @@
 #include "class_flags.h"
 #include "gc_root.h"
 #include "gc/allocator_type.h"
-#include "imtable.h"
 #include "invoke_type.h"
 #include "modifiers.h"
 #include "object.h"
@@ -34,6 +33,10 @@
 #include "thread.h"
 #include "utils.h"
 
+#ifndef IMT_SIZE
+#error IMT_SIZE not defined
+#endif
+
 namespace art {
 
 class ArtField;
@@ -63,6 +66,11 @@
   // 2 ref instance fields.]
   static constexpr uint32_t kClassWalkSuper = 0xC0000000;
 
+  // Interface method table size. Increasing this value reduces the chance of two interface methods
+  // colliding in the interface method table but increases the size of classes that implement
+  // (non-marker) interfaces.
+  static constexpr size_t kImtSize = IMT_SIZE;
+
   // Class Status
   //
   // kStatusRetired: Class that's temporarily used till class linking time
@@ -343,7 +351,7 @@
   // be replaced with a class with the right size for embedded imt/vtable.
   bool IsTemp() SHARED_REQUIRES(Locks::mutator_lock_) {
     Status s = GetStatus();
-    return s < Status::kStatusResolving && ShouldHaveEmbeddedVTable();
+    return s < Status::kStatusResolving && ShouldHaveEmbeddedImtAndVTable();
   }
 
   String* GetName() SHARED_REQUIRES(Locks::mutator_lock_);  // Returns the cached name.
@@ -549,7 +557,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Compute how many bytes would be used by a class with the given elements.
-  static uint32_t ComputeClassSize(bool has_embedded_vtable,
+  static uint32_t ComputeClassSize(bool has_embedded_tables,
                                    uint32_t num_vtable_entries,
                                    uint32_t num_8bit_static_fields,
                                    uint32_t num_16bit_static_fields,
@@ -822,29 +830,28 @@
     return MemberOffset(sizeof(Class));
   }
 
-  static MemberOffset ImtPtrOffset(size_t pointer_size) {
-    return MemberOffset(
-        RoundUp(EmbeddedVTableLengthOffset().Uint32Value() + sizeof(uint32_t), pointer_size));
-  }
-
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  bool ShouldHaveImt() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return ShouldHaveEmbeddedVTable<kVerifyFlags, kReadBarrierOption>() &&
-        GetIfTable<kVerifyFlags, kReadBarrierOption>() != nullptr &&
-        !IsArrayClass<kVerifyFlags, kReadBarrierOption>();
-  }
-
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
-           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  bool ShouldHaveEmbeddedVTable() SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool ShouldHaveEmbeddedImtAndVTable() SHARED_REQUIRES(Locks::mutator_lock_) {
     return IsInstantiable<kVerifyFlags, kReadBarrierOption>();
   }
 
   bool HasVTable() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  static MemberOffset EmbeddedImTableEntryOffset(uint32_t i, size_t pointer_size);
+
   static MemberOffset EmbeddedVTableEntryOffset(uint32_t i, size_t pointer_size);
 
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  ArtMethod* GetEmbeddedImTableEntry(uint32_t i, size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  void SetEmbeddedImTableEntry(uint32_t i, ArtMethod* method, size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   int32_t GetVTableLength() SHARED_REQUIRES(Locks::mutator_lock_);
 
   ArtMethod* GetVTableEntry(uint32_t i, size_t pointer_size)
@@ -854,10 +861,6 @@
 
   void SetEmbeddedVTableLength(int32_t len) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ImTable* GetImt(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
-
-  void SetImt(ImTable* imt, size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
-
   ArtMethod* GetEmbeddedVTableEntry(uint32_t i, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -867,7 +870,7 @@
   inline void SetEmbeddedVTableEntryUnchecked(uint32_t i, ArtMethod* method, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void PopulateEmbeddedVTable(size_t pointer_size)
+  void PopulateEmbeddedImtAndVTable(ArtMethod* const (&methods)[kImtSize], size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Given a method implemented by this class but potentially from a super class, return the
@@ -1192,7 +1195,7 @@
   void AssertInitializedOrInitializingInThread(Thread* self)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  Class* CopyOf(Thread* self, int32_t new_length, ImTable* imt,
+  Class* CopyOf(Thread* self, int32_t new_length, ArtMethod* const (&imt)[mirror::Class::kImtSize],
                 size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
@@ -1319,7 +1322,10 @@
 
   // Check that the pointer size matches the one in the class linker.
   ALWAYS_INLINE static void CheckPointerSize(size_t pointer_size);
+
+  static MemberOffset EmbeddedImTableOffset(size_t pointer_size);
   static MemberOffset EmbeddedVTableOffset(size_t pointer_size);
+
   template <bool kVisitNativeRoots,
             VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
             ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
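
The new PopulateEmbeddedImtAndVTable and CopyOf signatures above pass the IMT as
ArtMethod* const (&)[kImtSize], i.e. a reference to a fixed-size array rather than
a decayed pointer, so the kImtSize bound is enforced at compile time. A small
self-contained sketch of the idiom, with placeholder types instead of the real ART
declarations:

  #include <cstddef>
  #include <cstdio>

  struct ArtMethod {};                     // placeholder for the real ART type
  static constexpr size_t kImtSize = 64;   // assumed IMT_SIZE

  // A reference to an array of exactly kImtSize entries: passing a bare pointer
  // or an array of a different size does not compile.
  size_t PopulateImt(ArtMethod* const (&methods)[kImtSize]) {
    size_t installed = 0;
    for (size_t i = 0; i < kImtSize; ++i) {
      if (methods[i] != nullptr) {
        ++installed;  // the real code would copy the entry into the class's embedded IMT slot
      }
    }
    return installed;
  }

  int main() {
    ArtMethod conflict;
    ArtMethod* const imt[kImtSize] = { &conflict };  // remaining slots value-initialize to nullptr
    std::printf("installed %zu IMT entries\n", PopulateImt(imt));
    return 0;
  }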
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index 6285542..96f2098 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -20,6 +20,7 @@
 #include "array.h"
 #include "base/bit_utils.h"
 #include "class.h"
+#include "common_throws.h"
 #include "gc/heap-inl.h"
 #include "globals.h"
 #include "intern_table.h"
@@ -33,7 +34,7 @@
 namespace mirror {
 
 inline uint32_t String::ClassSize(size_t pointer_size) {
-  uint32_t vtable_entries = Object::kVTableLength + 56;
+  uint32_t vtable_entries = Object::kVTableLength + 57;
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 1, 2, pointer_size);
 }
 
@@ -134,9 +135,7 @@
 inline uint16_t String::CharAt(int32_t index) {
   int32_t count = GetField32(OFFSET_OF_OBJECT_MEMBER(String, count_));
   if (UNLIKELY((index < 0) || (index >= count))) {
-    Thread* self = Thread::Current();
-    self->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
-                             "length=%i; index=%i", count, index);
+    ThrowStringIndexOutOfBoundsException(index, count);
     return 0;
   }
   return GetValue()[index];
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 71c866f..396c946 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -220,7 +220,7 @@
 struct NthCallerWithDexPcVisitor FINAL : public StackVisitor {
   explicit NthCallerWithDexPcVisitor(Thread* thread, size_t frame)
       SHARED_REQUIRES(Locks::mutator_lock_)
-      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFramesNoResolve),
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
         method_(nullptr),
         dex_pc_(0),
         current_frame_number_(0),
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 8c7c966..46be5e6 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -33,7 +33,6 @@
 #include "oat_file_assistant.h"
 #include "oat_file_manager.h"
 #include "os.h"
-#include "profiler.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
diff --git a/runtime/oat.h b/runtime/oat.h
index 52d4c42..6243660 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '8', '1', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '8', '2', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 62c723e..61dc287 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -1167,12 +1167,15 @@
 
 std::unique_ptr<const DexFile> OatFile::OatDexFile::OpenDexFile(std::string* error_msg) const {
   ScopedTrace trace(__PRETTY_FUNCTION__);
+  static constexpr bool kVerify = false;
+  static constexpr bool kVerifyChecksum = false;
   return DexFile::Open(dex_file_pointer_,
                        FileSize(),
                        dex_file_location_,
                        dex_file_location_checksum_,
                        this,
-                       false /* verify */,
+                       kVerify,
+                       kVerifyChecksum,
                        error_msg);
 }
 
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index bb7b408..d55e373 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -28,7 +28,6 @@
 #include "compiler_filter.h"
 #include "oat_file.h"
 #include "os.h"
-#include "profiler.h"
 
 namespace art {
 
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index c79a9a6..a1d3ed9 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-#include "oat_file_assistant.h"
-
 #include <algorithm>
 #include <fstream>
 #include <string>
@@ -29,8 +27,10 @@
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "compiler_callbacks.h"
+#include "dex2oat_environment_test.h"
 #include "gc/space/image_space.h"
 #include "mem_map.h"
+#include "oat_file_assistant.h"
 #include "oat_file_manager.h"
 #include "os.h"
 #include "scoped_thread_state_change.h"
@@ -39,184 +39,74 @@
 
 namespace art {
 
-class OatFileAssistantTest : public CommonRuntimeTest {
+class OatFileAssistantTest : public Dex2oatEnvironmentTest {
  public:
-  virtual void SetUp() {
+  virtual void SetUp() OVERRIDE {
     ReserveImageSpace();
-    CommonRuntimeTest::SetUp();
-
-    // Create a scratch directory to work from.
-    scratch_dir_ = android_data_ + "/OatFileAssistantTest";
-    ASSERT_EQ(0, mkdir(scratch_dir_.c_str(), 0700));
-
-    // Create a subdirectory in scratch for odex files.
-    odex_oat_dir_ = scratch_dir_ + "/oat";
-    ASSERT_EQ(0, mkdir(odex_oat_dir_.c_str(), 0700));
-
-    odex_dir_ = odex_oat_dir_ + "/" + std::string(GetInstructionSetString(kRuntimeISA));
-    ASSERT_EQ(0, mkdir(odex_dir_.c_str(), 0700));
-
-
-    // Verify the environment is as we expect
-    uint32_t checksum;
-    std::string error_msg;
-    ASSERT_TRUE(OS::FileExists(GetImageFile().c_str()))
-      << "Expected pre-compiled boot image to be at: " << GetImageFile();
-    ASSERT_TRUE(OS::FileExists(GetDexSrc1().c_str()))
-      << "Expected dex file to be at: " << GetDexSrc1();
-    ASSERT_TRUE(OS::FileExists(GetStrippedDexSrc1().c_str()))
-      << "Expected stripped dex file to be at: " << GetStrippedDexSrc1();
-    ASSERT_FALSE(DexFile::GetChecksum(GetStrippedDexSrc1().c_str(), &checksum, &error_msg))
-      << "Expected stripped dex file to be stripped: " << GetStrippedDexSrc1();
-    ASSERT_TRUE(OS::FileExists(GetDexSrc2().c_str()))
-      << "Expected dex file to be at: " << GetDexSrc2();
-
-    // GetMultiDexSrc2 should have the same primary dex checksum as
-    // GetMultiDexSrc1, but a different secondary dex checksum.
-    std::vector<std::unique_ptr<const DexFile>> multi1;
-    ASSERT_TRUE(DexFile::Open(GetMultiDexSrc1().c_str(),
-          GetMultiDexSrc1().c_str(), &error_msg, &multi1)) << error_msg;
-    ASSERT_GT(multi1.size(), 1u);
-
-    std::vector<std::unique_ptr<const DexFile>> multi2;
-    ASSERT_TRUE(DexFile::Open(GetMultiDexSrc2().c_str(),
-          GetMultiDexSrc2().c_str(), &error_msg, &multi2)) << error_msg;
-    ASSERT_GT(multi2.size(), 1u);
-
-    ASSERT_EQ(multi1[0]->GetLocationChecksum(), multi2[0]->GetLocationChecksum());
-    ASSERT_NE(multi1[1]->GetLocationChecksum(), multi2[1]->GetLocationChecksum());
+    Dex2oatEnvironmentTest::SetUp();
   }
 
-  virtual void SetUpRuntimeOptions(RuntimeOptions* options) {
-    // options->push_back(std::make_pair("-verbose:oat", nullptr));
+  // Pre-relocate the image to a known non-zero offset so we don't have to
+  // deal with the runtime picking a random relocation offset of 0 and messing
+  // up the expected results of the tests.
+  bool PreRelocateImage(std::string* error_msg) {
+    std::string image;
+    if (!GetCachedImageFile(&image, error_msg)) {
+      return false;
+    }
 
-    // Set up the image location.
-    options->push_back(std::make_pair("-Ximage:" + GetImageLocation(),
-          nullptr));
-    // Make sure compilercallbacks are not set so that relocation will be
-    // enabled.
-    callbacks_.reset();
+    std::string patchoat = GetAndroidRoot();
+    patchoat += kIsDebugBuild ? "/bin/patchoatd" : "/bin/patchoat";
+
+    std::vector<std::string> argv;
+    argv.push_back(patchoat);
+    argv.push_back("--input-image-location=" + GetImageLocation());
+    argv.push_back("--output-image-file=" + image);
+    argv.push_back("--instruction-set=" + std::string(GetInstructionSetString(kRuntimeISA)));
+    argv.push_back("--base-offset-delta=0x00008000");
+    return Exec(argv, error_msg);
   }
 
   virtual void PreRuntimeCreate() {
+    std::string error_msg;
+    ASSERT_TRUE(PreRelocateImage(&error_msg)) << error_msg;
+
     UnreserveImageSpace();
   }
 
-  virtual void PostRuntimeCreate() {
+  virtual void PostRuntimeCreate() OVERRIDE {
     ReserveImageSpace();
   }
 
-  virtual void TearDown() {
-    ClearDirectory(odex_dir_.c_str());
-    ASSERT_EQ(0, rmdir(odex_dir_.c_str()));
-
-    ClearDirectory(odex_oat_dir_.c_str());
-    ASSERT_EQ(0, rmdir(odex_oat_dir_.c_str()));
-
-    ClearDirectory(scratch_dir_.c_str());
-    ASSERT_EQ(0, rmdir(scratch_dir_.c_str()));
-
-    CommonRuntimeTest::TearDown();
-  }
-
-  void Copy(std::string src, std::string dst) {
-    std::ifstream  src_stream(src, std::ios::binary);
-    std::ofstream  dst_stream(dst, std::ios::binary);
-
-    dst_stream << src_stream.rdbuf();
-  }
-
-  // Returns the directory where the pre-compiled core.art can be found.
-  // TODO: We should factor out this into common tests somewhere rather than
-  // re-hardcoding it here (This was copied originally from the elf writer
-  // test).
-  std::string GetImageDirectory() {
-    if (IsHost()) {
-      const char* host_dir = getenv("ANDROID_HOST_OUT");
-      CHECK(host_dir != nullptr);
-      return std::string(host_dir) + "/framework";
-    } else {
-      return std::string("/data/art-test");
-    }
-  }
-
-  std::string GetImageLocation() {
-    return GetImageDirectory() + "/core.art";
-  }
-
-  std::string GetImageFile() {
-    return GetImageDirectory() + "/" + GetInstructionSetString(kRuntimeISA)
-      + "/core.art";
-  }
-
-  std::string GetDexSrc1() {
-    return GetTestDexFileName("Main");
-  }
-
-  // Returns the path to a dex file equivalent to GetDexSrc1, but with the dex
-  // file stripped.
-  std::string GetStrippedDexSrc1() {
-    return GetTestDexFileName("MainStripped");
-  }
-
-  std::string GetMultiDexSrc1() {
-    return GetTestDexFileName("MultiDex");
-  }
-
-  // Returns the path to a multidex file equivalent to GetMultiDexSrc2, but
-  // with the contents of the secondary dex file changed.
-  std::string GetMultiDexSrc2() {
-    return GetTestDexFileName("MultiDexModifiedSecondary");
-  }
-
-  std::string GetDexSrc2() {
-    return GetTestDexFileName("Nested");
-  }
-
-  // Scratch directory, for dex and odex files (oat files will go in the
-  // dalvik cache).
-  std::string GetScratchDir() {
-    return scratch_dir_;
-  }
-
-  // Odex directory is the subdirectory in the scratch directory where odex
-  // files should be located.
-  std::string GetOdexDir() {
-    return odex_dir_;
-  }
-
   // Generate a non-PIC odex file for test purposes.
   // The generated odex file will be un-relocated.
   void GenerateOdexForTest(const std::string& dex_location,
                            const std::string& odex_location,
-                           CompilerFilter::Filter filter) {
-    // To generate an un-relocated odex file, we first compile a relocated
-    // version of the file, then manually call patchoat to make it look as if
-    // it is unrelocated.
-    std::string relocated_odex_location = odex_location + ".relocated";
+                           CompilerFilter::Filter filter,
+                           bool pic = false,
+                           bool with_patch_info = true) {
+    // Temporarily redirect the dalvik cache so dex2oat doesn't find the
+    // relocated image file.
+    std::string android_data_tmp = GetScratchDir() + "AndroidDataTmp";
+    setenv("ANDROID_DATA", android_data_tmp.c_str(), 1);
     std::vector<std::string> args;
     args.push_back("--dex-file=" + dex_location);
-    args.push_back("--oat-file=" + relocated_odex_location);
+    args.push_back("--oat-file=" + odex_location);
     args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
+    args.push_back("--runtime-arg");
+    args.push_back("-Xnorelocate");
 
-    // We need to use the quick compiler to generate non-PIC code, because
-    // the optimizing compiler always generates PIC.
-    args.push_back("--compiler-backend=Quick");
-    args.push_back("--include-patch-information");
+    if (pic) {
+      args.push_back("--compile-pic");
+    }
+
+    if (with_patch_info) {
+      args.push_back("--include-patch-information");
+    }
 
     std::string error_msg;
     ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
-
-    // Use patchoat to unrelocate the relocated odex file.
-    Runtime* runtime = Runtime::Current();
-    std::vector<std::string> argv;
-    argv.push_back(runtime->GetPatchoatExecutable());
-    argv.push_back("--instruction-set=" + std::string(GetInstructionSetString(kRuntimeISA)));
-    argv.push_back("--input-oat-file=" + relocated_odex_location);
-    argv.push_back("--output-oat-file=" + odex_location);
-    argv.push_back("--base-offset-delta=0x00008000");
-    std::string command_line(Join(argv, ' '));
-    ASSERT_TRUE(Exec(argv, &error_msg)) << error_msg;
+    setenv("ANDROID_DATA", android_data_.c_str(), 1);
 
     // Verify the odex file was generated as expected and really is
     // unrelocated.
@@ -229,13 +119,13 @@
                                                      dex_location.c_str(),
                                                      &error_msg));
     ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
-    EXPECT_FALSE(odex_file->IsPic());
-    EXPECT_TRUE(odex_file->HasPatchInfo());
+    EXPECT_EQ(pic, odex_file->IsPic());
+    EXPECT_EQ(with_patch_info, odex_file->HasPatchInfo());
     EXPECT_EQ(filter, odex_file->GetCompilerFilter());
 
     if (CompilerFilter::IsBytecodeCompilationEnabled(filter)) {
       const std::vector<gc::space::ImageSpace*> image_spaces =
-        runtime->GetHeap()->GetBootImageSpaces();
+        Runtime::Current()->GetHeap()->GetBootImageSpaces();
       ASSERT_TRUE(!image_spaces.empty() && image_spaces[0] != nullptr);
       const ImageHeader& image_header = image_spaces[0]->GetImageHeader();
       const OatHeader& oat_header = odex_file->GetOatHeader();
@@ -250,71 +140,15 @@
   void GeneratePicOdexForTest(const std::string& dex_location,
                               const std::string& odex_location,
                               CompilerFilter::Filter filter) {
-    // Temporarily redirect the dalvik cache so dex2oat doesn't find the
-    // relocated image file.
-    std::string android_data_tmp = GetScratchDir() + "AndroidDataTmp";
-    setenv("ANDROID_DATA", android_data_tmp.c_str(), 1);
-    std::vector<std::string> args;
-    args.push_back("--dex-file=" + dex_location);
-    args.push_back("--oat-file=" + odex_location);
-    args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
-    args.push_back("--compile-pic");
-    args.push_back("--runtime-arg");
-    args.push_back("-Xnorelocate");
-    std::string error_msg;
-    ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
-    setenv("ANDROID_DATA", android_data_.c_str(), 1);
-
-    // Verify the odex file was generated as expected.
-    std::unique_ptr<OatFile> odex_file(OatFile::Open(odex_location.c_str(),
-                                                     odex_location.c_str(),
-                                                     nullptr,
-                                                     nullptr,
-                                                     false,
-                                                     /*low_4gb*/false,
-                                                     dex_location.c_str(),
-                                                     &error_msg));
-    ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
-    EXPECT_TRUE(odex_file->IsPic());
-    EXPECT_EQ(filter, odex_file->GetCompilerFilter());
+    GenerateOdexForTest(dex_location, odex_location, filter, true, false);
   }
 
   // Generate a non-PIC odex file without patch information for test purposes.
   // The generated odex file will be un-relocated.
-  // TODO: This won't work correctly if we depend on the boot image being
-  // randomly relocated by a non-zero amount. We should have a better solution
-  // for avoiding that flakiness and duplicating code to generate odex and oat
-  // files for test.
   void GenerateNoPatchOdexForTest(const std::string& dex_location,
                                   const std::string& odex_location,
                                   CompilerFilter::Filter filter) {
-    // Temporarily redirect the dalvik cache so dex2oat doesn't find the
-    // relocated image file.
-    std::string android_data_tmp = GetScratchDir() + "AndroidDataTmp";
-    setenv("ANDROID_DATA", android_data_tmp.c_str(), 1);
-    std::vector<std::string> args;
-    args.push_back("--dex-file=" + dex_location);
-    args.push_back("--oat-file=" + odex_location);
-    args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
-    args.push_back("--runtime-arg");
-    args.push_back("-Xnorelocate");
-    std::string error_msg;
-    ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
-    setenv("ANDROID_DATA", android_data_.c_str(), 1);
-
-    // Verify the odex file was generated as expected.
-    std::unique_ptr<OatFile> odex_file(OatFile::Open(odex_location.c_str(),
-                                                     odex_location.c_str(),
-                                                     nullptr,
-                                                     nullptr,
-                                                     false,
-                                                     /*low_4gb*/false,
-                                                     dex_location.c_str(),
-                                                     &error_msg));
-    ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
-    EXPECT_FALSE(odex_file->IsPic());
-    EXPECT_FALSE(odex_file->HasPatchInfo());
-    EXPECT_EQ(filter, odex_file->GetCompilerFilter());
+    GenerateOdexForTest(dex_location, odex_location, filter, false, false);
   }
 
  private:
@@ -326,11 +160,10 @@
     MemMap::Init();
 
     // Ensure a chunk of memory is reserved for the image space.
-    uintptr_t reservation_start = ART_BASE_ADDRESS + ART_BASE_ADDRESS_MIN_DELTA;
-    uintptr_t reservation_end = ART_BASE_ADDRESS + ART_BASE_ADDRESS_MAX_DELTA
-        // Include the main space that has to come right after the
-        // image in case of the GSS collector.
-        + 384 * MB;
+    // The reservation_end includes room for the main space that has to come
+    // right after the image in case of the GSS collector.
+    uintptr_t reservation_start = ART_BASE_ADDRESS;
+    uintptr_t reservation_end = ART_BASE_ADDRESS + 384 * MB;
 
     std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid(), true));
     ASSERT_TRUE(map.get() != nullptr) << "Failed to build process map";
@@ -365,9 +198,6 @@
     image_reservation_.clear();
   }
 
-  std::string scratch_dir_;
-  std::string odex_oat_dir_;
-  std::string odex_dir_;
   std::vector<std::unique_ptr<MemMap>> image_reservation_;
 };
 
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index fbae1da..b7e6040 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -696,7 +696,9 @@
   if (dex_files.empty()) {
     if (oat_file_assistant.HasOriginalDexFiles()) {
       if (Runtime::Current()->IsDexFileFallbackEnabled()) {
-        if (!DexFile::Open(dex_location, dex_location, /*out*/ &error_msg, &dex_files)) {
+        static constexpr bool kVerifyChecksum = true;
+        if (!DexFile::Open(
+            dex_location, dex_location, kVerifyChecksum, /*out*/ &error_msg, &dex_files)) {
           LOG(WARNING) << error_msg;
           error_msgs->push_back("Failed to open dex files from " + std::string(dex_location)
                                 + " because: " + error_msg);
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index eac5b43..595a47b 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -176,8 +176,13 @@
           .WithType<unsigned int>()
           .IntoKey(M::JITInvokeTransitionWeight)
       .Define("-Xjitsaveprofilinginfo")
-          .WithValue(true)
-          .IntoKey(M::JITSaveProfilingInfo)
+          .WithType<ProfileSaverOptions>()
+          .AppendValues()
+          .IntoKey(M::ProfileSaverOpts)
+      .Define("-Xps-_")  // profile saver options -Xps-<key>:<value>
+          .WithType<ProfileSaverOptions>()
+          .AppendValues()
+          .IntoKey(M::ProfileSaverOpts)  // NOTE: Appends into same key as -Xjitsaveprofilinginfo
       .Define("-XX:HspaceCompactForOOMMinIntervalMs=_")  // in ms
           .WithType<MillisecondsToNanoseconds>()  // store as ns
           .IntoKey(M::HSpaceCompactForOOMMinIntervalsMs)
@@ -244,14 +249,6 @@
                          {"wallclock",      TraceClockSource::kWall},
                          {"dualclock",      TraceClockSource::kDual}})
           .IntoKey(M::ProfileClock)
-      .Define("-Xenable-profiler")
-          .WithType<TestProfilerOptions>()
-          .AppendValues()
-          .IntoKey(M::ProfilerOpts)  // NOTE: Appends into same key as -Xprofile-*
-      .Define("-Xprofile-_")  // -Xprofile-<key>:<value>
-          .WithType<TestProfilerOptions>()
-          .AppendValues()
-          .IntoKey(M::ProfilerOpts)  // NOTE: Appends into same key as -Xenable-profiler
       .Define("-Xcompiler:_")
           .WithType<std::string>()
           .IntoKey(M::Compiler)
@@ -690,17 +687,13 @@
   UsageMessage(stream, "  -Xmethod-trace\n");
   UsageMessage(stream, "  -Xmethod-trace-file:filename");
   UsageMessage(stream, "  -Xmethod-trace-file-size:integervalue\n");
-  UsageMessage(stream, "  -Xenable-profiler\n");
-  UsageMessage(stream, "  -Xprofile-filename:filename\n");
-  UsageMessage(stream, "  -Xprofile-period:integervalue\n");
-  UsageMessage(stream, "  -Xprofile-duration:integervalue\n");
-  UsageMessage(stream, "  -Xprofile-interval:integervalue\n");
-  UsageMessage(stream, "  -Xprofile-backoff:doublevalue\n");
-  UsageMessage(stream, "  -Xprofile-start-immediately\n");
-  UsageMessage(stream, "  -Xprofile-top-k-threshold:doublevalue\n");
-  UsageMessage(stream, "  -Xprofile-top-k-change-threshold:doublevalue\n");
-  UsageMessage(stream, "  -Xprofile-type:{method,stack}\n");
-  UsageMessage(stream, "  -Xprofile-max-stack-depth:integervalue\n");
+  UsageMessage(stream, "  -Xps-min-save-period-ms:integervalue\n");
+  UsageMessage(stream, "  -Xps-save-resolved-classes-delay-ms:integervalue\n");
+  UsageMessage(stream, "  -Xps-startup-method-samples:integervalue\n");
+  UsageMessage(stream, "  -Xps-min-methods-to-save:integervalue\n");
+  UsageMessage(stream, "  -Xps-min-classes-to-save:integervalue\n");
+  UsageMessage(stream, "  -Xps-min-notification-before-wake:integervalue\n");
+  UsageMessage(stream, "  -Xps-max-notification-before-wake:integervalue\n");
   UsageMessage(stream, "  -Xcompiler:filename\n");
   UsageMessage(stream, "  -Xcompiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
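
For reference, every -Xps-<key>:<value> flag listed above is appended into the same
ProfileSaverOpts key that -Xjitsaveprofilinginfo uses. A hypothetical standalone
sketch of the key:value split (illustrative parser only, not the ART CmdlineParser
API; the option values below are made up):

  #include <cstdio>
  #include <map>
  #include <string>

  // Split flags of the form "-Xps-<key>:<value>" into key/value pairs, the way the
  // -Xps-_ rule above accumulates options into ProfileSaverOptions.
  static bool ParseXps(const std::string& arg, std::map<std::string, std::string>* out) {
    static const std::string kPrefix = "-Xps-";
    if (arg.compare(0, kPrefix.size(), kPrefix) != 0) {
      return false;
    }
    const std::string rest = arg.substr(kPrefix.size());
    const std::string::size_type colon = rest.find(':');
    if (colon == std::string::npos) {
      return false;
    }
    (*out)[rest.substr(0, colon)] = rest.substr(colon + 1);
    return true;
  }

  int main() {
    std::map<std::string, std::string> opts;
    ParseXps("-Xps-min-save-period-ms:40000", &opts);
    ParseXps("-Xps-min-methods-to-save:10", &opts);
    for (const auto& kv : opts) {
      std::printf("%s = %s\n", kv.first.c_str(), kv.second.c_str());
    }
    return 0;
  }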
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index 5974fb6..1f5beb9 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -26,7 +26,7 @@
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
 #include "arch/instruction_set.h"
-#include "profiler_options.h"
+#include "jit/profile_saver_options.h"
 #include "runtime_options.h"
 
 namespace art {
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
deleted file mode 100644
index 6a77a9e..0000000
--- a/runtime/profiler.cc
+++ /dev/null
@@ -1,920 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "profiler.h"
-
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <sys/uio.h>
-
-#include <fstream>
-
-#include "art_method-inl.h"
-#include "base/stl_util.h"
-#include "base/time_utils.h"
-#include "base/unix_file/fd_file.h"
-#include "class_linker.h"
-#include "common_throws.h"
-#include "dex_file-inl.h"
-#include "instrumentation.h"
-#include "mirror/class-inl.h"
-#include "mirror/dex_cache.h"
-#include "mirror/object_array-inl.h"
-#include "mirror/object-inl.h"
-#include "os.h"
-#include "scoped_thread_state_change.h"
-#include "ScopedLocalRef.h"
-#include "thread.h"
-#include "thread_list.h"
-#include "utils.h"
-
-#include "entrypoints/quick/quick_entrypoints.h"
-
-namespace art {
-
-BackgroundMethodSamplingProfiler* BackgroundMethodSamplingProfiler::profiler_ = nullptr;
-pthread_t BackgroundMethodSamplingProfiler::profiler_pthread_ = 0U;
-volatile bool BackgroundMethodSamplingProfiler::shutting_down_ = false;
-
-// TODO: this profiler runs regardless of the state of the machine.  Maybe we should use the
-// wakelock or something to modify the run characteristics.  This can be done when we
-// have some performance data after it's been used for a while.
-
-// Walk through the method within depth of max_depth_ on the Java stack
-class BoundedStackVisitor : public StackVisitor {
- public:
-  BoundedStackVisitor(std::vector<std::pair<ArtMethod*, uint32_t>>* stack,
-                      Thread* thread,
-                      uint32_t max_depth)
-      SHARED_REQUIRES(Locks::mutator_lock_)
-      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
-        stack_(stack),
-        max_depth_(max_depth),
-        depth_(0) {}
-
-  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
-    ArtMethod* m = GetMethod();
-    if (m->IsRuntimeMethod()) {
-      return true;
-    }
-    uint32_t dex_pc_ = GetDexPc();
-    stack_->push_back(std::make_pair(m, dex_pc_));
-    ++depth_;
-    if (depth_ < max_depth_) {
-      return true;
-    } else {
-      return false;
-    }
-  }
-
- private:
-  std::vector<std::pair<ArtMethod*, uint32_t>>* const stack_;
-  const uint32_t max_depth_;
-  uint32_t depth_;
-
-  DISALLOW_COPY_AND_ASSIGN(BoundedStackVisitor);
-};
-
-// This is called from either a thread list traversal or from a checkpoint.  Regardless
-// of which caller, the mutator lock must be held.
-static void GetSample(Thread* thread, void* arg) SHARED_REQUIRES(Locks::mutator_lock_) {
-  BackgroundMethodSamplingProfiler* profiler =
-      reinterpret_cast<BackgroundMethodSamplingProfiler*>(arg);
-  const ProfilerOptions profile_options = profiler->GetProfilerOptions();
-  switch (profile_options.GetProfileType()) {
-    case kProfilerMethod: {
-      ArtMethod* method = thread->GetCurrentMethod(nullptr);
-      if ((false) && method == nullptr) {
-        LOG(INFO) << "No current method available";
-        std::ostringstream os;
-        thread->Dump(os);
-        std::string data(os.str());
-        LOG(INFO) << data;
-      }
-      profiler->RecordMethod(method);
-      break;
-    }
-    case kProfilerBoundedStack: {
-      std::vector<InstructionLocation> stack;
-      uint32_t max_depth = profile_options.GetMaxStackDepth();
-      BoundedStackVisitor bounded_stack_visitor(&stack, thread, max_depth);
-      bounded_stack_visitor.WalkStack();
-      profiler->RecordStack(stack);
-      break;
-    }
-    default:
-      LOG(INFO) << "This profile type is not implemented.";
-  }
-}
-
-// A closure that is called by the thread checkpoint code.
-class SampleCheckpoint FINAL : public Closure {
- public:
-  explicit SampleCheckpoint(BackgroundMethodSamplingProfiler* const profiler) :
-    profiler_(profiler) {}
-
-  void Run(Thread* thread) OVERRIDE {
-    Thread* self = Thread::Current();
-    if (thread == nullptr) {
-      LOG(ERROR) << "Checkpoint with nullptr thread";
-      return;
-    }
-
-    // Grab the mutator lock (shared access).
-    ScopedObjectAccess soa(self);
-
-    // Grab a sample.
-    GetSample(thread, this->profiler_);
-
-    // And finally tell the barrier that we're done.
-    this->profiler_->GetBarrier().Pass(self);
-  }
-
- private:
-  BackgroundMethodSamplingProfiler* const profiler_;
-};
-
-bool BackgroundMethodSamplingProfiler::ShuttingDown(Thread* self) {
-  MutexLock mu(self, *Locks::profiler_lock_);
-  return shutting_down_;
-}
-
-void* BackgroundMethodSamplingProfiler::RunProfilerThread(void* arg) {
-  Runtime* runtime = Runtime::Current();
-  BackgroundMethodSamplingProfiler* profiler =
-      reinterpret_cast<BackgroundMethodSamplingProfiler*>(arg);
-
-  // Add a random delay for the first time run so that we don't hammer the CPU
-  // with all profiles running at the same time.
-  const int kRandomDelayMaxSecs = 30;
-  const double kMaxBackoffSecs = 24*60*60;   // Max backoff time.
-
-  srand(MicroTime() * getpid());
-  int startup_delay = rand() % kRandomDelayMaxSecs;   // random delay for startup.
-
-
-  CHECK(runtime->AttachCurrentThread("Profiler", true, runtime->GetSystemThreadGroup(),
-                                      !runtime->IsAotCompiler()));
-
-  Thread* self = Thread::Current();
-
-  double backoff = 1.0;
-  while (true) {
-    if (ShuttingDown(self)) {
-      break;
-    }
-
-    {
-      // wait until we need to run another profile
-      uint64_t delay_secs = profiler->options_.GetPeriodS() * backoff;
-
-      // Add a startup delay to prevent all the profiles running at once.
-      delay_secs += startup_delay;
-
-      // Immediate startup for benchmarking?
-      if (profiler->options_.GetStartImmediately() && startup_delay > 0) {
-        delay_secs = 0;
-      }
-
-      startup_delay = 0;
-
-      VLOG(profiler) << "Delaying profile start for " << delay_secs << " secs";
-      MutexLock mu(self, profiler->wait_lock_);
-      profiler->period_condition_.TimedWait(self, delay_secs * 1000, 0);
-      // We were either signaled by Stop or timedout, in either case ignore the timed out result.
-
-      // Expand the backoff by its coefficient, but don't go beyond the max.
-      backoff = std::min(backoff * profiler->options_.GetBackoffCoefficient(), kMaxBackoffSecs);
-    }
-
-    if (ShuttingDown(self)) {
-      break;
-    }
-
-
-    uint64_t start_us = MicroTime();
-    uint64_t end_us = start_us + profiler->options_.GetDurationS() * UINT64_C(1000000);
-    uint64_t now_us = start_us;
-
-    VLOG(profiler) << "Starting profiling run now for "
-                   << PrettyDuration((end_us - start_us) * 1000);
-
-    SampleCheckpoint check_point(profiler);
-
-    size_t valid_samples = 0;
-    while (now_us < end_us) {
-      if (ShuttingDown(self)) {
-        break;
-      }
-
-      usleep(profiler->options_.GetIntervalUs());    // Non-interruptible sleep.
-
-      ThreadList* thread_list = runtime->GetThreadList();
-
-      profiler->profiler_barrier_->Init(self, 0);
-      size_t barrier_count = thread_list->RunCheckpointOnRunnableThreads(&check_point);
-
-      // All threads are suspended, nothing to do.
-      if (barrier_count == 0) {
-        now_us = MicroTime();
-        continue;
-      }
-
-      valid_samples += barrier_count;
-
-      ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
-
-      // Wait for the barrier to be crossed by all runnable threads.  This wait
-      // is done with a timeout so that we can detect problems with the checkpoint
-      // running code.  We should never see this.
-      const uint32_t kWaitTimeoutMs = 10000;
-
-      // Wait for all threads to pass the barrier.
-      bool timed_out =  profiler->profiler_barrier_->Increment(self, barrier_count, kWaitTimeoutMs);
-
-      // We should never get a timeout.  If we do, it suggests a problem with the checkpoint
-      // code.  Crash the process in this case.
-      CHECK(!timed_out);
-
-      // Update the current time.
-      now_us = MicroTime();
-    }
-
-    if (valid_samples > 0) {
-      // After the profile has been taken, write it out.
-      ScopedObjectAccess soa(self);   // Acquire the mutator lock.
-      uint32_t size = profiler->WriteProfile();
-      VLOG(profiler) << "Profile size: " << size;
-    }
-  }
-
-  LOG(INFO) << "Profiler shutdown";
-  runtime->DetachCurrentThread();
-  return nullptr;
-}
-
-// Write out the profile file if we are generating a profile.
-uint32_t BackgroundMethodSamplingProfiler::WriteProfile() {
-  std::string full_name = output_filename_;
-  VLOG(profiler) << "Saving profile to " << full_name;
-
-  int fd = open(full_name.c_str(), O_RDWR);
-  if (fd < 0) {
-    // Open failed.
-    LOG(ERROR) << "Failed to open profile file " << full_name;
-    return 0;
-  }
-
-  // Lock the file for exclusive access.  This will block if another process is using
-  // the file.
-  int err = flock(fd, LOCK_EX);
-  if (err < 0) {
-    LOG(ERROR) << "Failed to lock profile file " << full_name;
-    return 0;
-  }
-
-  // Read the previous profile.
-  profile_table_.ReadPrevious(fd, options_.GetProfileType());
-
-  // Move back to the start of the file.
-  lseek(fd, 0, SEEK_SET);
-
-  // Format the profile output and write to the file.
-  std::ostringstream os;
-  uint32_t num_methods = DumpProfile(os);
-  std::string data(os.str());
-  const char *p = data.c_str();
-  size_t length = data.length();
-  size_t full_length = length;
-  do {
-    int n = ::write(fd, p, length);
-    p += n;
-    length -= n;
-  } while (length > 0);
-
-  // Truncate the file to the new length.
-  if (ftruncate(fd, full_length) == -1) {
-    LOG(ERROR) << "Failed to truncate profile file " << full_name;
-  }
-
-  // Now unlock the file, allowing another process in.
-  err = flock(fd, LOCK_UN);
-  if (err < 0) {
-    LOG(ERROR) << "Failed to unlock profile file " << full_name;
-  }
-
-  // Done, close the file.
-  ::close(fd);
-
-  // Clean the profile for the next time.
-  CleanProfile();
-
-  return num_methods;
-}
-
-bool BackgroundMethodSamplingProfiler::Start(
-    const std::string& output_filename, const ProfilerOptions& options) {
-  if (!options.IsEnabled()) {
-    return false;
-  }
-
-  CHECK(!output_filename.empty());
-
-  Thread* self = Thread::Current();
-  {
-    MutexLock mu(self, *Locks::profiler_lock_);
-    // Don't start two profiler threads.
-    if (profiler_ != nullptr) {
-      return true;
-    }
-  }
-
-  LOG(INFO) << "Starting profiler using output file: " << output_filename
-            << " and options: " << options;
-  {
-    MutexLock mu(self, *Locks::profiler_lock_);
-    profiler_ = new BackgroundMethodSamplingProfiler(output_filename, options);
-
-    CHECK_PTHREAD_CALL(pthread_create, (&profiler_pthread_, nullptr, &RunProfilerThread,
-        reinterpret_cast<void*>(profiler_)),
-                       "Profiler thread");
-  }
-  return true;
-}
-
-
-
-void BackgroundMethodSamplingProfiler::Stop() {
-  BackgroundMethodSamplingProfiler* profiler = nullptr;
-  pthread_t profiler_pthread = 0U;
-  {
-    MutexLock trace_mu(Thread::Current(), *Locks::profiler_lock_);
-    CHECK(!shutting_down_);
-    profiler = profiler_;
-    shutting_down_ = true;
-    profiler_pthread = profiler_pthread_;
-  }
-
-  // Now wake up the sampler thread if it sleeping.
-  {
-    MutexLock profile_mu(Thread::Current(), profiler->wait_lock_);
-    profiler->period_condition_.Signal(Thread::Current());
-  }
-  // Wait for the sample thread to stop.
-  CHECK_PTHREAD_CALL(pthread_join, (profiler_pthread, nullptr), "profiler thread shutdown");
-
-  {
-    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
-    profiler_ = nullptr;
-  }
-  delete profiler;
-}
-
-
-void BackgroundMethodSamplingProfiler::Shutdown() {
-  Stop();
-}
-
-BackgroundMethodSamplingProfiler::BackgroundMethodSamplingProfiler(
-  const std::string& output_filename, const ProfilerOptions& options)
-    : output_filename_(output_filename),
-      options_(options),
-      wait_lock_("Profile wait lock"),
-      period_condition_("Profile condition", wait_lock_),
-      profile_table_(wait_lock_),
-      profiler_barrier_(new Barrier(0)) {
-  // Populate the filtered_methods set.
-  // This is empty right now, but to add a method, do this:
-  //
-  // filtered_methods_.insert("void java.lang.Object.wait(long, int)");
-}
-
-// Filter out methods the profiler doesn't want to record.
-// We require mutator lock since some statistics will be updated here.
-bool BackgroundMethodSamplingProfiler::ProcessMethod(ArtMethod* method) {
-  if (method == nullptr) {
-    profile_table_.NullMethod();
-    // Don't record a null method.
-    return false;
-  }
-
-  mirror::Class* cls = method->GetDeclaringClass();
-  if (cls != nullptr) {
-    if (cls->GetClassLoader() == nullptr) {
-      // Don't include things in the boot
-      profile_table_.BootMethod();
-      return false;
-    }
-  }
-
-  bool is_filtered = false;
-
-  if (strcmp(method->GetName(), "<clinit>") == 0) {
-    // always filter out class init
-    is_filtered = true;
-  }
-
-  // Filter out methods by name if there are any.
-  if (!is_filtered && filtered_methods_.size() > 0) {
-    std::string method_full_name = PrettyMethod(method);
-
-    // Don't include specific filtered methods.
-    is_filtered = filtered_methods_.count(method_full_name) != 0;
-  }
-  return !is_filtered;
-}
-
-// A method has been hit, record its invocation in the method map.
-// The mutator_lock must be held (shared) when this is called.
-void BackgroundMethodSamplingProfiler::RecordMethod(ArtMethod* method) {
-  // Add to the profile table unless it is filtered out.
-  if (ProcessMethod(method)) {
-    profile_table_.Put(method);
-  }
-}
-
-// Record the current bounded stack into sampling results.
-void BackgroundMethodSamplingProfiler::RecordStack(const std::vector<InstructionLocation>& stack) {
-  if (stack.size() == 0) {
-    return;
-  }
-  // Get the method on top of the stack. We use this method to perform filtering.
-  ArtMethod* method = stack.front().first;
-  if (ProcessMethod(method)) {
-      profile_table_.PutStack(stack);
-  }
-}
-
-// Clean out any recordings for the method traces.
-void BackgroundMethodSamplingProfiler::CleanProfile() {
-  profile_table_.Clear();
-}
-
-uint32_t BackgroundMethodSamplingProfiler::DumpProfile(std::ostream& os) {
-  return profile_table_.Write(os, options_.GetProfileType());
-}
-
-// Profile Table.
-// This holds a mapping of ArtMethod* to a count of how many times a sample
-// hit it at the top of the stack.
-ProfileSampleResults::ProfileSampleResults(Mutex& lock)
-    : lock_(lock),
-      num_samples_(0U),
-      num_null_methods_(0U),
-      num_boot_methods_(0U),
-      previous_num_samples_(0U),
-      previous_num_null_methods_(0U),
-      previous_num_boot_methods_(0U) {
-  for (int i = 0; i < kHashSize; i++) {
-    table[i] = nullptr;
-  }
-  method_context_table = nullptr;
-  stack_trie_root_ = nullptr;
-}
-
-ProfileSampleResults::~ProfileSampleResults() {
-  Clear();
-}
-
-// Add a method to the profile table.  If it's the first time the method
-// has been seen, add it with count=1, otherwise increment the count.
-void ProfileSampleResults::Put(ArtMethod* method) {
-  MutexLock mu(Thread::Current(), lock_);
-  uint32_t index = Hash(method);
-  if (table[index] == nullptr) {
-    table[index] = new Map();
-  }
-  Map::iterator i = table[index]->find(method);
-  if (i == table[index]->end()) {
-    (*table[index])[method] = 1;
-  } else {
-    i->second++;
-  }
-  num_samples_++;
-}
-
-// Add a bounded stack to the profile table. Only the count of the method on
-// top of the frame will be increased.
-void ProfileSampleResults::PutStack(const std::vector<InstructionLocation>& stack) {
-  MutexLock mu(Thread::Current(), lock_);
-  ScopedObjectAccess soa(Thread::Current());
-  if (stack_trie_root_ == nullptr) {
-    // The root of the stack trie is a dummy node so that we don't have to maintain
-    // a collection of tries.
-    stack_trie_root_ = new StackTrieNode();
-  }
-
-  StackTrieNode* current = stack_trie_root_;
-  if (stack.size() == 0) {
-    current->IncreaseCount();
-    return;
-  }
-
-  for (std::vector<InstructionLocation>::const_reverse_iterator iter = stack.rbegin();
-       iter != stack.rend(); ++iter) {
-    InstructionLocation inst_loc = *iter;
-    ArtMethod* method = inst_loc.first;
-    if (method == nullptr) {
-      // skip null method
-      continue;
-    }
-    uint32_t dex_pc = inst_loc.second;
-    uint32_t method_idx = method->GetDexMethodIndex();
-    const DexFile* dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
-    MethodReference method_ref(dex_file, method_idx);
-    StackTrieNode* child = current->FindChild(method_ref, dex_pc);
-    if (child != nullptr) {
-      current = child;
-    } else {
-      uint32_t method_size = 0;
-      const DexFile::CodeItem* codeitem = method->GetCodeItem();
-      if (codeitem != nullptr) {
-        method_size = codeitem->insns_size_in_code_units_;
-      }
-      StackTrieNode* new_node = new StackTrieNode(method_ref, dex_pc, method_size, current);
-      current->AppendChild(new_node);
-      current = new_node;
-    }
-  }
-
-  if (current != stack_trie_root_ && current->GetCount() == 0) {
-    // Insert into method_context table;
-    if (method_context_table == nullptr) {
-      method_context_table = new MethodContextMap();
-    }
-    MethodReference method = current->GetMethod();
-    MethodContextMap::iterator i = method_context_table->find(method);
-    if (i == method_context_table->end()) {
-      TrieNodeSet* node_set = new TrieNodeSet();
-      node_set->insert(current);
-      (*method_context_table)[method] = node_set;
-    } else {
-      TrieNodeSet* node_set = i->second;
-      node_set->insert(current);
-    }
-  }
-  current->IncreaseCount();
-  num_samples_++;
-}
-
-// Write the profile table to the output stream.  Also merge with the previous profile.
-uint32_t ProfileSampleResults::Write(std::ostream& os, ProfileDataType type) {
-  ScopedObjectAccess soa(Thread::Current());
-  num_samples_ += previous_num_samples_;
-  num_null_methods_ += previous_num_null_methods_;
-  num_boot_methods_ += previous_num_boot_methods_;
-
-  VLOG(profiler) << "Profile: "
-                 << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_;
-  os << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_ << "\n";
-  uint32_t num_methods = 0;
-  if (type == kProfilerMethod) {
-    for (int i = 0 ; i < kHashSize; i++) {
-      Map *map = table[i];
-      if (map != nullptr) {
-        for (const auto &meth_iter : *map) {
-          ArtMethod *method = meth_iter.first;
-          std::string method_name = PrettyMethod(method);
-
-          const DexFile::CodeItem* codeitem = method->GetCodeItem();
-          uint32_t method_size = 0;
-          if (codeitem != nullptr) {
-            method_size = codeitem->insns_size_in_code_units_;
-          }
-          uint32_t count = meth_iter.second;
-
-          // Merge this profile entry with one from a previous run (if present).  Also
-          // remove the previous entry.
-          PreviousProfile::iterator pi = previous_.find(method_name);
-          if (pi != previous_.end()) {
-            count += pi->second.count_;
-            previous_.erase(pi);
-          }
-          os << StringPrintf("%s/%u/%u\n",  method_name.c_str(), count, method_size);
-          ++num_methods;
-        }
-      }
-    }
-  } else if (type == kProfilerBoundedStack) {
-    if (method_context_table != nullptr) {
-      for (const auto &method_iter : *method_context_table) {
-        MethodReference method = method_iter.first;
-        TrieNodeSet* node_set = method_iter.second;
-        std::string method_name = PrettyMethod(method.dex_method_index, *(method.dex_file));
-        uint32_t method_size = 0;
-        uint32_t total_count = 0;
-        PreviousContextMap new_context_map;
-        for (const auto &trie_node_i : *node_set) {
-          StackTrieNode* node = trie_node_i;
-          method_size = node->GetMethodSize();
-          uint32_t count = node->GetCount();
-          uint32_t dexpc = node->GetDexPC();
-          total_count += count;
-
-          StackTrieNode* current = node->GetParent();
-          // We go backward on the trie to retrieve context and dex_pc until the dummy root.
-          // The format of the context is "method_1@pc_1@method_2@pc_2@..."
-          std::vector<std::string> context_vector;
-          while (current != nullptr && current->GetParent() != nullptr) {
-            context_vector.push_back(StringPrintf("%s@%u",
-                PrettyMethod(current->GetMethod().dex_method_index, *(current->GetMethod().dex_file)).c_str(),
-                current->GetDexPC()));
-            current = current->GetParent();
-          }
-          std::string context_sig = Join(context_vector, '@');
-          new_context_map[std::make_pair(dexpc, context_sig)] = count;
-        }
-
-        PreviousProfile::iterator pi = previous_.find(method_name);
-        if (pi != previous_.end()) {
-          total_count += pi->second.count_;
-          PreviousContextMap* previous_context_map = pi->second.context_map_;
-          if (previous_context_map != nullptr) {
-            for (const auto &context_i : *previous_context_map) {
-              uint32_t count = context_i.second;
-              PreviousContextMap::iterator ci = new_context_map.find(context_i.first);
-              if (ci == new_context_map.end()) {
-                new_context_map[context_i.first] = count;
-              } else {
-                ci->second += count;
-              }
-            }
-          }
-          delete previous_context_map;
-          previous_.erase(pi);
-        }
-        // We write out profile data with dex pc and context information in the following format:
-        // "method/total_count/size/[pc_1:count_1:context_1#pc_2:count_2:context_2#...]".
-        std::vector<std::string> context_count_vector;
-        for (const auto &context_i : new_context_map) {
-          context_count_vector.push_back(StringPrintf("%u:%u:%s", context_i.first.first,
-              context_i.second, context_i.first.second.c_str()));
-        }
-        os << StringPrintf("%s/%u/%u/[%s]\n", method_name.c_str(), total_count,
-            method_size, Join(context_count_vector, '#').c_str());
-        ++num_methods;
-      }
-    }
-  }
-
-  // Now we write out the remaining previous methods.
-  for (const auto &pi : previous_) {
-    if (type == kProfilerMethod) {
-      os << StringPrintf("%s/%u/%u\n",  pi.first.c_str(), pi.second.count_, pi.second.method_size_);
-    } else if (type == kProfilerBoundedStack) {
-      os << StringPrintf("%s/%u/%u/[",  pi.first.c_str(), pi.second.count_, pi.second.method_size_);
-      PreviousContextMap* previous_context_map = pi.second.context_map_;
-      if (previous_context_map != nullptr) {
-        std::vector<std::string> context_count_vector;
-        for (const auto &context_i : *previous_context_map) {
-          context_count_vector.push_back(StringPrintf("%u:%u:%s", context_i.first.first,
-              context_i.second, context_i.first.second.c_str()));
-        }
-        os << Join(context_count_vector, '#');
-      }
-      os << "]\n";
-    }
-    ++num_methods;
-  }
-  return num_methods;
-}
-
-void ProfileSampleResults::Clear() {
-  num_samples_ = 0;
-  num_null_methods_ = 0;
-  num_boot_methods_ = 0;
-  for (int i = 0; i < kHashSize; i++) {
-    delete table[i];
-    table[i] = nullptr;
-  }
-  if (stack_trie_root_ != nullptr) {
-    stack_trie_root_->DeleteChildren();
-    delete stack_trie_root_;
-    stack_trie_root_ = nullptr;
-    if (method_context_table != nullptr) {
-      delete method_context_table;
-      method_context_table = nullptr;
-    }
-  }
-  for (auto &pi : previous_) {
-    if (pi.second.context_map_ != nullptr) {
-      delete pi.second.context_map_;
-      pi.second.context_map_ = nullptr;
-    }
-  }
-  previous_.clear();
-}
-
-uint32_t ProfileSampleResults::Hash(ArtMethod* method) {
-  return (PointerToLowMemUInt32(method) >> 3) % kHashSize;
-}
-
-// Read a single line into the given string.  Returns true if everything OK, false
-// on EOF or error.
-static bool ReadProfileLine(int fd, std::string& line) {
-  char buf[4];
-  line.clear();
-  while (true) {
-    int n = read(fd, buf, 1);     // TODO: could speed this up but is it worth it?
-    if (n != 1) {
-      return false;
-    }
-    if (buf[0] == '\n') {
-      break;
-    }
-    line += buf[0];
-  }
-  return true;
-}
-
-void ProfileSampleResults::ReadPrevious(int fd, ProfileDataType type) {
-  // Reset counters.
-  previous_num_samples_ = previous_num_null_methods_ = previous_num_boot_methods_ = 0;
-
-  std::string line;
-
-  // The first line contains summary information.
-  if (!ReadProfileLine(fd, line)) {
-    return;
-  }
-  std::vector<std::string> summary_info;
-  Split(line, '/', &summary_info);
-  if (summary_info.size() != 3) {
-    // Bad summary info.  It should be count/nullcount/bootcount
-    return;
-  }
-  previous_num_samples_ = strtoul(summary_info[0].c_str(), nullptr, 10);
-  previous_num_null_methods_ = strtoul(summary_info[1].c_str(), nullptr, 10);
-  previous_num_boot_methods_ = strtoul(summary_info[2].c_str(), nullptr, 10);
-
-  // Now read each line until the end of file.  Each line consists of 3 or 4 fields separated by /
-  while (true) {
-    if (!ReadProfileLine(fd, line)) {
-      break;
-    }
-    std::vector<std::string> info;
-    Split(line, '/', &info);
-    if (info.size() != 3 && info.size() != 4) {
-      // Malformed.
-      break;
-    }
-    std::string methodname = info[0];
-    uint32_t total_count = strtoul(info[1].c_str(), nullptr, 10);
-    uint32_t size = strtoul(info[2].c_str(), nullptr, 10);
-    PreviousContextMap* context_map = nullptr;
-    if (type == kProfilerBoundedStack && info.size() == 4) {
-      context_map = new PreviousContextMap();
-      std::string context_counts_str = info[3].substr(1, info[3].size() - 2);
-      std::vector<std::string> context_count_pairs;
-      Split(context_counts_str, '#', &context_count_pairs);
-      for (uint32_t i = 0; i < context_count_pairs.size(); ++i) {
-        std::vector<std::string> context_count;
-        Split(context_count_pairs[i], ':', &context_count);
-        if (context_count.size() == 2) {
-          // Handles the situtation when the profile file doesn't contain context information.
-          uint32_t dexpc = strtoul(context_count[0].c_str(), nullptr, 10);
-          uint32_t count = strtoul(context_count[1].c_str(), nullptr, 10);
-          (*context_map)[std::make_pair(dexpc, "")] = count;
-        } else {
-          // Handles the situtation when the profile file contains context information.
-          uint32_t dexpc = strtoul(context_count[0].c_str(), nullptr, 10);
-          uint32_t count = strtoul(context_count[1].c_str(), nullptr, 10);
-          std::string context = context_count[2];
-          (*context_map)[std::make_pair(dexpc, context)] = count;
-        }
-      }
-    }
-    previous_[methodname] = PreviousValue(total_count, size, context_map);
-  }
-}
-
-bool ProfileFile::LoadFile(const std::string& fileName) {
-  LOG(VERBOSE) << "reading profile file " << fileName;
-  struct stat st;
-  int err = stat(fileName.c_str(), &st);
-  if (err == -1) {
-    LOG(VERBOSE) << "not found";
-    return false;
-  }
-  if (st.st_size == 0) {
-    return false;  // Empty profiles are invalid.
-  }
-  std::ifstream in(fileName.c_str());
-  if (!in) {
-    LOG(VERBOSE) << "profile file " << fileName << " exists but can't be opened";
-    LOG(VERBOSE) << "file owner: " << st.st_uid << ":" << st.st_gid;
-    LOG(VERBOSE) << "me: " << getuid() << ":" << getgid();
-    LOG(VERBOSE) << "file permissions: " << std::oct << st.st_mode;
-    LOG(VERBOSE) << "errno: " << errno;
-    return false;
-  }
-  // The first line contains summary information.
-  std::string line;
-  std::getline(in, line);
-  if (in.eof()) {
-    return false;
-  }
-  std::vector<std::string> summary_info;
-  Split(line, '/', &summary_info);
-  if (summary_info.size() != 3) {
-    // Bad summary info.  It should be total/null/boot.
-    return false;
-  }
-  // This is the number of hits in all profiled methods (without null or boot methods)
-  uint32_t total_count = strtoul(summary_info[0].c_str(), nullptr, 10);
-
-  // Now read each line until the end of file.  Each line consists of 3 fields separated by '/'.
-  // Store the info in descending order given by the most used methods.
-  typedef std::set<std::pair<int, std::vector<std::string>>> ProfileSet;
-  ProfileSet countSet;
-  while (!in.eof()) {
-    std::getline(in, line);
-    if (in.eof()) {
-      break;
-    }
-    std::vector<std::string> info;
-    Split(line, '/', &info);
-    if (info.size() != 3 && info.size() != 4) {
-      // Malformed.
-      return false;
-    }
-    int count = atoi(info[1].c_str());
-    countSet.insert(std::make_pair(-count, info));
-  }
-
-  uint32_t curTotalCount = 0;
-  ProfileSet::iterator end = countSet.end();
-  const ProfileData* prevData = nullptr;
-  for (ProfileSet::iterator it = countSet.begin(); it != end ; it++) {
-    const std::string& methodname = it->second[0];
-    uint32_t count = -it->first;
-    uint32_t size = strtoul(it->second[2].c_str(), nullptr, 10);
-    double usedPercent = (count * 100.0) / total_count;
-
-    curTotalCount += count;
-    // Methods with the same count should be part of the same top K percentage bucket.
-    double topKPercentage = (prevData != nullptr) && (prevData->GetCount() == count)
-      ? prevData->GetTopKUsedPercentage()
-      : 100 * static_cast<double>(curTotalCount) / static_cast<double>(total_count);
-
-    // Add it to the profile map.
-    ProfileData curData = ProfileData(methodname, count, size, usedPercent, topKPercentage);
-    profile_map_[methodname] = curData;
-    prevData = &curData;
-  }
-  return true;
-}
-
-bool ProfileFile::GetProfileData(ProfileFile::ProfileData* data, const std::string& method_name) {
-  ProfileMap::iterator i = profile_map_.find(method_name);
-  if (i == profile_map_.end()) {
-    return false;
-  }
-  *data = i->second;
-  return true;
-}
-
-bool ProfileFile::GetTopKSamples(std::set<std::string>& topKSamples, double topKPercentage) {
-  ProfileMap::iterator end = profile_map_.end();
-  for (ProfileMap::iterator it = profile_map_.begin(); it != end; it++) {
-    if (it->second.GetTopKUsedPercentage() < topKPercentage) {
-      topKSamples.insert(it->first);
-    }
-  }
-  return true;
-}
-
-StackTrieNode* StackTrieNode::FindChild(MethodReference method, uint32_t dex_pc) {
-  if (children_.size() == 0) {
-    return nullptr;
-  }
-  // Create a dummy node for searching.
-  StackTrieNode* node = new StackTrieNode(method, dex_pc, 0, nullptr);
-  std::set<StackTrieNode*, StackTrieNodeComparator>::iterator i = children_.find(node);
-  delete node;
-  return (i == children_.end()) ? nullptr : *i;
-}
-
-void StackTrieNode::DeleteChildren() {
-  for (auto &child : children_) {
-    if (child != nullptr) {
-      child->DeleteChildren();
-      delete child;
-    }
-  }
-}
-
-}  // namespace art
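
For reference, the profile text format that the removed ProfileSampleResults/ProfileFile code reads and writes is a summary line of three '/'-separated counters (total/null/boot) followed by one line per method, either name/count/size or name/count/size/[dex_pc:count:context#...]. A minimal standalone C++ sketch of that parsing, using only the standard library (Record, SplitOn and the sample input are illustrative names and data, not ART code):

// profile_parse_sketch.cc -- sketch of the removed text profile format:
//   line 1 : total_samples/null_samples/boot_samples
//   lines  : method_name/count/method_size[/[dex_pc:count:context#...]]
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

struct Record {          // Illustrative struct, not an ART type.
  std::string method;
  uint32_t count;
  uint32_t size;
};

static std::vector<std::string> SplitOn(const std::string& s, char sep) {
  std::vector<std::string> out;
  std::stringstream ss(s);
  std::string item;
  while (std::getline(ss, item, sep)) out.push_back(item);
  return out;
}

int main() {
  std::istringstream in(
      "100/5/20\n"
      "Lpkg/Foo;->bar()V/42/128\n"
      "Lpkg/Foo;->baz()V/7/64/[3:7:Lpkg/Foo;->bar()V]\n");
  std::string line;
  std::getline(in, line);                              // Summary line.
  std::vector<std::string> summary = SplitOn(line, '/');
  if (summary.size() != 3) return 1;                   // Malformed summary.
  std::cout << "total=" << summary[0] << " null=" << summary[1]
            << " boot=" << summary[2] << "\n";
  while (std::getline(in, line)) {                     // Per-method lines.
    std::vector<std::string> info = SplitOn(line, '/');
    if (info.size() != 3 && info.size() != 4) break;   // Malformed entry.
    Record r{info[0],
             static_cast<uint32_t>(std::stoul(info[1])),
             static_cast<uint32_t>(std::stoul(info[2]))};
    std::cout << r.method << " count=" << r.count << " size=" << r.size << "\n";
  }
  return 0;
}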
diff --git a/runtime/profiler.h b/runtime/profiler.h
deleted file mode 100644
index bd29f71..0000000
--- a/runtime/profiler.h
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_PROFILER_H_
-#define ART_RUNTIME_PROFILER_H_
-
-#include <memory>
-#include <ostream>
-#include <set>
-#include <string>
-#include <vector>
-
-#include "barrier.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "globals.h"
-#include "instrumentation.h"
-#include "profiler_options.h"
-#include "os.h"
-#include "safe_map.h"
-#include "method_reference.h"
-
-namespace art {
-
-namespace mirror {
-  class Class;
-}  // namespace mirror
-class ArtMethod;
-class Thread;
-
-typedef std::pair<ArtMethod*, uint32_t> InstructionLocation;
-
-// This class stores the sampled bounded stacks in a trie structure. A path of the trie represents
-// a particular context with the method on top of the stack being a leaf or an internal node of the
-// trie rather than the root.
-class StackTrieNode {
- public:
-  StackTrieNode(MethodReference method, uint32_t dex_pc, uint32_t method_size,
-      StackTrieNode* parent) :
-      parent_(parent), method_(method), dex_pc_(dex_pc),
-      count_(0), method_size_(method_size) {
-  }
-  StackTrieNode() : parent_(nullptr), method_(nullptr, 0),
-      dex_pc_(0), count_(0), method_size_(0) {
-  }
-  StackTrieNode* GetParent() { return parent_; }
-  MethodReference GetMethod() { return method_; }
-  uint32_t GetCount() { return count_; }
-  uint32_t GetDexPC() { return dex_pc_; }
-  uint32_t GetMethodSize() { return method_size_; }
-  void AppendChild(StackTrieNode* child) { children_.insert(child); }
-  StackTrieNode* FindChild(MethodReference method, uint32_t dex_pc);
-  void DeleteChildren();
-  void IncreaseCount() { ++count_; }
-
- private:
-  // Comparator for stack trie node.
-  struct StackTrieNodeComparator {
-    bool operator()(StackTrieNode* node1, StackTrieNode* node2) const {
-      MethodReference mr1 = node1->GetMethod();
-      MethodReference mr2 = node2->GetMethod();
-      if (mr1.dex_file == mr2.dex_file) {
-        if (mr1.dex_method_index == mr2.dex_method_index) {
-          return node1->GetDexPC() < node2->GetDexPC();
-        } else {
-          return mr1.dex_method_index < mr2.dex_method_index;
-        }
-      } else {
-        return mr1.dex_file < mr2.dex_file;
-      }
-    }
-  };
-
-  std::set<StackTrieNode*, StackTrieNodeComparator> children_;
-  StackTrieNode* parent_;
-  MethodReference method_;
-  uint32_t dex_pc_;
-  uint32_t count_;
-  uint32_t method_size_;
-};
-
-//
-// This class holds all the results for all runs of the profiler.  It also
-// counts the number of null methods (where we can't determine the method) and
-// the number of methods in the boot path (where we have already compiled the method).
-//
-// This object is an internal profiler object and uses the same locking as the profiler
-// itself.
-class ProfileSampleResults {
- public:
-  explicit ProfileSampleResults(Mutex& lock);
-  ~ProfileSampleResults();
-
-  void Put(ArtMethod* method) REQUIRES(!lock_);
-  void PutStack(const std::vector<InstructionLocation>& stack_dump) REQUIRES(!lock_);
-  uint32_t Write(std::ostream &os, ProfileDataType type);
-  void ReadPrevious(int fd, ProfileDataType type);
-  void Clear();
-  uint32_t GetNumSamples() { return num_samples_; }
-  void NullMethod() { ++num_null_methods_; }
-  void BootMethod() { ++num_boot_methods_; }
-
- private:
-  uint32_t Hash(ArtMethod* method);
-  static constexpr int kHashSize = 17;
-  Mutex& lock_;                  // Reference to the main profiler lock - we don't need two of them.
-  uint32_t num_samples_;         // Total number of samples taken.
-  uint32_t num_null_methods_;    // Number of samples where can don't know the method.
-  uint32_t num_boot_methods_;    // Number of samples in the boot path.
-
-  typedef std::map<ArtMethod*, uint32_t> Map;  // Map of method vs its count.
-  Map *table[kHashSize];
-
-  typedef std::set<StackTrieNode*> TrieNodeSet;
-  // Map of method hit by profiler vs the set of stack trie nodes for this method.
-  typedef std::map<MethodReference, TrieNodeSet*, MethodReferenceComparator> MethodContextMap;
-  MethodContextMap *method_context_table;
-  StackTrieNode* stack_trie_root_;  // Root of the trie that stores sampled stack information.
-
-  // Map from <pc, context> to counts.
-  typedef std::map<std::pair<uint32_t, std::string>, uint32_t> PreviousContextMap;
-  struct PreviousValue {
-    PreviousValue() : count_(0), method_size_(0), context_map_(nullptr) {}
-    PreviousValue(uint32_t count, uint32_t method_size, PreviousContextMap* context_map)
-      : count_(count), method_size_(method_size), context_map_(context_map) {}
-    uint32_t count_;
-    uint32_t method_size_;
-    PreviousContextMap* context_map_;
-  };
-
-  typedef std::map<std::string, PreviousValue> PreviousProfile;
-  PreviousProfile previous_;
-  uint32_t previous_num_samples_;
-  uint32_t previous_num_null_methods_;     // Number of samples where can don't know the method.
-  uint32_t previous_num_boot_methods_;     // Number of samples in the boot path.
-};
-
-//
-// The BackgroundMethodSamplingProfiler runs in a thread.  Most of the time it is sleeping but
-// occasionally wakes up and counts the number of times a method is called.  Each time
-// it ticks, it looks at the current method and records it in the ProfileSampleResults
-// table.
-//
-// The timing is controlled by a number of variables:
-// 1.  Period: the time between sampling runs.
-// 2.  Interval: the time between each sample in a run.
-// 3.  Duration: the duration of a run.
-//
-// So the profiler thread is sleeping for the 'period' time.  It wakes up and runs for the
-// 'duration'.  The run consists of a series of samples, each of which is 'interval' microseconds
-// apart.  At the end of a run, it writes the results table to a file and goes back to sleep.
-
-class BackgroundMethodSamplingProfiler {
- public:
-  // Start a profile thread with the user-supplied arguments.
-  // Returns true if the profile was started or if it was already running. Returns false otherwise.
-  static bool Start(const std::string& output_filename, const ProfilerOptions& options)
-      REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_,
-               !Locks::profiler_lock_);
-
-  // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
-  static void Stop() REQUIRES(!Locks::profiler_lock_, !wait_lock_, !Locks::profiler_lock_)
-      NO_THREAD_SAFETY_ANALYSIS;
-  // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
-  static void Shutdown() REQUIRES(!Locks::profiler_lock_) NO_THREAD_SAFETY_ANALYSIS;
-
-  void RecordMethod(ArtMethod *method) SHARED_REQUIRES(Locks::mutator_lock_);
-  void RecordStack(const std::vector<InstructionLocation>& stack)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool ProcessMethod(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_);
-  const ProfilerOptions& GetProfilerOptions() const { return options_; }
-
-  Barrier& GetBarrier() {
-    return *profiler_barrier_;
-  }
-
- private:
-  explicit BackgroundMethodSamplingProfiler(
-    const std::string& output_filename, const ProfilerOptions& options);
-
-  // The sampling interval in microseconds is passed as an argument.
-  // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
-  static void* RunProfilerThread(void* arg) REQUIRES(!Locks::profiler_lock_)
-      NO_THREAD_SAFETY_ANALYSIS;
-
-  uint32_t WriteProfile() SHARED_REQUIRES(Locks::mutator_lock_);
-
-  void CleanProfile();
-  uint32_t DumpProfile(std::ostream& os) SHARED_REQUIRES(Locks::mutator_lock_);
-  static bool ShuttingDown(Thread* self) REQUIRES(!Locks::profiler_lock_);
-
-  static BackgroundMethodSamplingProfiler* profiler_ GUARDED_BY(Locks::profiler_lock_);
-
-  // We need to shut the sample thread down at exit.  Setting this to true will do that.
-  static volatile bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
-
-  // Sampling thread, non-zero when sampling.
-  static pthread_t profiler_pthread_;
-
-  // Some measure of the number of samples that are significant.
-  static constexpr uint32_t kSignificantSamples = 10;
-
-  // The name of the file where profile data will be written.
-  std::string output_filename_;
-  // The options used to start the profiler.
-  const ProfilerOptions& options_;
-
-
-  // Profile condition support.
-  Mutex wait_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  ConditionVariable period_condition_ GUARDED_BY(wait_lock_);
-
-  ProfileSampleResults profile_table_;
-
-  std::unique_ptr<Barrier> profiler_barrier_;
-
-  // Set of methods to be filtered out.  This will probably be rare because
-  // most of the methods we want to be filtered reside in the boot path and
-  // are automatically filtered.
-  typedef std::set<std::string> FilteredMethods;
-  FilteredMethods filtered_methods_;
-
-  DISALLOW_COPY_AND_ASSIGN(BackgroundMethodSamplingProfiler);
-};
-
-//
-// Contains profile data generated from previous runs of the program and stored
-// in a file.  It is used to determine whether to compile a particular method or not.
-class ProfileFile {
- public:
-  class ProfileData {
-   public:
-    ProfileData() : count_(0), method_size_(0), used_percent_(0), top_k_used_percentage_(0) {}
-    ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size,
-      double used_percent, double top_k_used_percentage) :
-      method_name_(method_name), count_(count), method_size_(method_size),
-      used_percent_(used_percent), top_k_used_percentage_(top_k_used_percentage) {
-      // TODO: currently method_size_ is unused
-      UNUSED(method_size_);
-    }
-
-    double GetUsedPercent() const { return used_percent_; }
-    uint32_t GetCount() const { return count_; }
-    double GetTopKUsedPercentage() const { return top_k_used_percentage_; }
-
-   private:
-    std::string method_name_;       // Method name.
-    uint32_t count_;                // Number of times it has been called.
-    uint32_t method_size_;          // Size of the method on dex instructions.
-    double used_percent_;           // Percentage of how many times this method was called.
-    double top_k_used_percentage_;  // The percentage of the group that comprise K% of the total
-                                    // used methods this methods belongs to.
-  };
-
- public:
-  // Loads profile data from the given file. The new data are merged with any existing data.
-  // Returns true if the file was loaded successfully and false otherwise.
-  bool LoadFile(const std::string& filename);
-
-  // Computes the group that comprise top_k_percentage of the total used methods.
-  bool GetTopKSamples(std::set<std::string>& top_k_methods, double top_k_percentage);
-
-  // If the given method has an entry in the profile table it updates the data
-  // and returns true. Otherwise returns false and leaves the data unchanged.
-  bool GetProfileData(ProfileData* data, const std::string& method_name);
-
- private:
-  // Profile data is stored in a map, indexed by the full method name.
-  typedef std::map<std::string, ProfileData> ProfileMap;
-  ProfileMap profile_map_;
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_PROFILER_H_
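
The removed StackTrieNode keeps its children in a std::set ordered by (dex_file, dex_method_index, dex_pc), and FindChild allocates a throwaway key node just to probe that set. A standalone sketch of the same comparator pattern with a plain struct key (FrameKey and FrameKeyLess are illustrative names); with a transparent comparator no heap-allocated dummy is needed for the lookup:

#include <cstdint>
#include <iostream>
#include <set>
#include <tuple>

// Illustrative stand-in for (dex_file, dex_method_index, dex_pc).
struct FrameKey {
  const void* dex_file;
  uint32_t method_index;
  uint32_t dex_pc;
};

// Strict weak ordering equivalent to StackTrieNodeComparator in the removed code.
struct FrameKeyLess {
  using is_transparent = void;  // Allows lookup by value without a node object.
  bool operator()(const FrameKey& a, const FrameKey& b) const {
    return std::tie(a.dex_file, a.method_index, a.dex_pc) <
           std::tie(b.dex_file, b.method_index, b.dex_pc);
  }
};

int main() {
  int fake_dex_file = 0;  // Stands in for a DexFile*.
  std::set<FrameKey, FrameKeyLess> children;
  children.insert({&fake_dex_file, 5, 10});
  children.insert({&fake_dex_file, 5, 20});

  // Probe with a value key; no heap-allocated dummy node is required.
  auto it = children.find(FrameKey{&fake_dex_file, 5, 20});
  std::cout << (it != children.end() ? "found" : "missing") << "\n";
  return 0;
}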
diff --git a/runtime/profiler_options.h b/runtime/profiler_options.h
deleted file mode 100644
index 1db2f05..0000000
--- a/runtime/profiler_options.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_PROFILER_OPTIONS_H_
-#define ART_RUNTIME_PROFILER_OPTIONS_H_
-
-#include <string>
-#include <ostream>
-
-namespace art {
-
-enum ProfileDataType {
-  kProfilerMethod,          // Method only
-  kProfilerBoundedStack,    // Methods with Dex PC on top of the stack
-};
-std::ostream& operator<<(std::ostream& os, const ProfileDataType& rhs);
-
-class ProfilerOptions {
- public:
-  static constexpr bool kDefaultEnabled = false;
-  static constexpr uint32_t kDefaultPeriodS = 10;
-  static constexpr uint32_t kDefaultDurationS = 20;
-  static constexpr uint32_t kDefaultIntervalUs = 500;
-  static constexpr double kDefaultBackoffCoefficient = 2.0;
-  static constexpr bool kDefaultStartImmediately = false;
-  static constexpr double kDefaultTopKThreshold = 90.0;
-  static constexpr double kDefaultChangeInTopKThreshold = 10.0;
-  static constexpr ProfileDataType kDefaultProfileData = kProfilerMethod;
-  static constexpr uint32_t kDefaultMaxStackDepth = 3;
-
-  ProfilerOptions() :
-    enabled_(kDefaultEnabled),
-    period_s_(kDefaultPeriodS),
-    duration_s_(kDefaultDurationS),
-    interval_us_(kDefaultIntervalUs),
-    backoff_coefficient_(kDefaultBackoffCoefficient),
-    start_immediately_(kDefaultStartImmediately),
-    top_k_threshold_(kDefaultTopKThreshold),
-    top_k_change_threshold_(kDefaultChangeInTopKThreshold),
-    profile_type_(kDefaultProfileData),
-    max_stack_depth_(kDefaultMaxStackDepth) {}
-
-  ProfilerOptions(bool enabled,
-                 uint32_t period_s,
-                 uint32_t duration_s,
-                 uint32_t interval_us,
-                 double backoff_coefficient,
-                 bool start_immediately,
-                 double top_k_threshold,
-                 double top_k_change_threshold,
-                 ProfileDataType profile_type,
-                 uint32_t max_stack_depth):
-    enabled_(enabled),
-    period_s_(period_s),
-    duration_s_(duration_s),
-    interval_us_(interval_us),
-    backoff_coefficient_(backoff_coefficient),
-    start_immediately_(start_immediately),
-    top_k_threshold_(top_k_threshold),
-    top_k_change_threshold_(top_k_change_threshold),
-    profile_type_(profile_type),
-    max_stack_depth_(max_stack_depth) {}
-
-  bool IsEnabled() const {
-    return enabled_;
-  }
-
-  uint32_t GetPeriodS() const {
-    return period_s_;
-  }
-
-  uint32_t GetDurationS() const {
-    return duration_s_;
-  }
-
-  uint32_t GetIntervalUs() const {
-    return interval_us_;
-  }
-
-  double GetBackoffCoefficient() const {
-    return backoff_coefficient_;
-  }
-
-  bool GetStartImmediately() const {
-    return start_immediately_;
-  }
-
-  double GetTopKThreshold() const {
-    return top_k_threshold_;
-  }
-
-  double GetTopKChangeThreshold() const {
-    return top_k_change_threshold_;
-  }
-
-  ProfileDataType GetProfileType() const {
-    return profile_type_;
-  }
-
-  uint32_t GetMaxStackDepth() const {
-    return max_stack_depth_;
-  }
-
- private:
-  friend std::ostream & operator<<(std::ostream &os, const ProfilerOptions& po) {
-    os << "enabled=" << po.enabled_
-       << ", period_s=" << po.period_s_
-       << ", duration_s=" << po.duration_s_
-       << ", interval_us=" << po.interval_us_
-       << ", backoff_coefficient=" << po.backoff_coefficient_
-       << ", start_immediately=" << po.start_immediately_
-       << ", top_k_threshold=" << po.top_k_threshold_
-       << ", top_k_change_threshold=" << po.top_k_change_threshold_
-       << ", profile_type=" << po.profile_type_
-       << ", max_stack_depth=" << po.max_stack_depth_;
-    return os;
-  }
-
-  friend class ParsedOptions;
-
-  // Whether or not the applications should be profiled.
-  bool enabled_;
-  // Generate profile every n seconds.
-  uint32_t period_s_;
-  // Run profile for n seconds.
-  uint32_t duration_s_;
-  // Microseconds between samples.
-  uint32_t interval_us_;
-  // Coefficient to exponential backoff.
-  double backoff_coefficient_;
-  // Whether the profile should start upon app startup or be delayed by some random offset.
-  bool start_immediately_;
-  // Top K% of samples that are considered relevant when deciding if the app should be recompiled.
-  double top_k_threshold_;
-  // How much the top K% samples needs to change in order for the app to be recompiled.
-  double top_k_change_threshold_;
-  // The type of profile data dumped to the disk.
-  ProfileDataType profile_type_;
-  // The max depth of the stack collected by the profiler
-  uint32_t max_stack_depth_;
-};
-
-}  // namespace art
-
-
-#endif  // ART_RUNTIME_PROFILER_OPTIONS_H_
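
The removed header only documents backoff_coefficient_ as "Coefficient to exponential backoff". Assuming it scaled the sleep period between sampling runs (an assumption, not confirmed by the text above), the resulting schedule would look roughly like this sketch:

#include <iostream>

// Rough sketch of an exponentially backed-off sampling schedule, assuming the
// coefficient multiplies the sleep period after each run.
int main() {
  double period_s = 10.0;         // kDefaultPeriodS
  const double backoff = 2.0;     // kDefaultBackoffCoefficient
  const double duration_s = 20.0; // kDefaultDurationS
  for (int run = 0; run < 5; ++run) {
    std::cout << "sleep " << period_s << "s, then sample for "
              << duration_s << "s\n";
    period_s *= backoff;          // Back off before the next run.
  }
  return 0;
}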
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index caf5545..21cd2aa 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -118,7 +118,6 @@
 #include "oat_file_manager.h"
 #include "os.h"
 #include "parsed_options.h"
-#include "profiler.h"
 #include "jit/profile_saver.h"
 #include "quick/quick_method_frame_info.h"
 #include "reflection.h"
@@ -628,17 +627,6 @@
   VLOG(startup) << "Runtime::Start exiting";
   finished_starting_ = true;
 
-  if (profiler_options_.IsEnabled() && !profile_output_filename_.empty()) {
-    // User has asked for a profile using -Xenable-profiler.
-    // Create the profile file if it doesn't exist.
-    int fd = open(profile_output_filename_.c_str(), O_RDWR|O_CREAT|O_EXCL, 0660);
-    if (fd >= 0) {
-      close(fd);
-    } else if (errno != EEXIST) {
-      LOG(WARNING) << "Failed to access the profile file. Profiler disabled.";
-    }
-  }
-
   if (trace_config_.get() != nullptr && trace_config_->trace_file != "") {
     ScopedThreadStateChange tsc(self, kWaitingForMethodTracingStart);
     Trace::Start(trace_config_->trace_file.c_str(),
@@ -883,12 +871,13 @@
   for (size_t i = 0; i < dex_filenames.size(); i++) {
     const char* dex_filename = dex_filenames[i].c_str();
     const char* dex_location = dex_locations[i].c_str();
+    static constexpr bool kVerifyChecksum = true;
     std::string error_msg;
     if (!OS::FileExists(dex_filename)) {
       LOG(WARNING) << "Skipping non-existent dex file '" << dex_filename << "'";
       continue;
     }
-    if (!DexFile::Open(dex_filename, dex_location, &error_msg, dex_files)) {
+    if (!DexFile::Open(dex_filename, dex_location, kVerifyChecksum, &error_msg, dex_files)) {
       LOG(WARNING) << "Failed to open .dex from file '" << dex_filename << "': " << error_msg;
       ++failure_count;
     }
@@ -1196,26 +1185,6 @@
         Trace::TraceOutputMode::kFile;
   }
 
-  {
-    auto&& profiler_options = runtime_options.ReleaseOrDefault(Opt::ProfilerOpts);
-    profile_output_filename_ = profiler_options.output_file_name_;
-
-    // TODO: Don't do this, just change ProfilerOptions to include the output file name?
-    ProfilerOptions other_options(
-        profiler_options.enabled_,
-        profiler_options.period_s_,
-        profiler_options.duration_s_,
-        profiler_options.interval_us_,
-        profiler_options.backoff_coefficient_,
-        profiler_options.start_immediately_,
-        profiler_options.top_k_threshold_,
-        profiler_options.top_k_change_threshold_,
-        profiler_options.profile_type_,
-        profiler_options.max_stack_depth_);
-
-    profiler_options_ = other_options;
-  }
-
   // TODO: move this to just be an Trace::Start argument
   Trace::SetDefaultClockSource(runtime_options.GetOrDefault(Opt::ProfileClock));
 
@@ -1758,7 +1727,6 @@
     return;
   }
 
-  profile_output_filename_ = profile_output_filename;
   jit_->StartProfileSaver(profile_output_filename,
                           code_paths,
                           foreign_dex_profile_path,
@@ -2009,9 +1977,4 @@
   return (jit_ != nullptr) && jit_->UseJitCompilation();
 }
 
-// Returns true if profile saving is enabled. GetJit() will be not null in this case.
-bool Runtime::SaveProfileInfo() const {
-  return (jit_ != nullptr) && jit_->SaveProfilingInfo();
-}
-
 }  // namespace art
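
The new kVerifyChecksum argument to DexFile::Open controls whether the Adler-32 checksum stored in the dex header is validated. Per the dex format, that checksum is a little-endian u4 at offset 8 covering everything from offset 12 to the end of the file, so the same check can be reproduced outside ART; a standalone sketch with minimal error handling:

#include <cstdint>
#include <cstdio>
#include <vector>

// Adler-32 as used by the dex format (modulus 65521).
static uint32_t Adler32(const uint8_t* data, size_t len) {
  uint32_t a = 1, b = 0;
  for (size_t i = 0; i < len; ++i) {
    a = (a + data[i]) % 65521u;
    b = (b + a) % 65521u;
  }
  return (b << 16) | a;
}

// Returns true if the dex header checksum matches the file contents.
static bool VerifyDexChecksum(const std::vector<uint8_t>& dex) {
  if (dex.size() < 112) return false;  // Smaller than the dex header.
  uint32_t stored = dex[8] | (dex[9] << 8) | (dex[10] << 16)
                    | (static_cast<uint32_t>(dex[11]) << 24);
  return Adler32(dex.data() + 12, dex.size() - 12) == stored;
}

int main(int argc, char** argv) {
  if (argc != 2) { std::fprintf(stderr, "usage: %s file.dex\n", argv[0]); return 2; }
  std::FILE* f = std::fopen(argv[1], "rb");
  if (f == nullptr) return 2;
  std::vector<uint8_t> dex;
  uint8_t buf[4096];
  size_t n;
  while ((n = std::fread(buf, 1, sizeof(buf), f)) > 0) dex.insert(dex.end(), buf, buf + n);
  std::fclose(f);
  std::printf("checksum %s\n", VerifyDexChecksum(dex) ? "OK" : "MISMATCH");
  return 0;
}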
diff --git a/runtime/runtime.h b/runtime/runtime.h
index b7f377d..afa8e48 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -36,7 +36,6 @@
 #include "object_callbacks.h"
 #include "offsets.h"
 #include "process_state.h"
-#include "profiler_options.h"
 #include "quick/quick_method_frame_info.h"
 #include "runtime_stats.h"
 #include "safe_map.h"
@@ -192,10 +191,6 @@
     return image_location_;
   }
 
-  const ProfilerOptions& GetProfilerOptions() const {
-    return profiler_options_;
-  }
-
   // Starts a runtime, which may cause threads to be started and code to run.
   bool Start() UNLOCK_FUNCTION(Locks::mutator_lock_);
 
@@ -455,8 +450,6 @@
 
   // Returns true if JIT compilations are enabled. GetJit() will be not null in this case.
   bool UseJitCompilation() const;
-  // Returns true if profile saving is enabled. GetJit() will be not null in this case.
-  bool SaveProfileInfo() const;
 
   void PreZygoteFork();
   bool InitZygote();
@@ -782,9 +775,6 @@
 
   const bool is_running_on_memory_tool_;
 
-  std::string profile_output_filename_;
-  ProfilerOptions profiler_options_;
-
   std::unique_ptr<TraceConfig> trace_config_;
 
   instrumentation::Instrumentation instrumentation_;
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 635ff51..31206b5 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -75,7 +75,6 @@
 RUNTIME_OPTIONS_KEY (unsigned int,        JITInvokeTransitionWeight)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheInitialCapacity,    jit::JitCodeCache::kInitialCapacity)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheMaxCapacity,        jit::JitCodeCache::kMaxCapacity)
-RUNTIME_OPTIONS_KEY (bool,                JITSaveProfilingInfo,           false)
 RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
                                           HSpaceCompactForOOMMinIntervalsMs,\
                                                                           MsToNs(100 * 1000))  // 100s
@@ -105,7 +104,7 @@
 RUNTIME_OPTIONS_KEY (unsigned int,        MethodTraceFileSize,            10 * MB)
 RUNTIME_OPTIONS_KEY (Unit,                MethodTraceStreaming)
 RUNTIME_OPTIONS_KEY (TraceClockSource,    ProfileClock,                   kDefaultTraceClockSource)  // -Xprofile:
-RUNTIME_OPTIONS_KEY (TestProfilerOptions, ProfilerOpts)  // -Xenable-profiler, -Xprofile-*
+RUNTIME_OPTIONS_KEY (ProfileSaverOptions, ProfileSaverOpts)  // -Xjitsaveprofilinginfo, -Xps-*
 RUNTIME_OPTIONS_KEY (std::string,         Compiler)
 RUNTIME_OPTIONS_KEY (std::vector<std::string>, \
                                           CompilerOptions)  // -Xcompiler-option ...
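
runtime_options.def is consumed as an X-macro list: each RUNTIME_OPTIONS_KEY line is re-expanded wherever the file is included, with a different macro definition at each use site (key declarations, defaults, and so on). A self-contained sketch of the general pattern with made-up option names, keeping the list in the same translation unit for brevity:

#include <iostream>
#include <string>

// The list itself; in ART it lives in a .def file that is #included as needed.
// Names and defaults here are made up.
#define MY_OPTIONS_LIST(V)              \
  V(bool,        EnableWidget, false)   \
  V(unsigned,    CacheSizeKiB, 64)      \
  V(std::string, Compiler,     "quick")

// Expansion 1: a struct with one field per option, initialized to its default.
#define DECLARE_FIELD(Type, Name, Default) Type Name = Default;
struct Options {
  MY_OPTIONS_LIST(DECLARE_FIELD)
};
#undef DECLARE_FIELD

// Expansion 2: dump every option name and its current value.
#define PRINT_FIELD(Type, Name, Default) \
  std::cout << #Name << " = " << opts.Name << "\n";
void Dump(const Options& opts) {
  MY_OPTIONS_LIST(PRINT_FIELD)
}
#undef PRINT_FIELD

int main() {
  Options opts;
  opts.CacheSizeKiB = 128;
  Dump(opts);
  return 0;
}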
diff --git a/runtime/runtime_options.h b/runtime/runtime_options.h
index ab69d4f..5fcb86e 100644
--- a/runtime/runtime_options.h
+++ b/runtime/runtime_options.h
@@ -29,8 +29,8 @@
 #include "jit/jit_code_cache.h"
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
-#include "profiler_options.h"
 #include "arch/instruction_set.h"
+#include "jit/profile_saver_options.h"
 #include "verifier/verify_mode.h"
 #include <stdio.h>
 #include <stdarg.h>
@@ -41,7 +41,6 @@
 class DexFile;
 struct XGcOption;
 struct BackgroundGcOption;
-struct TestProfilerOptions;
 
 #define DECLARE_KEY(Type, Name) static const Key<Type> Name
 
diff --git a/runtime/stack.cc b/runtime/stack.cc
index a5ca527..1d913f2 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -131,16 +131,10 @@
       const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
       CodeInfoEncoding encoding = method_header->GetOptimizedCodeInfo().ExtractEncoding();
       DCHECK(walk_kind_ != StackWalkKind::kSkipInlinedFrames);
-      bool allow_resolve = walk_kind_ != StackWalkKind::kIncludeInlinedFramesNoResolve;
-      return allow_resolve
-          ? GetResolvedMethod<true>(*GetCurrentQuickFrame(),
-                                    inline_info,
-                                    encoding.inline_info_encoding,
-                                    depth_in_stack_map)
-          : GetResolvedMethod<false>(*GetCurrentQuickFrame(),
-                                     inline_info,
-                                     encoding.inline_info_encoding,
-                                     depth_in_stack_map);
+      return GetResolvedMethod(*GetCurrentQuickFrame(),
+                               inline_info,
+                               encoding.inline_info_encoding,
+                               depth_in_stack_map);
     } else {
       return *cur_quick_frame_;
     }
@@ -791,8 +785,7 @@
         cur_oat_quick_method_header_ = method->GetOatQuickMethodHeader(cur_quick_frame_pc_);
         SanityCheckFrame();
 
-        if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames ||
-             walk_kind_ == StackWalkKind::kIncludeInlinedFramesNoResolve)
+        if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames)
             && (cur_oat_quick_method_header_ != nullptr)
             && cur_oat_quick_method_header_->IsOptimized()) {
           CodeInfo code_info = cur_oat_quick_method_header_->GetOptimizedCodeInfo();
diff --git a/runtime/stack.h b/runtime/stack.h
index e77ab46..84b665e 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -568,7 +568,6 @@
   // when walking the stack.
   enum class StackWalkKind {
     kIncludeInlinedFrames,
-    kIncludeInlinedFramesNoResolve,
     kSkipInlinedFrames,
   };
 
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 7c50f97..4647d67 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -19,6 +19,7 @@
 
 #include "base/bit_vector.h"
 #include "base/bit_utils.h"
+#include "dex_file.h"
 #include "memory_region.h"
 #include "leb128.h"
 
@@ -892,7 +893,11 @@
     total_bit_size_ += MinimumBitsToStore(method_index_max);
 
     dex_pc_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
-    total_bit_size_ += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max);
+    // Note: We're not encoding the dex pc if there is none. That's the case
+    // for an intrinsified native method, such as String.charAt().
+    if (dex_pc_max != DexFile::kDexNoIndex) {
+      total_bit_size_ += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max);
+    }
 
     invoke_type_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
     total_bit_size_ += MinimumBitsToStore(invoke_type_max);
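
The stack_map.h change stops reserving bits for the dex pc when no inlined frame recorded one (dex_pc_max stays at kDexNoIndex, e.g. for an intrinsified String.charAt()). A standalone sketch of that bit-budgeting step; MinimumBits mirrors what a MinimumBitsToStore helper has to compute, and kNoValue stands in for DexFile::kDexNoIndex:

#include <cstdint>
#include <iostream>

// Number of bits needed to store any value in [0, max_value].
static uint32_t MinimumBits(uint32_t max_value) {
  uint32_t bits = 0;
  while (max_value != 0) {
    ++bits;
    max_value >>= 1;
  }
  return bits;  // Zero bits suffice when the only possible value is 0.
}

int main() {
  constexpr uint32_t kNoValue = 0xFFFFFFFFu;  // Stand-in for DexFile::kDexNoIndex.

  uint32_t method_index_max = 1000;
  uint32_t dex_pc_max = kNoValue;  // No inlined frame recorded a dex pc.

  uint32_t total_bits = 0;
  total_bits += MinimumBits(method_index_max);

  uint32_t dex_pc_offset = total_bits;
  if (dex_pc_max != kNoValue) {
    // +1 leaves room for an explicit "no dex pc" marker value.
    total_bits += MinimumBits(1 + dex_pc_max);
  }
  std::cout << "dex pc field offset=" << dex_pc_offset
            << " width=" << (total_bits - dex_pc_offset) << " bits\n";
  return 0;
}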
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index d98f82a..3fd66a7 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -201,12 +201,22 @@
                  << " state=" << old_state_and_flags.as_struct.state;
     } else if ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
       // Wait while our suspend count is non-zero.
-      MutexLock mu(this, *Locks::thread_suspend_count_lock_);
+
+      // We pass null to the MutexLock as we may be in a situation where the
+      // runtime is shutting down. Guarding ourselves from that situation
+      // requires taking the shutdown lock, which is undesirable here.
+      Thread* thread_to_pass = nullptr;
+      if (kIsDebugBuild && !IsDaemon()) {
+        // We know we can make our debug locking checks on non-daemon threads,
+        // so re-enable them on debug builds.
+        thread_to_pass = this;
+      }
+      MutexLock mu(thread_to_pass, *Locks::thread_suspend_count_lock_);
       old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
       DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
       while ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
         // Re-check when Thread::resume_cond_ is notified.
-        Thread::resume_cond_->Wait(this);
+        Thread::resume_cond_->Wait(thread_to_pass);
         old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
         DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
       }
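
The loop above re-reads the flags after every wake-up because the condition variable may be notified for an unrelated state change, or spuriously. The same check-under-the-lock pattern expressed with the standard library, independent of ART's Mutex/ConditionVariable types:

#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

std::mutex suspend_lock;
std::condition_variable resume_cond;
bool suspend_requested = true;  // Guarded by suspend_lock.

void WaitUntilResumed() {
  std::unique_lock<std::mutex> lock(suspend_lock);
  // Re-check the condition after every wake-up; notifications can be
  // spurious or meant for a different state change.
  while (suspend_requested) {
    resume_cond.wait(lock);
  }
}

int main() {
  std::thread waiter(WaitUntilResumed);
  std::this_thread::sleep_for(std::chrono::milliseconds(50));
  {
    std::lock_guard<std::mutex> lock(suspend_lock);
    suspend_requested = false;  // Clear the "suspend" flag under the lock...
  }
  resume_cond.notify_all();     // ...then wake any waiters.
  waiter.join();
  std::cout << "resumed\n";
  return 0;
}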
diff --git a/runtime/thread.h b/runtime/thread.h
index 7ae9be5..ab24625 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1060,6 +1060,8 @@
     return tlsPtr_.mterp_alt_ibase;
   }
 
+  // Notify that a signal is being handled. This is to protect us from doing recursive
+  // NPE handling after a SIGSEGV.
   void NoteSignalBeingHandled() {
     if (tls32_.handling_signal_) {
       LOG(FATAL) << "Detected signal while processing a signal";
@@ -1349,8 +1351,8 @@
       instrumentation_stack(nullptr), debug_invoke_req(nullptr), single_step_control(nullptr),
       stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr),
       frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0),
-      last_no_thread_suspension_cause(nullptr), thread_local_objects(0),
-      thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr),
+      last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr),
+      thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0),
       mterp_current_ibase(nullptr), mterp_default_ibase(nullptr), mterp_alt_ibase(nullptr),
       thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr),
       nested_signal_state(nullptr), flip_function(nullptr), method_verifier(nullptr),
@@ -1465,12 +1467,12 @@
     QuickEntryPoints quick_entrypoints;
 
     // Thread-local allocation pointer.
-    size_t thread_local_objects;
     uint8_t* thread_local_start;
     // thread_local_pos and thread_local_end must be consecutive for ldrd and are 8 byte aligned for
     // potentially better performance.
     uint8_t* thread_local_pos;
     uint8_t* thread_local_end;
+    size_t thread_local_objects;
 
     // Mterp jump table bases.
     void* mterp_current_ibase;
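
The thread.h change moves thread_local_objects after the three pointers so that thread_local_pos and thread_local_end stay adjacent and naturally aligned, which lets a single ldrd load both on ARM. The layout property can be verified at compile time; a sketch with an illustrative struct (not the real tls_ptr_sized_values):

#include <cstddef>
#include <cstdint>
#include <iostream>

// Illustrative stand-in for the thread-local allocation fields.
struct TlabFields {
  uint8_t* thread_local_start;
  uint8_t* thread_local_pos;    // Loaded together with thread_local_end,
  uint8_t* thread_local_end;    // e.g. by a single ldrd on ARM.
  size_t thread_local_objects;  // Moved after the pointers so it cannot split them.
};

// The pointer pair must be consecutive and pointer-aligned.
static_assert(offsetof(TlabFields, thread_local_end) ==
              offsetof(TlabFields, thread_local_pos) + sizeof(uint8_t*),
              "pos and end must be consecutive");
static_assert(offsetof(TlabFields, thread_local_pos) % sizeof(uint8_t*) == 0,
              "pos must be pointer-aligned");

int main() {
  std::cout << "pos offset=" << offsetof(TlabFields, thread_local_pos)
            << " end offset=" << offsetof(TlabFields, thread_local_end) << "\n";
  return 0;
}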
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 97bcb7d..16ef0ff 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -613,11 +613,7 @@
             PLOG(FATAL) << "futex wait failed for SuspendAllInternal()";
           }
         }
-      } else {
-        cur_val = pending_threads.LoadRelaxed();
-        CHECK_EQ(cur_val, 0);
-        break;
-      }
+      }  // else re-check pending_threads in the next iteration (this may be a spurious wake-up).
 #else
       // Spin wait. This is likely to be slow, but on most architecture ART_USE_FUTEXES is set.
 #endif
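
With this change, SuspendAllInternal() treats every return from the futex wait as potentially spurious and simply re-reads pending_threads at the top of the loop. A minimal Linux-only sketch of that wait pattern (names are illustrative and error handling is trimmed):

#include <atomic>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <thread>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

std::atomic<int32_t> pending_threads{3};

static long FutexWait(std::atomic<int32_t>* addr, int32_t expected) {
  return syscall(SYS_futex, reinterpret_cast<int32_t*>(addr),
                 FUTEX_WAIT, expected, nullptr, nullptr, 0);
}

static long FutexWakeAll(std::atomic<int32_t>* addr) {
  return syscall(SYS_futex, reinterpret_cast<int32_t*>(addr),
                 FUTEX_WAKE, INT32_MAX, nullptr, nullptr, 0);
}

int main() {
  std::thread workers[3];
  for (auto& w : workers) {
    w = std::thread([] {
      if (pending_threads.fetch_sub(1) == 1) {
        FutexWakeAll(&pending_threads);  // Last one out wakes the waiter.
      }
    });
  }
  // Wait until all workers have checked in; any return from FUTEX_WAIT
  // (including EAGAIN or a spurious wake-up) just re-checks the counter.
  while (true) {
    int32_t cur = pending_threads.load(std::memory_order_relaxed);
    if (cur == 0) break;
    if (FutexWait(&pending_threads, cur) != 0 && errno != EAGAIN && errno != EINTR) {
      std::perror("futex wait");
      return 1;
    }
  }
  for (auto& w : workers) w.join();
  std::cout << "all threads accounted for\n";
  return 0;
}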
diff --git a/test/004-ThreadStress/check b/test/004-ThreadStress/check
index ffbb8cf..77e4cdb 100755
--- a/test/004-ThreadStress/check
+++ b/test/004-ThreadStress/check
@@ -14,5 +14,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Only compare the last line.
-tail -n 1 "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
+# Do not compare numbers, so replace numbers with 'N'.
+sed '-es/[0-9][0-9]*/N/g' "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
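
The updated check script canonicalizes the test output by collapsing every run of digits to 'N' before diffing against expected.txt, so thread ids and counts no longer matter. The same normalization expressed in C++ with std::regex, for anyone post-processing such logs outside the test harness:

#include <iostream>
#include <regex>
#include <string>

int main() {
  std::string line;
  const std::regex digits("[0-9]+");
  // Mirror `sed 's/[0-9][0-9]*/N/g'`: collapse every digit run to a single N.
  while (std::getline(std::cin, line)) {
    std::cout << std::regex_replace(line, digits, "N") << "\n";
  }
  return 0;
}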
diff --git a/test/004-ThreadStress/expected.txt b/test/004-ThreadStress/expected.txt
index a26fb4f..772faf6 100644
--- a/test/004-ThreadStress/expected.txt
+++ b/test/004-ThreadStress/expected.txt
@@ -1 +1,11 @@
+JNI_OnLoad called
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Starting worker for N
+Finishing worker
+Finishing worker
+Finishing worker
+Finishing worker
 Finishing worker
diff --git a/test/004-ThreadStress/src/Main.java b/test/004-ThreadStress/src/Main.java
index acd8e8b..5cae398 100644
--- a/test/004-ThreadStress/src/Main.java
+++ b/test/004-ThreadStress/src/Main.java
@@ -93,9 +93,7 @@
 
                 killTemp = osClass.getDeclaredMethod("kill", int.class, int.class);
             } catch (Exception e) {
-                if (!e.getClass().getName().equals("ErrnoException")) {
-                    e.printStackTrace(System.out);
-                }
+                Main.printThrowable(e);
             }
 
             pid = pidTemp;
@@ -109,8 +107,8 @@
                 kill.invoke(null, pid, sigquit);
             } catch (OutOfMemoryError e) {
             } catch (Exception e) {
-                if (!e.getClass().getName().equals("ErrnoException")) {
-                    e.printStackTrace(System.out);
+                if (!e.getClass().getName().equals(Main.errnoExceptionName)) {
+                    Main.printThrowable(e);
                 }
             }
             return true;
@@ -268,6 +266,7 @@
     }
 
     public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
         parseAndRun(args);
     }
 
@@ -399,12 +398,21 @@
             System.out.println(frequencyMap);
         }
 
-        runTest(numberOfThreads, numberOfDaemons, operationsPerThread, lock, frequencyMap);
+        try {
+            runTest(numberOfThreads, numberOfDaemons, operationsPerThread, lock, frequencyMap);
+        } catch (Throwable t) {
+            // In this case, the output should not contain all the required
+            // "Finishing worker" lines.
+            Main.printThrowable(t);
+        }
     }
 
     public static void runTest(final int numberOfThreads, final int numberOfDaemons,
                                final int operationsPerThread, final Object lock,
                                Map<Operation, Double> frequencyMap) throws Exception {
+        final Thread mainThread = Thread.currentThread();
+        final Barrier startBarrier = new Barrier(numberOfThreads + numberOfDaemons + 1);
+
         // Each normal thread is going to do operationsPerThread
         // operations. Each daemon thread will loop over all
         // the operations and will not stop.
@@ -438,8 +446,9 @@
             }
             // Randomize the operation order
             Collections.shuffle(Arrays.asList(operations));
-            threadStresses[t] = t < numberOfThreads ? new Main(lock, t, operations) :
-                                                      new Daemon(lock, t, operations);
+            threadStresses[t] = (t < numberOfThreads)
+                    ? new Main(lock, t, operations)
+                    : new Daemon(lock, t, operations, mainThread, startBarrier);
         }
 
         // Enable to dump operation counts per thread to make sure its
@@ -474,32 +483,41 @@
             runners[r] = new Thread("Runner thread " + r) {
                 final Main threadStress = ts;
                 public void run() {
-                    int id = threadStress.id;
-                    System.out.println("Starting worker for " + id);
-                    while (threadStress.nextOperation < operationsPerThread) {
-                        try {
-                            Thread thread = new Thread(ts, "Worker thread " + id);
-                            thread.start();
+                    try {
+                        int id = threadStress.id;
+                        // No memory-hungry tasks are running yet, so println() should succeed.
+                        System.out.println("Starting worker for " + id);
+                        // Wait until all runners and daemons reach the starting point.
+                        startBarrier.await();
+                        // Run the stress tasks.
+                        while (threadStress.nextOperation < operationsPerThread) {
                             try {
+                                Thread thread = new Thread(ts, "Worker thread " + id);
+                                thread.start();
                                 thread.join();
-                            } catch (InterruptedException e) {
-                            }
 
-                            System.out.println("Thread exited for " + id + " with "
-                                               + (operationsPerThread - threadStress.nextOperation)
-                                               + " operations remaining.");
-                        } catch (OutOfMemoryError e) {
-                            // Ignore OOME since we need to print "Finishing worker" for the test
-                            // to pass.
+                                if (DEBUG) {
+                                    System.out.println(
+                                        "Thread exited for " + id + " with " +
+                                        (operationsPerThread - threadStress.nextOperation) +
+                                        " operations remaining.");
+                                }
+                            } catch (OutOfMemoryError e) {
+                                // Ignore OOME since we need to print "Finishing worker"
+                                // for the test to pass. This OOM can come from creating
+                                // the Thread or from the DEBUG output.
+                                // Note that the Thread creation may fail repeatedly,
+                                // preventing the runner from making any progress,
+                                // especially if the number of daemons is too high.
+                            }
                         }
-                    }
-                    // Keep trying to print "Finishing worker" until it succeeds.
-                    while (true) {
-                        try {
-                            System.out.println("Finishing worker");
-                            break;
-                        } catch (OutOfMemoryError e) {
-                        }
+                        // Print "Finishing worker" through JNI to avoid OOME.
+                        Main.printString(Main.finishingWorkerMessage);
+                    } catch (Throwable t) {
+                        Main.printThrowable(t);
+                        // Interrupt the main thread, so that it can shut down in an orderly way
+                        // instead of waiting indefinitely for some Barrier.
+                        mainThread.interrupt();
                     }
                 }
             };
@@ -532,6 +550,9 @@
         for (int r = 0; r < runners.length; r++) {
             runners[r].start();
         }
+        // Wait for all threads to reach the starting point.
+        startBarrier.await();
+        // Wait for runners to finish.
         for (int r = 0; r < runners.length; r++) {
             runners[r].join();
         }
@@ -574,8 +595,14 @@
     }
 
     private static class Daemon extends Main {
-        private Daemon(Object lock, int id, Operation[] operations) {
+        private Daemon(Object lock,
+                       int id,
+                       Operation[] operations,
+                       Thread mainThread,
+                       Barrier startBarrier) {
             super(lock, id, operations);
+            this.mainThread = mainThread;
+            this.startBarrier = startBarrier;
         }
 
         public void run() {
@@ -583,26 +610,74 @@
                 if (DEBUG) {
                     System.out.println("Starting ThreadStress Daemon " + id);
                 }
-                int i = 0;
-                while (true) {
-                    Operation operation = operations[i];
-                    if (DEBUG) {
-                        System.out.println("ThreadStress Daemon " + id
-                                           + " operation " + i
-                                           + " is " + operation);
+                startBarrier.await();
+                try {
+                    int i = 0;
+                    while (true) {
+                        Operation operation = operations[i];
+                        if (DEBUG) {
+                            System.out.println("ThreadStress Daemon " + id
+                                               + " operation " + i
+                                               + " is " + operation);
+                        }
+                        operation.perform();
+                        i = (i + 1) % operations.length;
                     }
-                    operation.perform();
-                    i = (i + 1) % operations.length;
+                } catch (OutOfMemoryError e) {
+                    // Catch OutOfMemoryErrors since these can cause the test to fail if they print
+                    // the stack trace after "Finishing worker". Note that operations should catch
+                    // their own OOME; this guards only against OOME in the DEBUG output.
                 }
-            } catch (OutOfMemoryError e) {
-                // Catch OutOfMemoryErrors since these can cause the test to fail it they print
-                // the stack trace after "Finishing worker".
-            } finally {
                 if (DEBUG) {
                     System.out.println("Finishing ThreadStress Daemon for " + id);
                 }
+            } catch (Throwable t) {
+                Main.printThrowable(t);
+                // Interrupt the main thread, so that it can shut down in an orderly way
+                // instead of waiting indefinitely for some Barrier.
+                mainThread.interrupt();
             }
         }
+
+        final Thread mainThread;
+        final Barrier startBarrier;
     }
 
+    // Note: java.util.concurrent.CyclicBarrier.await() allocates memory and may throw OOM.
+    // That is highly undesirable in this test, so we use our own simple barrier class.
+    // The only memory allocation that can happen here is the lock inflation which uses
+    // a native allocation. As such, it should succeed even if the Java heap is full.
+    // If the native allocation surprisingly fails, the program shall abort().
+    private static class Barrier {
+        public Barrier(int initialCount) {
+            count = initialCount;
+        }
+
+        public synchronized void await() throws InterruptedException {
+            --count;
+            if (count != 0) {
+                do {
+                    wait();
+                } while (count != 0);  // Check for spurious wakeup.
+            } else {
+                notifyAll();
+            }
+        }
+
+        private int count;
+    }
+
+    // Printing a String/Throwable through JNI requires only native memory and space
+    // in the local reference table, so it should succeed even if the Java heap is full.
+    private static native void printString(String s);
+    private static native void printThrowable(Throwable t);
+
+    static final String finishingWorkerMessage;
+    static final String errnoExceptionName;
+    static {
+        // We pre-allocate the strings in the class initializer to avoid const-string
+        // instructions in code that uses these strings later, as those may throw OOME.
+        finishingWorkerMessage = "Finishing worker\n";
+        errnoExceptionName = "ErrnoException";
+    }
 }
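
The hand-rolled Barrier above avoids CyclicBarrier because its await() can allocate and throw OOME at the worst moment; the only allocation left is native lock inflation. The same counting-barrier logic as a standalone C++ sketch with std::mutex and std::condition_variable (class and variable names are illustrative):

#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>

// Counting barrier: the last arriving thread wakes everyone else.
class Barrier {
 public:
  explicit Barrier(int initial_count) : count_(initial_count) {}

  void Await() {
    std::unique_lock<std::mutex> lock(mutex_);
    if (--count_ != 0) {
      // The predicate is re-checked after every wake-up, so spurious
      // wake-ups are tolerated, as in the Java version.
      cond_.wait(lock, [this] { return count_ == 0; });
    } else {
      cond_.notify_all();
    }
  }

 private:
  std::mutex mutex_;
  std::condition_variable cond_;
  int count_;
};

int main() {
  constexpr int kThreads = 4;
  Barrier start_barrier(kThreads + 1);  // Workers plus the main thread.
  std::vector<std::thread> workers;
  for (int i = 0; i < kThreads; ++i) {
    workers.emplace_back([&start_barrier, i] {
      start_barrier.Await();            // Everyone starts together.
      std::cout << "worker " << i << " running\n";
    });
  }
  start_barrier.Await();                // Release the workers.
  for (auto& w : workers) w.join();
  return 0;
}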
diff --git a/test/004-ThreadStress/thread_stress.cc b/test/004-ThreadStress/thread_stress.cc
new file mode 100644
index 0000000..573c352
--- /dev/null
+++ b/test/004-ThreadStress/thread_stress.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+
+#include "jni.h"
+#include "mirror/string.h"
+#include "mirror/throwable.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_printString(JNIEnv*, jclass, jstring s) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::cout << soa.Decode<mirror::String*>(s)->ToModifiedUtf8();
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_printThrowable(JNIEnv*, jclass, jthrowable t) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::cout << soa.Decode<mirror::Throwable*>(t)->Dump();
+}
+
+}  // namespace art
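
The helpers above reach into ART internals (mirror::String, ScopedObjectAccess) because they live inside the ART test tree. Outside of ART, the same "print without allocating on the Java heap" idea can be written against plain JNI; a sketch, with the Java_Main_printString name carried over from the test purely for illustration:

#include <cstdio>

#include "jni.h"

// Portable JNI flavor of Java_Main_printString: copy the string contents into
// native memory and print them without touching the Java heap.
extern "C" JNIEXPORT void JNICALL Java_Main_printString(JNIEnv* env, jclass, jstring s) {
  const char* utf = env->GetStringUTFChars(s, nullptr);
  if (utf == nullptr) {
    return;  // An OutOfMemoryError is already pending in the VM.
  }
  std::fputs(utf, stdout);
  env->ReleaseStringUTFChars(s, utf);
}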
diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java
index b2f905e..9d4618a 100644
--- a/test/004-UnsafeTest/src/Main.java
+++ b/test/004-UnsafeTest/src/Main.java
@@ -39,16 +39,24 @@
     }
   }
 
-  private static Unsafe getUnsafe() throws Exception {
+  private static Unsafe getUnsafe() throws NoSuchFieldException, IllegalAccessException {
     Class<?> unsafeClass = Unsafe.class;
     Field f = unsafeClass.getDeclaredField("theUnsafe");
     f.setAccessible(true);
     return (Unsafe) f.get(null);
   }
 
-  public static void main(String[] args) throws Exception {
+  public static void main(String[] args) throws NoSuchFieldException, IllegalAccessException {
     System.loadLibrary(args[0]);
     Unsafe unsafe = getUnsafe();
+
+    testArrayBaseOffset(unsafe);
+    testArrayIndexScale(unsafe);
+    testGetAndPutAndCAS(unsafe);
+    testGetAndPutVolatile(unsafe);
+  }
+
+  private static void testArrayBaseOffset(Unsafe unsafe) {
     check(unsafe.arrayBaseOffset(boolean[].class), vmArrayBaseOffset(boolean[].class),
         "Unsafe.arrayBaseOffset(boolean[])");
     check(unsafe.arrayBaseOffset(byte[].class), vmArrayBaseOffset(byte[].class),
@@ -65,7 +73,9 @@
         "Unsafe.arrayBaseOffset(long[])");
     check(unsafe.arrayBaseOffset(Object[].class), vmArrayBaseOffset(Object[].class),
         "Unsafe.arrayBaseOffset(Object[])");
+  }
 
+  private static void testArrayIndexScale(Unsafe unsafe) {
     check(unsafe.arrayIndexScale(boolean[].class), vmArrayIndexScale(boolean[].class),
         "Unsafe.arrayIndexScale(boolean[])");
     check(unsafe.arrayIndexScale(byte[].class), vmArrayIndexScale(byte[].class),
@@ -82,7 +92,9 @@
         "Unsafe.arrayIndexScale(long[])");
     check(unsafe.arrayIndexScale(Object[].class), vmArrayIndexScale(Object[].class),
         "Unsafe.arrayIndexScale(Object[])");
+  }
 
+  private static void testGetAndPutAndCAS(Unsafe unsafe) throws NoSuchFieldException {
     TestClass t = new TestClass();
 
     int intValue = 12345678;
@@ -185,12 +197,58 @@
     }
   }
 
+  private static void testGetAndPutVolatile(Unsafe unsafe) throws NoSuchFieldException {
+    TestVolatileClass tv = new TestVolatileClass();
+
+    int intValue = 12345678;
+    Field volatileIntField = TestVolatileClass.class.getDeclaredField("volatileIntVar");
+    long volatileIntOffset = unsafe.objectFieldOffset(volatileIntField);
+    check(unsafe.getIntVolatile(tv, volatileIntOffset),
+          0,
+          "Unsafe.getIntVolatile(Object, long) - initial");
+    unsafe.putIntVolatile(tv, volatileIntOffset, intValue);
+    check(tv.volatileIntVar, intValue, "Unsafe.putIntVolatile(Object, long, int)");
+    check(unsafe.getIntVolatile(tv, volatileIntOffset),
+          intValue,
+          "Unsafe.getIntVolatile(Object, long)");
+
+    long longValue = 1234567887654321L;
+    Field volatileLongField = TestVolatileClass.class.getDeclaredField("volatileLongVar");
+    long volatileLongOffset = unsafe.objectFieldOffset(volatileLongField);
+    check(unsafe.getLongVolatile(tv, volatileLongOffset),
+          0,
+          "Unsafe.getLongVolatile(Object, long) - initial");
+    unsafe.putLongVolatile(tv, volatileLongOffset, longValue);
+    check(tv.volatileLongVar, longValue, "Unsafe.putLongVolatile(Object, long, long)");
+    check(unsafe.getLongVolatile(tv, volatileLongOffset),
+          longValue,
+          "Unsafe.getLongVolatile(Object, long)");
+
+    Object objectValue = new Object();
+    Field volatileObjectField = TestVolatileClass.class.getDeclaredField("volatileObjectVar");
+    long volatileObjectOffset = unsafe.objectFieldOffset(volatileObjectField);
+    check(unsafe.getObjectVolatile(tv, volatileObjectOffset),
+          null,
+          "Unsafe.getObjectVolatile(Object, long) - initial");
+    unsafe.putObjectVolatile(tv, volatileObjectOffset, objectValue);
+    check(tv.volatileObjectVar, objectValue, "Unsafe.putObjectVolatile(Object, long, Object)");
+    check(unsafe.getObjectVolatile(tv, volatileObjectOffset),
+          objectValue,
+          "Unsafe.getObjectVolatile(Object, long)");
+  }
+
   private static class TestClass {
     public int intVar = 0;
     public long longVar = 0;
     public Object objectVar = null;
   }
 
+  private static class TestVolatileClass {
+    public volatile int volatileIntVar = 0;
+    public volatile long volatileLongVar = 0;
+    public volatile Object volatileObjectVar = null;
+  }
+
   private static native int vmArrayBaseOffset(Class clazz);
   private static native int vmArrayIndexScale(Class clazz);
 }
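
The new testGetAndPutVolatile exercises Unsafe's volatile accessors, which follow Java volatile (sequentially consistent) semantics. The closest standard C++ analogue is a seq_cst load/store on std::atomic; a small sketch for readers mapping the test onto the memory model (field names are illustrative):

#include <atomic>
#include <cstdint>
#include <iostream>

struct TestVolatileFields {
  std::atomic<int32_t> volatile_int{0};
  std::atomic<int64_t> volatile_long{0};
  std::atomic<void*>   volatile_object{nullptr};
};

int main() {
  TestVolatileFields tv;
  int payload = 0;

  // Roughly what Unsafe.putIntVolatile / getIntVolatile provide:
  // a store and load with sequentially consistent ordering.
  tv.volatile_int.store(12345678, std::memory_order_seq_cst);
  std::cout << tv.volatile_int.load(std::memory_order_seq_cst) << "\n";

  tv.volatile_long.store(1234567887654321LL, std::memory_order_seq_cst);
  std::cout << tv.volatile_long.load(std::memory_order_seq_cst) << "\n";

  tv.volatile_object.store(&payload, std::memory_order_seq_cst);
  std::cout << (tv.volatile_object.load(std::memory_order_seq_cst) == &payload) << "\n";
  return 0;
}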
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index d878e69..dd89d64 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -33,7 +33,7 @@
 14 (class java.lang.Short)
 [java.lang.String(int,int,char[]), public java.lang.String(), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder)]
 [private final int java.lang.String.count, private int java.lang.String.hash, private static final java.io.ObjectStreamField[] java.lang.String.serialPersistentFields, private static final long java.lang.String.serialVersionUID, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER]
-[native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int)]
+[native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private boolean java.lang.String.nonSyncContentEquals(java.lang.AbstractStringBuilder), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public byte[] java.lang.String.getBytes(), public byte[] java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public byte[] java.lang.String.getBytes(java.nio.charset.Charset), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public java.lang.String[] java.lang.String.split(java.lang.String), public java.lang.String[] java.lang.String.split(java.lang.String,int), public native char java.lang.String.charAt(int), public native char[] java.lang.String.toCharArray(), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.CharSequence[]), public static java.lang.String java.lang.String.join(java.lang.CharSequence,java.lang.Iterable), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int), static int java.lang.String.indexOf(char[],int,int,char[],int,int,int), static int java.lang.String.indexOf(java.lang.String,java.lang.String,int), static int java.lang.String.lastIndexOf(char[],int,int,char[],int,int,int), static int java.lang.String.lastIndexOf(java.lang.String,java.lang.String,int), void java.lang.String.getChars(char[],int)]
 []
 [interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
 0
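The updated expected reflection dump above gains a few java.lang.String members from the newer libcore: the private nonSyncContentEquals helper, the two public join overloads, and a package-private getChars(char[],int). As a rough standalone illustration (not part of the test) of what the newly listed public join overloads do:

    // JoinDemo.java -- hypothetical demo file, not in the AOSP tree.
    import java.util.Arrays;
    import java.util.List;

    public class JoinDemo {
      public static void main(String[] args) {
        // String.join(CharSequence delimiter, CharSequence... elements)
        System.out.println(String.join("-", "a", "b", "c"));   // prints a-b-c
        // String.join(CharSequence delimiter, Iterable<? extends CharSequence> elements)
        List<String> parts = Arrays.asList("x", "y", "z");
        System.out.println(String.join(", ", parts));          // prints x, y, z
      }
    }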
diff --git a/test/201-built-in-exception-detail-messages/src/Main.java b/test/201-built-in-exception-detail-messages/src/Main.java
index 52d4259..f0bb6dd 100644
--- a/test/201-built-in-exception-detail-messages/src/Main.java
+++ b/test/201-built-in-exception-detail-messages/src/Main.java
@@ -461,7 +461,7 @@
       "hello there".substring(9,14);
       fail();
     } catch (StringIndexOutOfBoundsException ex) {
-      assertEquals("length=11; regionStart=9; regionLength=5", ex.getMessage());
+      assertEquals("length=11; index=14", ex.getMessage());
     }
   }
 }
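The updated assertion reflects a change in the detail message produced when String.substring is given an end index past the end of the string: "hello there" has length 11, and substring(9, 14) now reports the out-of-range index rather than describing the requested region. A minimal sketch, separate from the test harness, that surfaces whichever message the running libcore produces:

    // SubstringMessageDemo.java -- hypothetical demo file, not part of the test.
    public class SubstringMessageDemo {
      public static void main(String[] args) {
        try {
          "hello there".substring(9, 14);  // length is 11, so end index 14 is out of range
        } catch (StringIndexOutOfBoundsException ex) {
          // New libcore: "length=11; index=14"
          // Older builds: "length=11; regionStart=9; regionLength=5"
          System.out.println(ex.getMessage());
        }
      }
    }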
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index 08b6cec..36f14d8 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -30,6 +30,11 @@
   public void $noinline$f() {
     throw new RuntimeException();
   }
+
+  public int $inline$h(boolean cond) {
+    Super obj = (cond ? this : null);
+    return obj.hashCode();
+  }
 }
 
 class SubclassA extends Super {
@@ -620,6 +625,46 @@
     o.mainField = 0;
   }
 
+  /// CHECK-START: void Main.testThisArgumentMoreSpecific(boolean) inliner (before)
+  /// CHECK-DAG:     <<Arg:l\d+>>   NewInstance
+  /// CHECK-DAG:                    InvokeVirtual [<<Arg>>,{{z\d+}}] method_name:Super.$inline$h
+
+  /// CHECK-START: void Main.testThisArgumentMoreSpecific(boolean) inliner (after)
+  /// CHECK-DAG:     <<Arg:l\d+>>   NewInstance
+  /// CHECK-DAG:     <<Null:l\d+>>  NullConstant
+  /// CHECK-DAG:     <<Phi:l\d+>>   Phi [<<Arg>>,<<Null>>] klass:SubclassA
+  /// CHECK-DAG:     <<NCPhi:l\d+>> NullCheck [<<Phi>>]
+  /// CHECK-DAG:                    InvokeVirtual [<<NCPhi>>] method_name:Super.hashCode
+
+  public void testThisArgumentMoreSpecific(boolean cond) {
+    // Inlining the method from Super will build it with `this` typed as Super.
+    // Running RTP will sharpen it to SubclassA.
+    SubclassA obj = new SubclassA();
+    ((Super) obj).$inline$h(cond);
+  }
+
+  public static int $inline$hashCode(Super obj) {
+    return obj.hashCode();
+  }
+
+  /// CHECK-START: void Main.testExplicitArgumentMoreSpecific(SubclassA) inliner (before)
+  /// CHECK-DAG:     <<Arg:l\d+>>   ParameterValue klass:SubclassA
+  // Note: The ArtMethod* (typed as int or long) is optional after sharpening.
+  /// CHECK-DAG:                    InvokeStaticOrDirect [<<Arg>>{{(,[ij]\d+)?}}] method_name:Main.$inline$hashCode
+
+  /// CHECK-START: void Main.testExplicitArgumentMoreSpecific(SubclassA) inliner (after)
+  /// CHECK-DAG:     <<Arg:l\d+>>   ParameterValue klass:SubclassA
+  /// CHECK-DAG:     <<NCArg:l\d+>> NullCheck [<<Arg>>] klass:SubclassA
+  /// CHECK-DAG:                    InvokeVirtual [<<NCArg>>] method_name:Super.hashCode
+
+  public void testExplicitArgumentMoreSpecific(SubclassA obj) {
+    // Inlining a method will build it with reference types from its signature,
+    // here the callee graph is built with Super as the type of its only argument.
+    // Running RTP after its ParameterValue instructions are replaced with actual
+    // arguments will type the inner graph more precisely.
+    $inline$hashCode(obj);
+  }
+
   /// CHECK-START: void Main.testPhiHasOnlyNullInputs(boolean) inliner (before)
   /// CHECK:      <<Int:i\d+>>       IntConstant 0
   /// CHECK:      <<Phi:l\d+>>       Phi klass:Main exact:false
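The two new checker tests above exercise reference type propagation (RTP) after inlining: once the callee is inlined, its `this`/ParameterValue inputs are replaced with the actual arguments, RTP re-types the inner graph (the Phi becomes klass:SubclassA, the parameter stays SubclassA), and the virtual call is resolved against the sharper type. A standalone sketch of the same shape, reusing the test's class names purely for illustration (it compiles and runs on its own, but it is not the checker test itself):

    // RtpSketch.java -- hypothetical illustration of the pattern being tested.
    public class RtpSketch {
      static class Super {
        public int $inline$h(boolean cond) {
          Super obj = (cond ? this : null);  // Phi of {this, null}; RTP can sharpen its type
          return obj.hashCode();
        }
      }
      static class SubclassA extends Super { }

      public static void main(String[] args) {
        SubclassA obj = new SubclassA();
        // Statically the receiver is cast to Super, but after inlining the optimizing
        // compiler sees the NewInstance and can type the inlined body as SubclassA.
        System.out.println(((Super) obj).$inline$h(true));
      }
    }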
diff --git a/test/458-checker-instruction-simplification/smali/SmaliTests.smali b/test/458-checker-instruction-simplification/smali/SmaliTests.smali
index ede599b..6845961 100644
--- a/test/458-checker-instruction-simplification/smali/SmaliTests.smali
+++ b/test/458-checker-instruction-simplification/smali/SmaliTests.smali
@@ -191,3 +191,139 @@
 
 .end method
 
+## CHECK-START: int SmaliTests.AddSubConst(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const7:i\d+>>    IntConstant 7
+## CHECK-DAG:     <<Const8:i\d+>>    IntConstant 8
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<Const7>>]
+## CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Add>>,<<Const8>>]
+## CHECK-DAG:                        Return [<<Sub>>]
+
+## CHECK-START: int SmaliTests.AddSubConst(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<ConstM1:i\d+>>   IntConstant -1
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<ConstM1>>]
+## CHECK-DAG:                        Return [<<Add>>]
+
+.method public static AddSubConst(I)I
+    .registers 3
+
+    .prologue
+    add-int/lit8 v0, p0, 7
+
+    const/16 v1, 8
+
+    sub-int v0, v0, v1
+
+    return v0
+.end method
+
+## CHECK-START: int SmaliTests.SubAddConst(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const3:i\d+>>    IntConstant 3
+## CHECK-DAG:     <<Const4:i\d+>>    IntConstant 4
+## CHECK-DAG:     <<Sub:i\d+>>       Sub [<<ArgValue>>,<<Const3>>]
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<Sub>>,<<Const4>>]
+## CHECK-DAG:                        Return [<<Add>>]
+
+## CHECK-START: int SmaliTests.SubAddConst(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<Const1>>]
+## CHECK-DAG:                        Return [<<Add>>]
+
+.method public static SubAddConst(I)I
+    .registers 2
+
+    .prologue
+    const/4 v0, 3
+
+    sub-int v0, p0, v0
+
+    add-int/lit8 v0, v0, 4
+
+    return v0
+.end method
+
+## CHECK-START: int SmaliTests.SubSubConst1(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const9:i\d+>>    IntConstant 9
+## CHECK-DAG:     <<Const10:i\d+>>   IntConstant 10
+## CHECK-DAG:     <<Sub1:i\d+>>      Sub [<<ArgValue>>,<<Const9>>]
+## CHECK-DAG:     <<Sub2:i\d+>>      Sub [<<Sub1>>,<<Const10>>]
+## CHECK-DAG:                        Return [<<Sub2>>]
+
+## CHECK-START: int SmaliTests.SubSubConst1(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<ConstM19:i\d+>>  IntConstant -19
+## CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<ConstM19>>]
+## CHECK-DAG:                        Return [<<Add>>]
+
+.method public static SubSubConst1(I)I
+    .registers 3
+
+    .prologue
+    const/16 v1, 9
+
+    sub-int v0, p0, v1
+
+    const/16 v1, 10
+
+    sub-int v0, v0, v1
+
+    return v0
+.end method
+
+## CHECK-START: int SmaliTests.SubSubConst2(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const11:i\d+>>   IntConstant 11
+## CHECK-DAG:     <<Const12:i\d+>>   IntConstant 12
+## CHECK-DAG:     <<Sub1:i\d+>>      Sub [<<Const11>>,<<ArgValue>>]
+## CHECK-DAG:     <<Sub2:i\d+>>      Sub [<<Sub1>>,<<Const12>>]
+## CHECK-DAG:                        Return [<<Sub2>>]
+
+## CHECK-START: int SmaliTests.SubSubConst2(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<ConstM1:i\d+>>   IntConstant -1
+## CHECK-DAG:     <<Sub:i\d+>>       Sub [<<ConstM1>>,<<ArgValue>>]
+## CHECK-DAG:                        Return [<<Sub>>]
+
+.method public static SubSubConst2(I)I
+    .registers 3
+
+    .prologue
+    rsub-int/lit8 v0, p0, 11
+
+    const/16 v1, 12
+
+    sub-int v0, v0, v1
+
+    return v0
+.end method
+
+## CHECK-START: int SmaliTests.SubSubConst3(int) instruction_simplifier (before)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const15:i\d+>>   IntConstant 15
+## CHECK-DAG:     <<Const16:i\d+>>   IntConstant 16
+## CHECK-DAG:     <<Sub1:i\d+>>      Sub [<<ArgValue>>,<<Const16>>]
+## CHECK-DAG:     <<Sub2:i\d+>>      Sub [<<Const15>>,<<Sub1>>]
+## CHECK-DAG:                        Return [<<Sub2>>]
+
+## CHECK-START: int SmaliTests.SubSubConst3(int) instruction_simplifier (after)
+## CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+## CHECK-DAG:     <<Const31:i\d+>>   IntConstant 31
+## CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const31>>,<<ArgValue>>]
+## CHECK-DAG:                        Return [<<Sub>>]
+
+.method public static SubSubConst3(I)I
+    .registers 2
+
+    .prologue
+    const/16 v0, 16
+
+    sub-int v0, p0, v0
+
+    rsub-int/lit8 v0, v0, 15
+
+    return v0
+.end method
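The smali methods above feed fixed add/sub chains to the instruction simplifier, and the folded constants expected by the CHECK lines are plain arithmetic: (x+7)-8 = x-1, (x-3)+4 = x+1, (x-9)-10 = x-19, (11-x)-12 = -1-x, and 15-(x-16) = 31-x. A small standalone Java check (hypothetical, not part of the test suite) that these identities hold for arbitrary int inputs, including under two's-complement wrap-around:

    // FoldCheck.java -- sanity-checks the identities asserted by the smali CHECK lines.
    public class FoldCheck {
      public static void main(String[] args) {
        int[] samples = {0, 1, -1, 42, Integer.MIN_VALUE, Integer.MAX_VALUE};
        for (int x : samples) {
          assertEq((x + 7) - 8, x + (-1));   // AddSubConst
          assertEq((x - 3) + 4, x + 1);      // SubAddConst
          assertEq((x - 9) - 10, x + (-19)); // SubSubConst1
          assertEq((11 - x) - 12, -1 - x);   // SubSubConst2
          assertEq(15 - (x - 16), 31 - x);   // SubSubConst3
        }
        System.out.println("all identities hold");
      }

      static void assertEq(int expected, int actual) {
        if (expected != actual) {
          throw new AssertionError(expected + " != " + actual);
        }
      }
    }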
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index 53c2e0b..c717eaa 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -18,6 +18,8 @@
 
 public class Main {
 
+  static boolean doThrow = false;
+
   public static void assertBooleanEquals(boolean expected, boolean result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -58,41 +60,66 @@
    * Tiny programs exercising optimizations of arithmetic identities.
    */
 
-  /// CHECK-START: long Main.Add0(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Add0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>  LongConstant 0
   /// CHECK-DAG:     <<Add:j\d+>>     Add [<<Const0>>,<<Arg>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: long Main.Add0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Add0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Add0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Add0(long) instruction_simplifier (after)
   /// CHECK-NOT:                        Add
 
-  public static long Add0(long arg) {
+  public static long $noinline$Add0(long arg) {
+    if (doThrow) { throw new Error(); }
     return 0 + arg;
   }
 
-  /// CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$AddAddSubAddConst(int) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<Const2:i\d+>>    IntConstant 2
+  /// CHECK-DAG:     <<ConstM3:i\d+>>   IntConstant -3
+  /// CHECK-DAG:     <<Const4:i\d+>>    IntConstant 4
+  /// CHECK-DAG:     <<Add1:i\d+>>      Add [<<ArgValue>>,<<Const1>>]
+  /// CHECK-DAG:     <<Add2:i\d+>>      Add [<<Add1>>,<<Const2>>]
+  /// CHECK-DAG:     <<Add3:i\d+>>      Add [<<Add2>>,<<ConstM3>>]
+  /// CHECK-DAG:     <<Add4:i\d+>>      Add [<<Add3>>,<<Const4>>]
+  /// CHECK-DAG:                        Return [<<Add4>>]
+
+  /// CHECK-START: int Main.$noinline$AddAddSubAddConst(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const4:i\d+>>    IntConstant 4
+  /// CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<Const4>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  public static int $noinline$AddAddSubAddConst(int arg) {
+    if (doThrow) { throw new Error(); }
+    return arg + 1 + 2 - 3 + 4;
+  }
+
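Each test helper in this file is renamed with a $noinline$ prefix and guarded by `if (doThrow) { throw new Error(); }`. As I read the ART run-test convention, the prefix tells the tooling not to inline the method and the never-taken throw keeps the body from looking trivially inlinable, so each CHECK block keeps observing the method's own graph; the diff itself only shows the mechanical rename. The newly added $noinline$AddAddSubAddConst also relies on simple constant folding: 1 + 2 - 3 + 4 = 4, hence the single `Add [<<ArgValue>>,<<Const4>>]` after simplification. A minimal sketch of the pattern in isolation:

    // NoInlinePattern.java -- hypothetical file mirroring the convention used above.
    public class NoInlinePattern {
      // Never set to true; the conditional throw only exists so the compiler
      // cannot treat the method body as trivial.
      static boolean doThrow = false;

      public static int $noinline$AddAddSubAddConst(int arg) {
        if (doThrow) { throw new Error(); }
        return arg + 1 + 2 - 3 + 4;   // expected to simplify to arg + 4
      }

      public static void main(String[] args) {
        System.out.println($noinline$AddAddSubAddConst(10));  // prints 14
      }
    }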
+  /// CHECK-START: int Main.$noinline$AndAllOnes(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<ConstF:i\d+>>  IntConstant -1
   /// CHECK-DAG:     <<And:i\d+>>     And [<<Arg>>,<<ConstF>>]
   /// CHECK-DAG:                      Return [<<And>>]
 
-  /// CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AndAllOnes(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AndAllOnes(int) instruction_simplifier (after)
   /// CHECK-NOT:                      And
 
-  public static int AndAllOnes(int arg) {
+  public static int $noinline$AndAllOnes(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg & -1;
   }
 
-  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$UShr28And15(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<Const15:i\d+>>  IntConstant 15
@@ -100,20 +127,21 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const15>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$UShr28And15(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<Arg>>,<<Const28>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$UShr28And15(int) instruction_simplifier (after)
   /// CHECK-NOT:                       And
 
-  public static int UShr28And15(int arg) {
+  public static int $noinline$UShr28And15(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 28) & 15;
   }
 
-  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$UShr60And15(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<Const15:j\d+>>  LongConstant 15
@@ -121,20 +149,21 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const15>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr60And15(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const60>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr60And15(long) instruction_simplifier (after)
   /// CHECK-NOT:                       And
 
-  public static long UShr60And15(long arg) {
+  public static long $noinline$UShr60And15(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 60) & 15;
   }
 
-  /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$UShr28And7(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<Const7:i\d+>>   IntConstant 7
@@ -142,7 +171,7 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$UShr28And7(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const28:i\d+>>  IntConstant 28
   /// CHECK-DAG:     <<Const7:i\d+>>   IntConstant 7
@@ -150,11 +179,12 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static int UShr28And7(int arg) {
+  public static int $noinline$UShr28And7(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 28) & 7;
   }
 
-  /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$UShr60And7(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<Const7:j\d+>>   LongConstant 7
@@ -162,7 +192,7 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr60And7(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const60:i\d+>>  IntConstant 60
   /// CHECK-DAG:     <<Const7:j\d+>>   LongConstant 7
@@ -170,11 +200,12 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<UShr>>,<<Const7>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static long UShr60And7(long arg) {
+  public static long $noinline$UShr60And7(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >>> 60) & 7;
   }
 
-  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Shr24And255(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<Const255:i\d+>> IntConstant 255
@@ -182,21 +213,22 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const255>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shr24And255(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<UShr:i\d+>>     UShr [<<Arg>>,<<Const24>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shr24And255(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
   /// CHECK-NOT:                       And
 
-  public static int Shr24And255(int arg) {
+  public static int $noinline$Shr24And255(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 24) & 255;
   }
 
-  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr56And255(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<Const255:j\d+>> LongConstant 255
@@ -204,21 +236,22 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const255>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr56And255(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const56>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr56And255(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
   /// CHECK-NOT:                       And
 
-  public static long Shr56And255(long arg) {
+  public static long $noinline$Shr56And255(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 56) & 255;
   }
 
-  /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Shr24And127(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<Const127:i\d+>> IntConstant 127
@@ -226,7 +259,7 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shr24And127(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const24:i\d+>>  IntConstant 24
   /// CHECK-DAG:     <<Const127:i\d+>> IntConstant 127
@@ -234,11 +267,12 @@
   /// CHECK-DAG:     <<And:i\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static int Shr24And127(int arg) {
+  public static int $noinline$Shr24And127(int arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 24) & 127;
   }
 
-  /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr56And127(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<Const127:j\d+>> LongConstant 127
@@ -246,7 +280,7 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr56And127(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const56:i\d+>>  IntConstant 56
   /// CHECK-DAG:     <<Const127:j\d+>> LongConstant 127
@@ -254,267 +288,361 @@
   /// CHECK-DAG:     <<And:j\d+>>      And [<<Shr>>,<<Const127>>]
   /// CHECK-DAG:                       Return [<<And>>]
 
-  public static long Shr56And127(long arg) {
+  public static long $noinline$Shr56And127(long arg) {
+    if (doThrow) { throw new Error(); }
     return (arg >> 56) & 127;
   }
 
-  /// CHECK-START: long Main.Div1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Div1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
   /// CHECK-DAG:     <<Div:j\d+>>     Div [<<Arg>>,<<Const1>>]
   /// CHECK-DAG:                      Return [<<Div>>]
 
-  /// CHECK-START: long Main.Div1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Div1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Div1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Div1(long) instruction_simplifier (after)
   /// CHECK-NOT:                      Div
 
-  public static long Div1(long arg) {
+  public static long $noinline$Div1(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg / 1;
   }
 
-  /// CHECK-START: int Main.DivN1(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$DivN1(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<ConstN1:i\d+>>  IntConstant -1
   /// CHECK-DAG:     <<Div:i\d+>>      Div [<<Arg>>,<<ConstN1>>]
   /// CHECK-DAG:                       Return [<<Div>>]
 
-  /// CHECK-START: int Main.DivN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$DivN1(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.DivN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$DivN1(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Div
 
-  public static int DivN1(int arg) {
+  public static int $noinline$DivN1(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg / -1;
   }
 
-  /// CHECK-START: long Main.Mul1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Mul1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Const1:j\d+>>  LongConstant 1
   /// CHECK-DAG:     <<Mul:j\d+>>     Mul [<<Const1>>,<<Arg>>]
   /// CHECK-DAG:                      Return [<<Mul>>]
 
-  /// CHECK-START: long Main.Mul1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Mul1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Mul1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Mul1(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Mul
 
-  public static long Mul1(long arg) {
+  public static long $noinline$Mul1(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg * 1;
   }
 
-  /// CHECK-START: int Main.MulN1(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$MulN1(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<ConstN1:i\d+>>  IntConstant -1
   /// CHECK-DAG:     <<Mul:i\d+>>      Mul [<<Arg>>,<<ConstN1>>]
   /// CHECK-DAG:                       Return [<<Mul>>]
 
-  /// CHECK-START: int Main.MulN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$MulN1(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.MulN1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$MulN1(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Mul
 
-  public static int MulN1(int arg) {
+  public static int $noinline$MulN1(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg * -1;
   }
 
-  /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$MulPowerOfTwo128(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:     <<Const128:j\d+>>  LongConstant 128
   /// CHECK-DAG:     <<Mul:j\d+>>       Mul [<<Const128>>,<<Arg>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$MulPowerOfTwo128(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:     <<Const7:i\d+>>    IntConstant 7
   /// CHECK-DAG:     <<Shl:j\d+>>       Shl [<<Arg>>,<<Const7>>]
   /// CHECK-DAG:                        Return [<<Shl>>]
 
-  /// CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$MulPowerOfTwo128(long) instruction_simplifier (after)
   /// CHECK-NOT:                        Mul
 
-  public static long MulPowerOfTwo128(long arg) {
+  public static long $noinline$MulPowerOfTwo128(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg * 128;
   }
 
-  /// CHECK-START: int Main.Or0(int) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$MulMulMulConst(long) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:j\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const10:j\d+>>   LongConstant 10
+  /// CHECK-DAG:     <<Const11:j\d+>>   LongConstant 11
+  /// CHECK-DAG:     <<Const12:j\d+>>   LongConstant 12
+  /// CHECK-DAG:     <<Mul1:j\d+>>      Mul [<<Const10>>,<<ArgValue>>]
+  /// CHECK-DAG:     <<Mul2:j\d+>>      Mul [<<Mul1>>,<<Const11>>]
+  /// CHECK-DAG:     <<Mul3:j\d+>>      Mul [<<Mul2>>,<<Const12>>]
+  /// CHECK-DAG:                        Return [<<Mul3>>]
+
+  /// CHECK-START: long Main.$noinline$MulMulMulConst(long) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:j\d+>>   ParameterValue
+  /// CHECK-DAG:     <<Const1320:j\d+>>  LongConstant 1320
+  /// CHECK-DAG:     <<Mul:j\d+>>        Mul [<<ArgValue>>,<<Const1320>>]
+  /// CHECK-DAG:                         Return [<<Mul>>]
+
+  public static long $noinline$MulMulMulConst(long arg) {
+    if (doThrow) { throw new Error(); }
+    return 10 * arg * 11 * 12;
+  }
+
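For $noinline$MulMulMulConst the simplifier is expected to collapse the three multiplications into one, and the folded factor is just 10 * 11 * 12 = 1320. A standalone check (hypothetical, not part of the test) that the rewrite preserves results even when the long multiplication wraps:

    // MulFoldCheck.java -- verifies 10 * x * 11 * 12 == x * 1320 for sample inputs.
    public class MulFoldCheck {
      public static void main(String[] args) {
        long[] samples = {0L, 1L, -7L, 123456789L, Long.MAX_VALUE};
        for (long x : samples) {
          long chained = 10 * x * 11 * 12;  // as written in the test method
          long folded = x * 1320L;          // 10 * 11 * 12 == 1320
          if (chained != folded) {
            throw new AssertionError(x + ": " + chained + " != " + folded);
          }
        }
        System.out.println("multiplication folding is value-preserving");
      }
    }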
+  /// CHECK-START: int Main.$noinline$Or0(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.Or0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Or0(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: int Main.Or0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Or0(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Or
 
-  public static int Or0(int arg) {
+  public static int $noinline$Or0(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg | 0;
   }
 
-  /// CHECK-START: long Main.OrSame(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$OrSame(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:     <<Or:j\d+>>        Or [<<Arg>>,<<Arg>>]
   /// CHECK-DAG:                        Return [<<Or>>]
 
-  /// CHECK-START: long Main.OrSame(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$OrSame(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>       ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: long Main.OrSame(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$OrSame(long) instruction_simplifier (after)
   /// CHECK-NOT:                        Or
 
-  public static long OrSame(long arg) {
+  public static long $noinline$OrSame(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg | arg;
   }
 
-  /// CHECK-START: int Main.Shl0(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Shl0(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Shl:i\d+>>      Shl [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Shl>>]
 
-  /// CHECK-START: int Main.Shl0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shl0(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: int Main.Shl0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Shl0(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Shl
 
-  public static int Shl0(int arg) {
+  public static int $noinline$Shl0(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg << 0;
   }
 
-  /// CHECK-START: long Main.Shr0(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Shr>>]
 
-  /// CHECK-START: long Main.Shr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Shr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr0(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
 
-  public static long Shr0(long arg) {
+  public static long $noinline$Shr0(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg >> 0;
   }
 
-  /// CHECK-START: long Main.Shr64(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Shr64(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const64:i\d+>>  IntConstant 64
   /// CHECK-DAG:     <<Shr:j\d+>>      Shr [<<Arg>>,<<Const64>>]
   /// CHECK-DAG:                       Return [<<Shr>>]
 
-  /// CHECK-START: long Main.Shr64(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr64(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Shr64(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Shr64(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Shr
 
-  public static long Shr64(long arg) {
+  public static long $noinline$Shr64(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg >> 64;
   }
 
-  /// CHECK-START: long Main.Sub0(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$Sub0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
   /// CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: long Main.Sub0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Sub0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.Sub0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$Sub0(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static long Sub0(long arg) {
+  public static long $noinline$Sub0(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg - 0;
   }
 
-  /// CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubAliasNeg(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Const0>>,<<Arg>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubAliasNeg(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubAliasNeg(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static int SubAliasNeg(int arg) {
+  public static int $noinline$SubAliasNeg(int arg) {
+    if (doThrow) { throw new Error(); }
     return 0 - arg;
   }
 
-  /// CHECK-START: long Main.UShr0(long) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubAddConst1(int) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const5:i\d+>>    IntConstant 5
+  /// CHECK-DAG:     <<Const6:i\d+>>    IntConstant 6
+  /// CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const5>>,<<ArgValue>>]
+  /// CHECK-DAG:     <<Add:i\d+>>       Add [<<Sub>>,<<Const6>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  /// CHECK-START: int Main.$noinline$SubAddConst1(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const11:i\d+>>   IntConstant 11
+  /// CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const11>>,<<ArgValue>>]
+  /// CHECK-DAG:                        Return [<<Sub>>]
+
+  public static int $noinline$SubAddConst1(int arg) {
+    if (doThrow) { throw new Error(); }
+    return 5 - arg + 6;
+  }
+
+  /// CHECK-START: int Main.$noinline$SubAddConst2(int) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const14:i\d+>>   IntConstant 14
+  /// CHECK-DAG:     <<Const13:i\d+>>   IntConstant 13
+  /// CHECK-DAG:     <<Add:i\d+>>       Add [<<ArgValue>>,<<Const13>>]
+  /// CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const14>>,<<Add>>]
+  /// CHECK-DAG:                        Return [<<Sub>>]
+
+  /// CHECK-START: int Main.$noinline$SubAddConst2(int) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:i\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
+  /// CHECK-DAG:     <<Sub:i\d+>>       Sub [<<Const1>>,<<ArgValue>>]
+  /// CHECK-DAG:                        Return [<<Sub>>]
+
+  public static int $noinline$SubAddConst2(int arg) {
+    if (doThrow) { throw new Error(); }
+    return 14 - (arg + 13);
+  }
+
+  /// CHECK-START: long Main.$noinline$SubSubConst(long) instruction_simplifier (before)
+  /// CHECK-DAG:     <<ArgValue:j\d+>>  ParameterValue
+  /// CHECK-DAG:     <<Const17:j\d+>>   LongConstant 17
+  /// CHECK-DAG:     <<Const18:j\d+>>   LongConstant 18
+  /// CHECK-DAG:     <<Sub1:j\d+>>      Sub [<<Const18>>,<<ArgValue>>]
+  /// CHECK-DAG:     <<Sub2:j\d+>>      Sub [<<Const17>>,<<Sub1>>]
+  /// CHECK-DAG:                        Return [<<Sub2>>]
+
+  /// CHECK-START: long Main.$noinline$SubSubConst(long) instruction_simplifier (after)
+  /// CHECK-DAG:     <<ArgValue:j\d+>>  ParameterValue
+  /// CHECK-DAG:     <<ConstM1:j\d+>>   LongConstant -1
+  /// CHECK-DAG:     <<Add:j\d+>>       Add [<<ArgValue>>,<<ConstM1>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  public static long $noinline$SubSubConst(long arg) {
+    if (doThrow) { throw new Error(); }
+    return 17 - (18 - arg);
+  }
+
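The three subtraction tests added above expect 5 - x + 6 to fold to 11 - x, 14 - (x + 13) to 1 - x, and 17 - (18 - x) to x + (-1). A brief standalone check of those identities (hypothetical helper, not part of the test suite):

    // SubFoldCheck.java -- sanity-checks the folded forms expected by the CHECK lines.
    public class SubFoldCheck {
      public static void main(String[] args) {
        for (int x : new int[] {0, 5, -100, Integer.MAX_VALUE}) {
          check(5 - x + 6, 11 - x);        // $noinline$SubAddConst1
          check(14 - (x + 13), 1 - x);     // $noinline$SubAddConst2
        }
        for (long x : new long[] {0L, 17L, Long.MIN_VALUE}) {
          check(17 - (18 - x), x + (-1L)); // $noinline$SubSubConst
        }
        System.out.println("subtraction folding is value-preserving");
      }

      static void check(long expected, long actual) {
        if (expected != actual) {
          throw new AssertionError(expected + " != " + actual);
        }
      }
    }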
+  /// CHECK-START: long Main.$noinline$UShr0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<UShr:j\d+>>     UShr [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<UShr>>]
 
-  /// CHECK-START: long Main.UShr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr0(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.UShr0(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$UShr0(long) instruction_simplifier (after)
   /// CHECK-NOT:                       UShr
 
-  public static long UShr0(long arg) {
+  public static long $noinline$UShr0(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg >>> 0;
   }
 
-  /// CHECK-START: int Main.Xor0(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$Xor0(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Xor:i\d+>>      Xor [<<Arg>>,<<Const0>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
-  /// CHECK-START: int Main.Xor0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Xor0(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: int Main.Xor0(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$Xor0(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Xor
 
-  public static int Xor0(int arg) {
+  public static int $noinline$Xor0(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg ^ 0;
   }
 
-  /// CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$XorAllOnes(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<ConstF:i\d+>>   IntConstant -1
   /// CHECK-DAG:     <<Xor:i\d+>>      Xor [<<Arg>>,<<ConstF>>]
   /// CHECK-DAG:                       Return [<<Xor>>]
 
-  /// CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$XorAllOnes(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not:i\d+>>      Not [<<Arg>>]
   /// CHECK-DAG:                       Return [<<Not>>]
 
-  /// CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$XorAllOnes(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Xor
 
-  public static int XorAllOnes(int arg) {
+  public static int $noinline$XorAllOnes(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg ^ -1;
   }
 
@@ -525,7 +653,7 @@
    * `InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop`.
    */
 
-  /// CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$AddNegs1(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -533,7 +661,7 @@
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg1>>,<<Neg2>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AddNegs1(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-NOT:                       Neg
@@ -541,7 +669,8 @@
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Add>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  public static int AddNegs1(int arg1, int arg2) {
+  public static int $noinline$AddNegs1(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     return -arg1 + -arg2;
   }
 
@@ -556,7 +685,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$AddNegs2(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -566,7 +695,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$AddNegs2(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -577,7 +706,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.AddNegs2(int, int) GVN (after)
+  /// CHECK-START: int Main.$noinline$AddNegs2(int, int) GVN (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg1>>]
@@ -586,7 +715,8 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Add>>,<<Add>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  public static int AddNegs2(int arg1, int arg2) {
+  public static int $noinline$AddNegs2(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     int temp1 = -arg1;
     int temp2 = -arg2;
     return (temp1 + temp2) | (temp1 + temp2);
@@ -600,7 +730,7 @@
    * the loop.
    */
 
-  /// CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$AddNegs3(long, long) instruction_simplifier (before)
   //  -------------- Arguments and initial negation operations.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -612,7 +742,7 @@
   /// CHECK:         <<Add:j\d+>>      Add [<<Neg1>>,<<Neg2>>]
   /// CHECK:                           Goto
 
-  /// CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNegs3(long, long) instruction_simplifier (after)
   //  -------------- Arguments and initial negation operations.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -625,7 +755,8 @@
   /// CHECK-NOT:                       Neg
   /// CHECK:                           Goto
 
-  public static long AddNegs3(long arg1, long arg2) {
+  public static long $noinline$AddNegs3(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     long res = 0;
     long n_arg1 = -arg1;
     long n_arg2 = -arg2;
@@ -641,24 +772,25 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitAdd`.
    */
 
-  /// CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$AddNeg1(long, long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg1>>]
   /// CHECK-DAG:     <<Add:j\d+>>      Add [<<Neg>>,<<Arg2>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg1(long, long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Arg2>>,<<Arg1>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg1(long, long) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Add
 
-  public static long AddNeg1(long arg1, long arg2) {
+  public static long $noinline$AddNeg1(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     return -arg1 + arg2;
   }
 
@@ -671,7 +803,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$AddNeg2(long, long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg2>>]
@@ -680,7 +812,7 @@
   /// CHECK-DAG:     <<Res:j\d+>>      Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Res>>]
 
-  /// CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg2(long, long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg2>>]
@@ -689,10 +821,11 @@
   /// CHECK-DAG:     <<Res:j\d+>>      Or [<<Add1>>,<<Add2>>]
   /// CHECK-DAG:                       Return [<<Res>>]
 
-  /// CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$AddNeg2(long, long) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static long AddNeg2(long arg1, long arg2) {
+  public static long $noinline$AddNeg2(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     long temp = -arg2;
     return (arg1 + temp) | (arg1 + temp);
   }
@@ -702,20 +835,21 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
    */
 
-  /// CHECK-START: long Main.NegNeg1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$NegNeg1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg1:j\d+>>     Neg [<<Arg>>]
   /// CHECK-DAG:     <<Neg2:j\d+>>     Neg [<<Neg1>>]
   /// CHECK-DAG:                       Return [<<Neg2>>]
 
-  /// CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg1(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
 
-  public static long NegNeg1(long arg) {
+  public static long $noinline$NegNeg1(long arg) {
+    if (doThrow) { throw new Error(); }
     return -(-arg);
   }
 
@@ -726,29 +860,30 @@
    * and in `InstructionSimplifierVisitor::VisitAdd`.
    */
 
-  /// CHECK-START: int Main.NegNeg2(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Neg1:i\d+>>     Neg [<<Arg>>]
   /// CHECK-DAG:     <<Neg2:i\d+>>     Neg [<<Neg1>>]
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Neg2>>,<<Neg1>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg>>,<<Arg>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Add
 
-  /// CHECK-START: int Main.NegNeg2(int) constant_folding_after_inlining (after)
+  /// CHECK-START: int Main.$noinline$NegNeg2(int) constant_folding_after_inlining (after)
   /// CHECK:         <<Const0:i\d+>>   IntConstant 0
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Add
   /// CHECK:                           Return [<<Const0>>]
 
-  public static int NegNeg2(int arg) {
+  public static int $noinline$NegNeg2(int arg) {
+    if (doThrow) { throw new Error(); }
     int temp = -arg;
     return temp + -temp;
   }
@@ -760,22 +895,23 @@
    * and in `InstructionSimplifierVisitor::VisitSub`.
    */
 
-  /// CHECK-START: long Main.NegNeg3(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$NegNeg3(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:j\d+>>   LongConstant 0
   /// CHECK-DAG:     <<Neg:j\d+>>      Neg [<<Arg>>]
   /// CHECK-DAG:     <<Sub:j\d+>>      Sub [<<Const0>>,<<Neg>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg3(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NegNeg3(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
   /// CHECK-NOT:                       Sub
 
-  public static long NegNeg3(long arg) {
+  public static long $noinline$NegNeg3(long arg) {
+    if (doThrow) { throw new Error(); }
     return 0 - -arg;
   }
 
@@ -785,23 +921,24 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
    */
 
-  /// CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NegSub1(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Sub>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegSub1(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg2>>,<<Arg1>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegSub1(int, int) instruction_simplifier (after)
   /// CHECK-NOT:                       Neg
 
-  public static int NegSub1(int arg1, int arg2) {
+  public static int $noinline$NegSub1(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     return -(arg1 - arg2);
   }
 
@@ -815,7 +952,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NegSub2(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
@@ -824,7 +961,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Neg1>>,<<Neg2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NegSub2(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Arg1>>,<<Arg2>>]
@@ -833,7 +970,8 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Neg1>>,<<Neg2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  public static int NegSub2(int arg1, int arg2) {
+  public static int $noinline$NegSub2(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     int temp = arg1 - arg2;
     return -temp | -temp;
   }
@@ -843,41 +981,43 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNot`.
    */
 
-  /// CHECK-START: long Main.NotNot1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$NotNot1(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not1:j\d+>>     Not [<<Arg>>]
   /// CHECK-DAG:     <<Not2:j\d+>>     Not [<<Not1>>]
   /// CHECK-DAG:                       Return [<<Not2>>]
 
-  /// CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NotNot1(long) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:                       Return [<<Arg>>]
 
-  /// CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$NotNot1(long) instruction_simplifier (after)
   /// CHECK-NOT:                       Not
 
-  public static long NotNot1(long arg) {
+  public static long $noinline$NotNot1(long arg) {
+    if (doThrow) { throw new Error(); }
     return ~~arg;
   }
 
-  /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$NotNot2(int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not1:i\d+>>     Not [<<Arg>>]
   /// CHECK-DAG:     <<Not2:i\d+>>     Not [<<Not1>>]
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Not2>>,<<Not1>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NotNot2(int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:     <<Not:i\d+>>      Not [<<Arg>>]
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg>>,<<Not>>]
   /// CHECK-DAG:                       Return [<<Add>>]
 
-  /// CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$NotNot2(int) instruction_simplifier (after)
   /// CHECK:                           Not
   /// CHECK-NOT:                       Not
 
-  public static int NotNot2(int arg) {
+  public static int $noinline$NotNot2(int arg) {
+    if (doThrow) { throw new Error(); }
     int temp = ~arg;
     return temp + ~temp;
   }
@@ -887,24 +1027,25 @@
    * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitSub`.
    */
 
-  /// CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubNeg1(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
   /// CHECK-DAG:     <<Sub:i\d+>>      Sub [<<Neg>>,<<Arg2>>]
   /// CHECK-DAG:                       Return [<<Sub>>]
 
-  /// CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg1(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg1>>,<<Arg2>>]
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Add>>]
   /// CHECK-DAG:                       Return [<<Neg>>]
 
-  /// CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg1(int, int) instruction_simplifier (after)
   /// CHECK-NOT:                       Sub
 
-  public static int SubNeg1(int arg1, int arg2) {
+  public static int $noinline$SubNeg1(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     return -arg1 - arg2;
   }
 
@@ -918,7 +1059,7 @@
    * increasing the register pressure by creating or extending live ranges.
    */
 
-  /// CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$SubNeg2(int, int) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
@@ -927,7 +1068,7 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Sub1>>,<<Sub2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg2(int, int) instruction_simplifier (after)
   /// CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:i\d+>>     ParameterValue
   /// CHECK-DAG:     <<Neg:i\d+>>      Neg [<<Arg1>>]
@@ -936,10 +1077,11 @@
   /// CHECK-DAG:     <<Or:i\d+>>       Or [<<Sub1>>,<<Sub2>>]
   /// CHECK-DAG:                       Return [<<Or>>]
 
-  /// CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$SubNeg2(int, int) instruction_simplifier (after)
   /// CHECK-NOT:                       Add
 
-  public static int SubNeg2(int arg1, int arg2) {
+  public static int $noinline$SubNeg2(int arg1, int arg2) {
+    if (doThrow) { throw new Error(); }
     int temp = -arg1;
     return (temp - arg2) | (temp - arg2);
   }
@@ -951,7 +1093,7 @@
    * the loop.
    */
 
-  /// CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$SubNeg3(long, long) instruction_simplifier (before)
   //  -------------- Arguments and initial negation operation.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -962,7 +1104,7 @@
   /// CHECK:         <<Sub:j\d+>>      Sub [<<Neg>>,<<Arg2>>]
   /// CHECK:                           Goto
 
-  /// CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$SubNeg3(long, long) instruction_simplifier (after)
   //  -------------- Arguments and initial negation operation.
   /// CHECK-DAG:     <<Arg1:j\d+>>     ParameterValue
   /// CHECK-DAG:     <<Arg2:j\d+>>     ParameterValue
@@ -974,7 +1116,8 @@
   /// CHECK-NOT:                       Neg
   /// CHECK:                           Goto
 
-  public static long SubNeg3(long arg1, long arg2) {
+  public static long $noinline$SubNeg3(long arg1, long arg2) {
+    if (doThrow) { throw new Error(); }
     long res = 0;
     long temp = -arg1;
     for (long i = 0; i < 1; i++) {
@@ -983,7 +1126,7 @@
     return res;
   }
 
-  /// CHECK-START: boolean Main.EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
@@ -993,15 +1136,16 @@
   /// CHECK-DAG:     <<NotCond:i\d+>>  Select [<<Const1>>,<<Const0>>,<<Cond>>]
   /// CHECK-DAG:                       Return [<<NotCond>>]
 
-  /// CHECK-START: boolean Main.EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
   /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
   /// CHECK-DAG:                       Return [<<True>>]
 
-  public static boolean EqualBoolVsIntConst(boolean arg) {
+  public static boolean $noinline$EqualBoolVsIntConst(boolean arg) {
+    if (doThrow) { throw new Error(); }
     return (arg ? 0 : 1) != 2;
   }
 
-  /// CHECK-START: boolean Main.NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
@@ -1011,11 +1155,12 @@
   /// CHECK-DAG:     <<NotCond:i\d+>>  Select [<<Const1>>,<<Const0>>,<<Cond>>]
   /// CHECK-DAG:                       Return [<<NotCond>>]
 
-  /// CHECK-START: boolean Main.NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier_after_bce (after)
   /// CHECK-DAG:     <<False:i\d+>>    IntConstant 0
   /// CHECK-DAG:                       Return [<<False>>]
 
-  public static boolean NotEqualBoolVsIntConst(boolean arg) {
+  public static boolean $noinline$NotEqualBoolVsIntConst(boolean arg) {
+    if (doThrow) { throw new Error(); }
     return (arg ? 0 : 1) == 2;
   }
 
@@ -1025,7 +1170,7 @@
    * remove the second.
    */
 
-  /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>    IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>    IntConstant 1
@@ -1033,7 +1178,7 @@
   /// CHECK-DAG:     <<NotNotArg:i\d+>> Select [<<Const1>>,<<Const0>>,<<NotArg>>]
   /// CHECK-DAG:                        Return [<<NotNotArg>>]
 
-  /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier_after_bce (after)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
@@ -1041,81 +1186,86 @@
     return !arg;
   }
 
-  public static boolean NotNotBool(boolean arg) {
+  public static boolean $noinline$NotNotBool(boolean arg) {
+    if (doThrow) { throw new Error(); }
     return !(NegateValue(arg));
   }
 
-  /// CHECK-START: float Main.Div2(float) instruction_simplifier (before)
+  /// CHECK-START: float Main.$noinline$Div2(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const2:f\d+>>   FloatConstant 2
   /// CHECK-DAG:      <<Div:f\d+>>      Div [<<Arg>>,<<Const2>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: float Main.Div2(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$Div2(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstP5:f\d+>>  FloatConstant 0.5
   /// CHECK-DAG:      <<Mul:f\d+>>      Mul [<<Arg>>,<<ConstP5>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: float Main.Div2(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$Div2(float) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
 
-  public static float Div2(float arg) {
+  public static float $noinline$Div2(float arg) {
+    if (doThrow) { throw new Error(); }
     return arg / 2.0f;
   }
 
-  /// CHECK-START: double Main.Div2(double) instruction_simplifier (before)
+  /// CHECK-START: double Main.$noinline$Div2(double) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const2:d\d+>>   DoubleConstant 2
   /// CHECK-DAG:      <<Div:d\d+>>      Div [<<Arg>>,<<Const2>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: double Main.Div2(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$Div2(double) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstP5:d\d+>>  DoubleConstant 0.5
   /// CHECK-DAG:      <<Mul:d\d+>>      Mul [<<Arg>>,<<ConstP5>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: double Main.Div2(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$Div2(double) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
-  public static double Div2(double arg) {
+  public static double $noinline$Div2(double arg) {
+    if (doThrow) { throw new Error(); }
     return arg / 2.0;
   }
 
-  /// CHECK-START: float Main.DivMP25(float) instruction_simplifier (before)
+  /// CHECK-START: float Main.$noinline$DivMP25(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstMP25:f\d+>>   FloatConstant -0.25
   /// CHECK-DAG:      <<Div:f\d+>>      Div [<<Arg>>,<<ConstMP25>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: float Main.DivMP25(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$DivMP25(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstM4:f\d+>>  FloatConstant -4
   /// CHECK-DAG:      <<Mul:f\d+>>      Mul [<<Arg>>,<<ConstM4>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: float Main.DivMP25(float) instruction_simplifier (after)
+  /// CHECK-START: float Main.$noinline$DivMP25(float) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
 
-  public static float DivMP25(float arg) {
+  public static float $noinline$DivMP25(float arg) {
+    if (doThrow) { throw new Error(); }
     return arg / -0.25f;
   }
 
-  /// CHECK-START: double Main.DivMP25(double) instruction_simplifier (before)
+  /// CHECK-START: double Main.$noinline$DivMP25(double) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstMP25:d\d+>>   DoubleConstant -0.25
   /// CHECK-DAG:      <<Div:d\d+>>      Div [<<Arg>>,<<ConstMP25>>]
   /// CHECK-DAG:                        Return [<<Div>>]
 
-  /// CHECK-START: double Main.DivMP25(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$DivMP25(double) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<ConstM4:d\d+>>  DoubleConstant -4
   /// CHECK-DAG:      <<Mul:d\d+>>      Mul [<<Arg>>,<<ConstM4>>]
   /// CHECK-DAG:                        Return [<<Mul>>]
 
-  /// CHECK-START: double Main.DivMP25(double) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$DivMP25(double) instruction_simplifier (after)
   /// CHECK-NOT:                        Div
-  public static double DivMP25(double arg) {
+  public static double $noinline$DivMP25(double arg) {
+    if (doThrow) { throw new Error(); }
     return arg / -0.25f;
   }
 
@@ -1123,18 +1273,19 @@
    * Test strength reduction of factors of the form (2^n + 1).
    */
 
-  /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$mulPow2Plus1(int) instruction_simplifier (before)
   /// CHECK-DAG:   <<Arg:i\d+>>         ParameterValue
   /// CHECK-DAG:   <<Const9:i\d+>>      IntConstant 9
   /// CHECK:                            Mul [<<Arg>>,<<Const9>>]
 
-  /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$mulPow2Plus1(int) instruction_simplifier (after)
   /// CHECK-DAG:   <<Arg:i\d+>>         ParameterValue
   /// CHECK-DAG:   <<Const3:i\d+>>      IntConstant 3
   /// CHECK:       <<Shift:i\d+>>       Shl [<<Arg>>,<<Const3>>]
   /// CHECK-NEXT:                       Add [<<Arg>>,<<Shift>>]
 
-  public static int mulPow2Plus1(int arg) {
+  public static int $noinline$mulPow2Plus1(int arg) {
+    if (doThrow) { throw new Error(); }
     return arg * 9;
   }
 
@@ -1142,62 +1293,69 @@
    * Test strength reduction of factors of the form (2^n - 1).
    */
 
-  /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$mulPow2Minus1(long) instruction_simplifier (before)
   /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
   /// CHECK-DAG:   <<Const31:j\d+>>     LongConstant 31
   /// CHECK:                            Mul [<<Const31>>,<<Arg>>]
 
-  /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$mulPow2Minus1(long) instruction_simplifier (after)
   /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
   /// CHECK-DAG:   <<Const5:i\d+>>      IntConstant 5
   /// CHECK:       <<Shift:j\d+>>       Shl [<<Arg>>,<<Const5>>]
   /// CHECK-NEXT:                       Sub [<<Shift>>,<<Arg>>]
 
-  public static long mulPow2Minus1(long arg) {
+  public static long $noinline$mulPow2Minus1(long arg) {
+    if (doThrow) { throw new Error(); }
     return arg * 31;
   }
 
-  /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier_after_bce (before)
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<NE:z\d+>>       NotEqual [<<Field>>,<<Const1>>]
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier_after_bce (after)
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const54>>,<<Const13>>,<<Field>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  public static int booleanFieldNotEqualOne() {
+  public static int $noinline$booleanFieldNotEqualOne() {
+    if (doThrow) { throw new Error(); }
     return (booleanField == $inline$true()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier_after_bce (before)
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<NE:z\d+>>       Equal [<<Field>>,<<Const0>>]
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier_after_bce (after)
+  /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const54>>,<<Const13>>,<<Field>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  public static int booleanFieldEqualZero() {
+  public static int $noinline$booleanFieldEqualZero() {
+    if (doThrow) { throw new Error(); }
     return (booleanField != $inline$false()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
@@ -1210,7 +1368,7 @@
   /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Result>>]
 
-  /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier_after_bce (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
@@ -1221,11 +1379,12 @@
   // Note that we match `LE` from Select because there are two identical
   // LessThanOrEqual instructions.
 
-  public static int intConditionNotEqualOne(int i) {
+  public static int $noinline$intConditionNotEqualOne(int i) {
+    if (doThrow) { throw new Error(); }
     return ((i > 42) == $inline$true()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (before)
+  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
@@ -1238,7 +1397,7 @@
   /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Result>>]
 
-  /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (after)
+  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier_after_bce (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
@@ -1249,16 +1408,17 @@
   // Note that we match `LE` from Select because there are two identical
   // LessThanOrEqual instructions.
 
-  public static int intConditionEqualZero(int i) {
+  public static int $noinline$intConditionEqualZero(int i) {
+    if (doThrow) { throw new Error(); }
     return ((i > 42) != $inline$false()) ? 13 : 54;
   }
 
   // Test that conditions on float/double are not flipped.
 
-  /// CHECK-START: int Main.floatConditionNotEqualOne(float) builder (after)
+  /// CHECK-START: int Main.$noinline$floatConditionNotEqualOne(float) builder (after)
   /// CHECK:                            LessThanOrEqual
 
-  /// CHECK-START: int Main.floatConditionNotEqualOne(float) instruction_simplifier_before_codegen (after)
+  /// CHECK-START: int Main.$noinline$floatConditionNotEqualOne(float) instruction_simplifier_before_codegen (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1267,14 +1427,15 @@
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<LE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  public static int floatConditionNotEqualOne(float f) {
+  public static int $noinline$floatConditionNotEqualOne(float f) {
+    if (doThrow) { throw new Error(); }
     return ((f > 42.0f) == true) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.doubleConditionEqualZero(double) builder (after)
+  /// CHECK-START: int Main.$noinline$doubleConditionEqualZero(double) builder (after)
   /// CHECK:                            LessThanOrEqual
 
-  /// CHECK-START: int Main.doubleConditionEqualZero(double) instruction_simplifier_before_codegen (after)
+  /// CHECK-START: int Main.$noinline$doubleConditionEqualZero(double) instruction_simplifier_before_codegen (after)
   /// CHECK-DAG:      <<Arg:d\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1283,42 +1444,45 @@
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<LE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  public static int doubleConditionEqualZero(double d) {
+  public static int $noinline$doubleConditionEqualZero(double d) {
+    if (doThrow) { throw new Error(); }
     return ((d > 42.0) != false) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.intToDoubleToInt(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$intToDoubleToInt(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.intToDoubleToInt(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intToDoubleToInt(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: int Main.intToDoubleToInt(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intToDoubleToInt(int) instruction_simplifier (after)
   /// CHECK-NOT:                        TypeConversion
 
-  public static int intToDoubleToInt(int value) {
+  public static int $noinline$intToDoubleToInt(int value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by a conversion back.
     return (int) (double) value;
   }
 
-  /// CHECK-START: java.lang.String Main.intToDoubleToIntPrint(int) instruction_simplifier (before)
+  /// CHECK-START: java.lang.String Main.$noinline$intToDoubleToIntPrint(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
 
-  /// CHECK-START: java.lang.String Main.intToDoubleToIntPrint(int) instruction_simplifier (after)
+  /// CHECK-START: java.lang.String Main.$noinline$intToDoubleToIntPrint(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      {{d\d+}}          TypeConversion [<<Arg>>]
 
-  /// CHECK-START: java.lang.String Main.intToDoubleToIntPrint(int) instruction_simplifier (after)
+  /// CHECK-START: java.lang.String Main.$noinline$intToDoubleToIntPrint(int) instruction_simplifier (after)
   /// CHECK-DAG:                        TypeConversion
   /// CHECK-NOT:                        TypeConversion
 
-  public static String intToDoubleToIntPrint(int value) {
+  public static String $noinline$intToDoubleToIntPrint(int value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by a conversion back
     // with another use of the intermediate result.
     double d = (double) value;
@@ -1326,55 +1490,58 @@
     return "d=" + d + ", i=" + i;
   }
 
-  /// CHECK-START: int Main.byteToDoubleToInt(byte) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$byteToDoubleToInt(byte) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.byteToDoubleToInt(byte) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$byteToDoubleToInt(byte) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: int Main.byteToDoubleToInt(byte) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$byteToDoubleToInt(byte) instruction_simplifier (after)
   /// CHECK-NOT:                        TypeConversion
 
-  public static int byteToDoubleToInt(byte value) {
+  public static int $noinline$byteToDoubleToInt(byte value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by another conversion, use implicit conversion.
     return (int) (double) value;
   }
 
-  /// CHECK-START: int Main.floatToDoubleToInt(float) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$floatToDoubleToInt(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.floatToDoubleToInt(float) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$floatToDoubleToInt(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.floatToDoubleToInt(float) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$floatToDoubleToInt(float) instruction_simplifier (after)
   /// CHECK-DAG:                        TypeConversion
   /// CHECK-NOT:                        TypeConversion
 
-  public static int floatToDoubleToInt(float value) {
+  public static int $noinline$floatToDoubleToInt(float value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by another conversion.
     return (int) (double) value;
   }
 
-  /// CHECK-START: java.lang.String Main.floatToDoubleToIntPrint(float) instruction_simplifier (before)
+  /// CHECK-START: java.lang.String Main.$noinline$floatToDoubleToIntPrint(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
 
-  /// CHECK-START: java.lang.String Main.floatToDoubleToIntPrint(float) instruction_simplifier (after)
+  /// CHECK-START: java.lang.String Main.$noinline$floatToDoubleToIntPrint(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{i\d+}}          TypeConversion [<<Double>>]
 
-  public static String floatToDoubleToIntPrint(float value) {
+  public static String $noinline$floatToDoubleToIntPrint(float value) {
+    if (doThrow) { throw new Error(); }
     // Lossless conversion followed by another conversion with
     // an extra use of the intermediate result.
     double d = (double) value;
@@ -1382,176 +1549,186 @@
     return "d=" + d + ", i=" + i;
   }
 
-  /// CHECK-START: short Main.byteToDoubleToShort(byte) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$byteToDoubleToShort(byte) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.byteToDoubleToShort(byte) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$byteToDoubleToShort(byte) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:b\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: short Main.byteToDoubleToShort(byte) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$byteToDoubleToShort(byte) instruction_simplifier (after)
   /// CHECK-NOT:                        TypeConversion
 
-  public static short byteToDoubleToShort(byte value) {
+  public static short $noinline$byteToDoubleToShort(byte value) {
+    if (doThrow) { throw new Error(); }
     // Originally, this is byte->double->int->short. The first conversion is lossless,
     // so we merge this with the second one to byte->int which we omit as it's an implicit
     // conversion. Then we eliminate the resulting byte->short as an implicit conversion.
     return (short) (double) value;
   }
 
-  /// CHECK-START: short Main.charToDoubleToShort(char) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$charToDoubleToShort(char) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:c\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Double>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.charToDoubleToShort(char) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$charToDoubleToShort(char) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:c\d+>>      ParameterValue
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.charToDoubleToShort(char) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$charToDoubleToShort(char) instruction_simplifier (after)
   /// CHECK-DAG:                        TypeConversion
   /// CHECK-NOT:                        TypeConversion
 
-  public static short charToDoubleToShort(char value) {
+  public static short $noinline$charToDoubleToShort(char value) {
+    if (doThrow) { throw new Error(); }
     // Originally, this is char->double->int->short. The first conversion is lossless,
     // so we merge this with the second one to char->int which we omit as it's an implicit
     // conversion. Then we are left with the resulting char->short conversion.
     return (short) (double) value;
   }
 
-  /// CHECK-START: short Main.floatToIntToShort(float) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$floatToIntToShort(float) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.floatToIntToShort(float) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$floatToIntToShort(float) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:f\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  public static short floatToIntToShort(float value) {
+  public static short $noinline$floatToIntToShort(float value) {
+    if (doThrow) { throw new Error(); }
     // Lossy FP to integral conversion followed by another conversion: no simplification.
     return (short) value;
   }
 
-  /// CHECK-START: int Main.intToFloatToInt(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$intToFloatToInt(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Float:f\d+>>    TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Float>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.intToFloatToInt(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intToFloatToInt(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Float:f\d+>>    TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Float>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  public static int intToFloatToInt(int value) {
+  public static int $noinline$intToFloatToInt(int value) {
+    if (doThrow) { throw new Error(); }
     // Lossy integral to FP conversion followed by another conversion: no simplification.
     return (int) (float) value;
   }
 
-  /// CHECK-START: double Main.longToIntToDouble(long) instruction_simplifier (before)
+  /// CHECK-START: double Main.$noinline$longToIntToDouble(long) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Double>>]
 
-  /// CHECK-START: double Main.longToIntToDouble(long) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$longToIntToDouble(long) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Double>>]
 
-  public static double longToIntToDouble(long value) {
+  public static double $noinline$longToIntToDouble(long value) {
+    if (doThrow) { throw new Error(); }
     // Lossy long-to-int conversion followed by an integral to FP conversion: no simplification.
     return (double) (int) value;
   }
 
-  /// CHECK-START: long Main.longToIntToLong(long) instruction_simplifier (before)
+  /// CHECK-START: long Main.$noinline$longToIntToLong(long) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Long>>]
 
-  /// CHECK-START: long Main.longToIntToLong(long) instruction_simplifier (after)
+  /// CHECK-START: long Main.$noinline$longToIntToLong(long) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Long>>]
 
-  public static long longToIntToLong(long value) {
+  public static long $noinline$longToIntToLong(long value) {
+    if (doThrow) { throw new Error(); }
     // Lossy long-to-int conversion followed by an int-to-long conversion: no simplification.
     return (long) (int) value;
   }
 
-  /// CHECK-START: short Main.shortToCharToShort(short) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$shortToCharToShort(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<Char>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.shortToCharToShort(short) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$shortToCharToShort(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  public static short shortToCharToShort(short value) {
+  public static short $noinline$shortToCharToShort(short value) {
+    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion to original type.
     return (short) (char) value;
   }
 
-  /// CHECK-START: int Main.shortToLongToInt(short) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$shortToLongToInt(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Long:j\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Int:i\d+>>      TypeConversion [<<Long>>]
   /// CHECK-DAG:                        Return [<<Int>>]
 
-  /// CHECK-START: int Main.shortToLongToInt(short) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$shortToLongToInt(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  public static int shortToLongToInt(short value) {
+  public static int $noinline$shortToLongToInt(short value) {
+    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion, use implicit conversion.
     return (int) (long) value;
   }
 
-  /// CHECK-START: byte Main.shortToCharToByte(short) instruction_simplifier (before)
+  /// CHECK-START: byte Main.$noinline$shortToCharToByte(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Char>>]
   /// CHECK-DAG:                        Return [<<Byte>>]
 
-  /// CHECK-START: byte Main.shortToCharToByte(short) instruction_simplifier (after)
+  /// CHECK-START: byte Main.$noinline$shortToCharToByte(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Byte>>]
 
-  public static byte shortToCharToByte(short value) {
+  public static byte $noinline$shortToCharToByte(short value) {
+    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion losing bits
     // from the original type. Simplify to use only one conversion.
     return (byte) (char) value;
   }
 
-  /// CHECK-START: java.lang.String Main.shortToCharToBytePrint(short) instruction_simplifier (before)
+  /// CHECK-START: java.lang.String Main.$noinline$shortToCharToBytePrint(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{b\d+}}          TypeConversion [<<Char>>]
 
-  /// CHECK-START: java.lang.String Main.shortToCharToBytePrint(short) instruction_simplifier (after)
+  /// CHECK-START: java.lang.String Main.$noinline$shortToCharToBytePrint(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:      {{b\d+}}          TypeConversion [<<Char>>]
 
-  public static String shortToCharToBytePrint(short value) {
+  public static String $noinline$shortToCharToBytePrint(short value) {
+    if (doThrow) { throw new Error(); }
     // Integral conversion followed by non-widening integral conversion losing bits
     // from the original type with an extra use of the intermediate result.
     char c = (char) value;
@@ -1559,7 +1736,7 @@
     return "c=" + ((int) c) + ", b=" + ((int) b);  // implicit conversions.
   }
 
-  /// CHECK-START: byte Main.longAnd0xffToByte(long) instruction_simplifier (before)
+  /// CHECK-START: byte Main.$noinline$longAnd0xffToByte(long) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:j\d+>>     LongConstant 255
   /// CHECK-DAG:      <<And:j\d+>>      And [<<Mask>>,<<Arg>>]
@@ -1567,58 +1744,61 @@
   /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Int>>]
   /// CHECK-DAG:                        Return [<<Byte>>]
 
-  /// CHECK-START: byte Main.longAnd0xffToByte(long) instruction_simplifier (after)
+  /// CHECK-START: byte Main.$noinline$longAnd0xffToByte(long) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:      <<Byte:b\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Byte>>]
 
-  /// CHECK-START: byte Main.longAnd0xffToByte(long) instruction_simplifier (after)
+  /// CHECK-START: byte Main.$noinline$longAnd0xffToByte(long) instruction_simplifier (after)
   /// CHECK-NOT:                        And
 
-  public static byte longAnd0xffToByte(long value) {
+  public static byte $noinline$longAnd0xffToByte(long value) {
+    if (doThrow) { throw new Error(); }
     return (byte) (value & 0xff);
   }
 
-  /// CHECK-START: char Main.intAnd0x1ffffToChar(int) instruction_simplifier (before)
+  /// CHECK-START: char Main.$noinline$intAnd0x1ffffToChar(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 131071
   /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<And>>]
   /// CHECK-DAG:                        Return [<<Char>>]
 
-  /// CHECK-START: char Main.intAnd0x1ffffToChar(int) instruction_simplifier (after)
+  /// CHECK-START: char Main.$noinline$intAnd0x1ffffToChar(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Char:c\d+>>     TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Char>>]
 
-  /// CHECK-START: char Main.intAnd0x1ffffToChar(int) instruction_simplifier (after)
+  /// CHECK-START: char Main.$noinline$intAnd0x1ffffToChar(int) instruction_simplifier (after)
   /// CHECK-NOT:                        And
 
-  public static char intAnd0x1ffffToChar(int value) {
+  public static char $noinline$intAnd0x1ffffToChar(int value) {
+    if (doThrow) { throw new Error(); }
     // Keeping all significant bits and one more.
     return (char) (value & 0x1ffff);
   }
 
-  /// CHECK-START: short Main.intAnd0x17fffToShort(int) instruction_simplifier (before)
+  /// CHECK-START: short Main.$noinline$intAnd0x17fffToShort(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 98303
   /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<And>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  /// CHECK-START: short Main.intAnd0x17fffToShort(int) instruction_simplifier (after)
+  /// CHECK-START: short Main.$noinline$intAnd0x17fffToShort(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 98303
   /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
   /// CHECK-DAG:      <<Short:s\d+>>    TypeConversion [<<And>>]
   /// CHECK-DAG:                        Return [<<Short>>]
 
-  public static short intAnd0x17fffToShort(int value) {
+  public static short $noinline$intAnd0x17fffToShort(int value) {
+    if (doThrow) { throw new Error(); }
     // No simplification: clearing a significant bit.
     return (short) (value & 0x17fff);
   }
 
-  /// CHECK-START: double Main.shortAnd0xffffToShortToDouble(short) instruction_simplifier (before)
+  /// CHECK-START: double Main.$noinline$shortAnd0xffffToShortToDouble(short) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Mask:i\d+>>     IntConstant 65535
   /// CHECK-DAG:      <<And:i\d+>>      And [<<Mask>>,<<Arg>>]
@@ -1626,45 +1806,49 @@
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Same>>]
   /// CHECK-DAG:                        Return [<<Double>>]
 
-  /// CHECK-START: double Main.shortAnd0xffffToShortToDouble(short) instruction_simplifier (after)
+  /// CHECK-START: double Main.$noinline$shortAnd0xffffToShortToDouble(short) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:s\d+>>      ParameterValue
   /// CHECK-DAG:      <<Double:d\d+>>   TypeConversion [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Double>>]
 
-  public static double shortAnd0xffffToShortToDouble(short value) {
+  public static double $noinline$shortAnd0xffffToShortToDouble(short value) {
+    if (doThrow) { throw new Error(); }
     short same = (short) (value & 0xffff);
     return (double) same;
   }
 
-  /// CHECK-START: int Main.intReverseCondition(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$intReverseCondition(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
   /// CHECK-DAG:      <<LE:z\d+>>       LessThanOrEqual [<<Const42>>,<<Arg>>]
 
-  /// CHECK-START: int Main.intReverseCondition(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intReverseCondition(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
   /// CHECK-DAG:      <<GE:z\d+>>       GreaterThanOrEqual [<<Arg>>,<<Const42>>]
 
-  public static int intReverseCondition(int i) {
+  public static int $noinline$intReverseCondition(int i) {
+    if (doThrow) { throw new Error(); }
     return (42 > i) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.intReverseConditionNaN(int) instruction_simplifier (before)
+  /// CHECK-START: int Main.$noinline$intReverseConditionNaN(int) instruction_simplifier (before)
   /// CHECK-DAG:      <<Const42:d\d+>>  DoubleConstant 42
   /// CHECK-DAG:      <<Result:d\d+>>   InvokeStaticOrDirect
   /// CHECK-DAG:      <<CMP:i\d+>>      Compare [<<Const42>>,<<Result>>]
 
-  /// CHECK-START: int Main.intReverseConditionNaN(int) instruction_simplifier (after)
+  /// CHECK-START: int Main.$noinline$intReverseConditionNaN(int) instruction_simplifier (after)
   /// CHECK-DAG:      <<Const42:d\d+>>  DoubleConstant 42
   /// CHECK-DAG:      <<Result:d\d+>>   InvokeStaticOrDirect
   /// CHECK-DAG:      <<EQ:z\d+>>       Equal [<<Result>>,<<Const42>>]
 
-  public static int intReverseConditionNaN(int i) {
+  public static int $noinline$intReverseConditionNaN(int i) {
+    if (doThrow) { throw new Error(); }
     return (42 != Math.sqrt(i)) ? 13 : 54;
   }
 
-  public static int runSmaliTest(String name, boolean input) {
+  public static int $noinline$runSmaliTest(String name, boolean input) {
+    if (doThrow) { throw new Error(); }
     try {
       Class<?> c = Class.forName("SmaliTests");
       Method m = c.getMethod(name, new Class[] { boolean.class });
@@ -1674,155 +1858,291 @@
     }
   }
 
+  public static int $noinline$runSmaliTestConst(String name, int arg) {
+    if (doThrow) { throw new Error(); }
+    try {
+      Class<?> c = Class.forName("SmaliTests");
+      Method m = c.getMethod(name, int.class);
+      return (Integer) m.invoke(null, arg);
+    } catch (Exception ex) {
+      throw new Error(ex);
+    }
+  }
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryShiftMasking(int, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const31:i\d+>>  IntConstant 31
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const31>>]
+  /// CHECK-DAG:      <<Shl:i\d+>>      Shl [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryShiftMasking(int, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Shl:i\d+>>      Shl [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  public static int $noinline$intUnnecessaryShiftMasking(int value, int shift) {
+    if (doThrow) { throw new Error(); }
+    return value << (shift & 31);
+  }
+
+  /// CHECK-START: long Main.$noinline$longUnnecessaryShiftMasking(long, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const63:i\d+>>  IntConstant 63
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const63>>]
+  /// CHECK-DAG:      <<Shr:j\d+>>      Shr [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shr>>]
+
+  /// CHECK-START: long Main.$noinline$longUnnecessaryShiftMasking(long, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Shr:j\d+>>      Shr [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Return [<<Shr>>]
+
+  public static long $noinline$longUnnecessaryShiftMasking(long value, int shift) {
+    if (doThrow) { throw new Error(); }
+    return value >> (shift & 63);
+  }
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryWiderShiftMasking(int, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const255:i\d+>> IntConstant 255
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const255>>]
+  /// CHECK-DAG:      <<UShr:i\d+>>     UShr [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<UShr>>]
+
+  /// CHECK-START: int Main.$noinline$intUnnecessaryWiderShiftMasking(int, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<UShr:i\d+>>     UShr [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:                        Return [<<UShr>>]
+
+  public static int $noinline$intUnnecessaryWiderShiftMasking(int value, int shift) {
+    if (doThrow) { throw new Error(); }
+    return value >>> (shift & 0xff);
+  }
+
+  /// CHECK-START: long Main.$noinline$longSmallerShiftMasking(long, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const3>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>      Shl [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  /// CHECK-START: long Main.$noinline$longSmallerShiftMasking(long, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:j\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const3:i\d+>>   IntConstant 3
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const3>>]
+  /// CHECK-DAG:      <<Shl:j\d+>>      Shl [<<Value>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Shl>>]
+
+  public static long $noinline$longSmallerShiftMasking(long value, int shift) {
+    if (doThrow) { throw new Error(); }
+    return value << (shift & 3);
+  }
+
+  /// CHECK-START: int Main.$noinline$otherUseOfUnnecessaryShiftMasking(int, int) instruction_simplifier (before)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const31:i\d+>>  IntConstant 31
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const31>>]
+  /// CHECK-DAG:      <<Shr:i\d+>>      Shr [<<Value>>,<<And>>]
+  /// CHECK-DAG:      <<Add:i\d+>>      Add [<<Shr>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  /// CHECK-START: int Main.$noinline$otherUseOfUnnecessaryShiftMasking(int, int) instruction_simplifier (after)
+  /// CHECK:          <<Value:i\d+>>    ParameterValue
+  /// CHECK:          <<Shift:i\d+>>    ParameterValue
+  /// CHECK-DAG:      <<Const31:i\d+>>  IntConstant 31
+  /// CHECK-DAG:      <<And:i\d+>>      And [<<Shift>>,<<Const31>>]
+  /// CHECK-DAG:      <<Shr:i\d+>>      Shr [<<Value>>,<<Shift>>]
+  /// CHECK-DAG:      <<Add:i\d+>>      Add [<<Shr>>,<<And>>]
+  /// CHECK-DAG:                        Return [<<Add>>]
+
+  public static int $noinline$otherUseOfUnnecessaryShiftMasking(int value, int shift) {
+    if (doThrow) { throw new Error(); }
+    int temp = shift & 31;
+    return (value >> temp) + temp;
+  }
+
 public static void main(String[] args) {
     int arg = 123456;
 
-    assertLongEquals(Add0(arg), arg);
-    assertIntEquals(AndAllOnes(arg), arg);
-    assertLongEquals(Div1(arg), arg);
-    assertIntEquals(DivN1(arg), -arg);
-    assertLongEquals(Mul1(arg), arg);
-    assertIntEquals(MulN1(arg), -arg);
-    assertLongEquals(MulPowerOfTwo128(arg), (128 * arg));
-    assertIntEquals(Or0(arg), arg);
-    assertLongEquals(OrSame(arg), arg);
-    assertIntEquals(Shl0(arg), arg);
-    assertLongEquals(Shr0(arg), arg);
-    assertLongEquals(Shr64(arg), arg);
-    assertLongEquals(Sub0(arg), arg);
-    assertIntEquals(SubAliasNeg(arg), -arg);
-    assertLongEquals(UShr0(arg), arg);
-    assertIntEquals(Xor0(arg), arg);
-    assertIntEquals(XorAllOnes(arg), ~arg);
-    assertIntEquals(AddNegs1(arg, arg + 1), -(arg + arg + 1));
-    assertIntEquals(AddNegs2(arg, arg + 1), -(arg + arg + 1));
-    assertLongEquals(AddNegs3(arg, arg + 1), -(2 * arg + 1));
-    assertLongEquals(AddNeg1(arg, arg + 1), 1);
-    assertLongEquals(AddNeg2(arg, arg + 1), -1);
-    assertLongEquals(NegNeg1(arg), arg);
-    assertIntEquals(NegNeg2(arg), 0);
-    assertLongEquals(NegNeg3(arg), arg);
-    assertIntEquals(NegSub1(arg, arg + 1), 1);
-    assertIntEquals(NegSub2(arg, arg + 1), 1);
-    assertLongEquals(NotNot1(arg), arg);
-    assertIntEquals(NotNot2(arg), -1);
-    assertIntEquals(SubNeg1(arg, arg + 1), -(arg + arg + 1));
-    assertIntEquals(SubNeg2(arg, arg + 1), -(arg + arg + 1));
-    assertLongEquals(SubNeg3(arg, arg + 1), -(2 * arg + 1));
-    assertBooleanEquals(EqualBoolVsIntConst(true), true);
-    assertBooleanEquals(EqualBoolVsIntConst(true), true);
-    assertBooleanEquals(NotEqualBoolVsIntConst(false), false);
-    assertBooleanEquals(NotEqualBoolVsIntConst(false), false);
-    assertBooleanEquals(NotNotBool(true), true);
-    assertBooleanEquals(NotNotBool(false), false);
-    assertFloatEquals(Div2(100.0f), 50.0f);
-    assertDoubleEquals(Div2(150.0), 75.0);
-    assertFloatEquals(DivMP25(100.0f), -400.0f);
-    assertDoubleEquals(DivMP25(150.0), -600.0);
-    assertIntEquals(UShr28And15(0xc1234567), 0xc);
-    assertLongEquals(UShr60And15(0xc123456787654321L), 0xcL);
-    assertIntEquals(UShr28And7(0xc1234567), 0x4);
-    assertLongEquals(UShr60And7(0xc123456787654321L), 0x4L);
-    assertIntEquals(Shr24And255(0xc1234567), 0xc1);
-    assertLongEquals(Shr56And255(0xc123456787654321L), 0xc1L);
-    assertIntEquals(Shr24And127(0xc1234567), 0x41);
-    assertLongEquals(Shr56And127(0xc123456787654321L), 0x41L);
-    assertIntEquals(0, mulPow2Plus1(0));
-    assertIntEquals(9, mulPow2Plus1(1));
-    assertIntEquals(18, mulPow2Plus1(2));
-    assertIntEquals(900, mulPow2Plus1(100));
-    assertIntEquals(111105, mulPow2Plus1(12345));
-    assertLongEquals(0, mulPow2Minus1(0));
-    assertLongEquals(31, mulPow2Minus1(1));
-    assertLongEquals(62, mulPow2Minus1(2));
-    assertLongEquals(3100, mulPow2Minus1(100));
-    assertLongEquals(382695, mulPow2Minus1(12345));
+    assertLongEquals(arg, $noinline$Add0(arg));
+    assertIntEquals(5, $noinline$AddAddSubAddConst(1));
+    assertIntEquals(arg, $noinline$AndAllOnes(arg));
+    assertLongEquals(arg, $noinline$Div1(arg));
+    assertIntEquals(-arg, $noinline$DivN1(arg));
+    assertLongEquals(arg, $noinline$Mul1(arg));
+    assertIntEquals(-arg, $noinline$MulN1(arg));
+    assertLongEquals((128 * arg), $noinline$MulPowerOfTwo128(arg));
+    assertLongEquals(2640, $noinline$MulMulMulConst(2));
+    assertIntEquals(arg, $noinline$Or0(arg));
+    assertLongEquals(arg, $noinline$OrSame(arg));
+    assertIntEquals(arg, $noinline$Shl0(arg));
+    assertLongEquals(arg, $noinline$Shr0(arg));
+    assertLongEquals(arg, $noinline$Shr64(arg));
+    assertLongEquals(arg, $noinline$Sub0(arg));
+    assertIntEquals(-arg, $noinline$SubAliasNeg(arg));
+    assertIntEquals(9, $noinline$SubAddConst1(2));
+    assertIntEquals(-2, $noinline$SubAddConst2(3));
+    assertLongEquals(3, $noinline$SubSubConst(4));
+    assertLongEquals(arg, $noinline$UShr0(arg));
+    assertIntEquals(arg, $noinline$Xor0(arg));
+    assertIntEquals(~arg, $noinline$XorAllOnes(arg));
+    assertIntEquals(-(arg + arg + 1), $noinline$AddNegs1(arg, arg + 1));
+    assertIntEquals(-(arg + arg + 1), $noinline$AddNegs2(arg, arg + 1));
+    assertLongEquals(-(2 * arg + 1), $noinline$AddNegs3(arg, arg + 1));
+    assertLongEquals(1, $noinline$AddNeg1(arg, arg + 1));
+    assertLongEquals(-1, $noinline$AddNeg2(arg, arg + 1));
+    assertLongEquals(arg, $noinline$NegNeg1(arg));
+    assertIntEquals(0, $noinline$NegNeg2(arg));
+    assertLongEquals(arg, $noinline$NegNeg3(arg));
+    assertIntEquals(1, $noinline$NegSub1(arg, arg + 1));
+    assertIntEquals(1, $noinline$NegSub2(arg, arg + 1));
+    assertLongEquals(arg, $noinline$NotNot1(arg));
+    assertIntEquals(-1, $noinline$NotNot2(arg));
+    assertIntEquals(-(arg + arg + 1), $noinline$SubNeg1(arg, arg + 1));
+    assertIntEquals(-(arg + arg + 1), $noinline$SubNeg2(arg, arg + 1));
+    assertLongEquals(-(2 * arg + 1), $noinline$SubNeg3(arg, arg + 1));
+    assertBooleanEquals(true, $noinline$EqualBoolVsIntConst(true));
+    assertBooleanEquals(true, $noinline$EqualBoolVsIntConst(true));
+    assertBooleanEquals(false, $noinline$NotEqualBoolVsIntConst(false));
+    assertBooleanEquals(false, $noinline$NotEqualBoolVsIntConst(false));
+    assertBooleanEquals(true, $noinline$NotNotBool(true));
+    assertBooleanEquals(false, $noinline$NotNotBool(false));
+    assertFloatEquals(50.0f, $noinline$Div2(100.0f));
+    assertDoubleEquals(75.0, $noinline$Div2(150.0));
+    assertFloatEquals(-400.0f, $noinline$DivMP25(100.0f));
+    assertDoubleEquals(-600.0, $noinline$DivMP25(150.0));
+    assertIntEquals(0xc, $noinline$UShr28And15(0xc1234567));
+    assertLongEquals(0xcL, $noinline$UShr60And15(0xc123456787654321L));
+    assertIntEquals(0x4, $noinline$UShr28And7(0xc1234567));
+    assertLongEquals(0x4L, $noinline$UShr60And7(0xc123456787654321L));
+    assertIntEquals(0xc1, $noinline$Shr24And255(0xc1234567));
+    assertLongEquals(0xc1L, $noinline$Shr56And255(0xc123456787654321L));
+    assertIntEquals(0x41, $noinline$Shr24And127(0xc1234567));
+    assertLongEquals(0x41L, $noinline$Shr56And127(0xc123456787654321L));
+    assertIntEquals(0, $noinline$mulPow2Plus1(0));
+    assertIntEquals(9, $noinline$mulPow2Plus1(1));
+    assertIntEquals(18, $noinline$mulPow2Plus1(2));
+    assertIntEquals(900, $noinline$mulPow2Plus1(100));
+    assertIntEquals(111105, $noinline$mulPow2Plus1(12345));
+    assertLongEquals(0, $noinline$mulPow2Minus1(0));
+    assertLongEquals(31, $noinline$mulPow2Minus1(1));
+    assertLongEquals(62, $noinline$mulPow2Minus1(2));
+    assertLongEquals(3100, $noinline$mulPow2Minus1(100));
+    assertLongEquals(382695, $noinline$mulPow2Minus1(12345));
 
     booleanField = false;
-    assertIntEquals(booleanFieldNotEqualOne(), 54);
-    assertIntEquals(booleanFieldEqualZero(), 54);
+    assertIntEquals(54, $noinline$booleanFieldNotEqualOne());
+    assertIntEquals(54, $noinline$booleanFieldEqualZero());
     booleanField = true;
-    assertIntEquals(booleanFieldNotEqualOne(), 13);
-    assertIntEquals(booleanFieldEqualZero(), 13);
-    assertIntEquals(intConditionNotEqualOne(6), 54);
-    assertIntEquals(intConditionNotEqualOne(43), 13);
-    assertIntEquals(intConditionEqualZero(6), 54);
-    assertIntEquals(intConditionEqualZero(43), 13);
-    assertIntEquals(floatConditionNotEqualOne(6.0f), 54);
-    assertIntEquals(floatConditionNotEqualOne(43.0f), 13);
-    assertIntEquals(doubleConditionEqualZero(6.0), 54);
-    assertIntEquals(doubleConditionEqualZero(43.0), 13);
+    assertIntEquals(13, $noinline$booleanFieldNotEqualOne());
+    assertIntEquals(13, $noinline$booleanFieldEqualZero());
+    assertIntEquals(54, $noinline$intConditionNotEqualOne(6));
+    assertIntEquals(13, $noinline$intConditionNotEqualOne(43));
+    assertIntEquals(54, $noinline$intConditionEqualZero(6));
+    assertIntEquals(13, $noinline$intConditionEqualZero(43));
+    assertIntEquals(54, $noinline$floatConditionNotEqualOne(6.0f));
+    assertIntEquals(13, $noinline$floatConditionNotEqualOne(43.0f));
+    assertIntEquals(54, $noinline$doubleConditionEqualZero(6.0));
+    assertIntEquals(13, $noinline$doubleConditionEqualZero(43.0));
 
-    assertIntEquals(1234567, intToDoubleToInt(1234567));
-    assertIntEquals(Integer.MIN_VALUE, intToDoubleToInt(Integer.MIN_VALUE));
-    assertIntEquals(Integer.MAX_VALUE, intToDoubleToInt(Integer.MAX_VALUE));
-    assertStringEquals("d=7654321.0, i=7654321", intToDoubleToIntPrint(7654321));
-    assertIntEquals(12, byteToDoubleToInt((byte) 12));
-    assertIntEquals(Byte.MIN_VALUE, byteToDoubleToInt(Byte.MIN_VALUE));
-    assertIntEquals(Byte.MAX_VALUE, byteToDoubleToInt(Byte.MAX_VALUE));
-    assertIntEquals(11, floatToDoubleToInt(11.3f));
-    assertStringEquals("d=12.25, i=12", floatToDoubleToIntPrint(12.25f));
-    assertIntEquals(123, byteToDoubleToShort((byte) 123));
-    assertIntEquals(Byte.MIN_VALUE, byteToDoubleToShort(Byte.MIN_VALUE));
-    assertIntEquals(Byte.MAX_VALUE, byteToDoubleToShort(Byte.MAX_VALUE));
-    assertIntEquals(1234, charToDoubleToShort((char) 1234));
-    assertIntEquals(Character.MIN_VALUE, charToDoubleToShort(Character.MIN_VALUE));
-    assertIntEquals(/* sign-extended */ -1, charToDoubleToShort(Character.MAX_VALUE));
-    assertIntEquals(12345, floatToIntToShort(12345.75f));
-    assertIntEquals(Short.MAX_VALUE, floatToIntToShort((float)(Short.MIN_VALUE - 1)));
-    assertIntEquals(Short.MIN_VALUE, floatToIntToShort((float)(Short.MAX_VALUE + 1)));
-    assertIntEquals(-54321, intToFloatToInt(-54321));
-    assertDoubleEquals((double) 0x12345678, longToIntToDouble(0x1234567812345678L));
-    assertDoubleEquals(0.0, longToIntToDouble(Long.MIN_VALUE));
-    assertDoubleEquals(-1.0, longToIntToDouble(Long.MAX_VALUE));
-    assertLongEquals(0x0000000012345678L, longToIntToLong(0x1234567812345678L));
-    assertLongEquals(0xffffffff87654321L, longToIntToLong(0x1234567887654321L));
-    assertLongEquals(0L, longToIntToLong(Long.MIN_VALUE));
-    assertLongEquals(-1L, longToIntToLong(Long.MAX_VALUE));
-    assertIntEquals((short) -5678, shortToCharToShort((short) -5678));
-    assertIntEquals(Short.MIN_VALUE, shortToCharToShort(Short.MIN_VALUE));
-    assertIntEquals(Short.MAX_VALUE, shortToCharToShort(Short.MAX_VALUE));
-    assertIntEquals(5678, shortToLongToInt((short) 5678));
-    assertIntEquals(Short.MIN_VALUE, shortToLongToInt(Short.MIN_VALUE));
-    assertIntEquals(Short.MAX_VALUE, shortToLongToInt(Short.MAX_VALUE));
-    assertIntEquals(0x34, shortToCharToByte((short) 0x1234));
-    assertIntEquals(-0x10, shortToCharToByte((short) 0x12f0));
-    assertIntEquals(0, shortToCharToByte(Short.MIN_VALUE));
-    assertIntEquals(-1, shortToCharToByte(Short.MAX_VALUE));
-    assertStringEquals("c=1025, b=1", shortToCharToBytePrint((short) 1025));
-    assertStringEquals("c=1023, b=-1", shortToCharToBytePrint((short) 1023));
-    assertStringEquals("c=65535, b=-1", shortToCharToBytePrint((short) -1));
+    assertIntEquals(1234567, $noinline$intToDoubleToInt(1234567));
+    assertIntEquals(Integer.MIN_VALUE, $noinline$intToDoubleToInt(Integer.MIN_VALUE));
+    assertIntEquals(Integer.MAX_VALUE, $noinline$intToDoubleToInt(Integer.MAX_VALUE));
+    assertStringEquals("d=7654321.0, i=7654321", $noinline$intToDoubleToIntPrint(7654321));
+    assertIntEquals(12, $noinline$byteToDoubleToInt((byte) 12));
+    assertIntEquals(Byte.MIN_VALUE, $noinline$byteToDoubleToInt(Byte.MIN_VALUE));
+    assertIntEquals(Byte.MAX_VALUE, $noinline$byteToDoubleToInt(Byte.MAX_VALUE));
+    assertIntEquals(11, $noinline$floatToDoubleToInt(11.3f));
+    assertStringEquals("d=12.25, i=12", $noinline$floatToDoubleToIntPrint(12.25f));
+    assertIntEquals(123, $noinline$byteToDoubleToShort((byte) 123));
+    assertIntEquals(Byte.MIN_VALUE, $noinline$byteToDoubleToShort(Byte.MIN_VALUE));
+    assertIntEquals(Byte.MAX_VALUE, $noinline$byteToDoubleToShort(Byte.MAX_VALUE));
+    assertIntEquals(1234, $noinline$charToDoubleToShort((char) 1234));
+    assertIntEquals(Character.MIN_VALUE, $noinline$charToDoubleToShort(Character.MIN_VALUE));
+    assertIntEquals(/* sign-extended */ -1, $noinline$charToDoubleToShort(Character.MAX_VALUE));
+    assertIntEquals(12345, $noinline$floatToIntToShort(12345.75f));
+    assertIntEquals(Short.MAX_VALUE, $noinline$floatToIntToShort((float)(Short.MIN_VALUE - 1)));
+    assertIntEquals(Short.MIN_VALUE, $noinline$floatToIntToShort((float)(Short.MAX_VALUE + 1)));
+    assertIntEquals(-54321, $noinline$intToFloatToInt(-54321));
+    assertDoubleEquals((double) 0x12345678, $noinline$longToIntToDouble(0x1234567812345678L));
+    assertDoubleEquals(0.0, $noinline$longToIntToDouble(Long.MIN_VALUE));
+    assertDoubleEquals(-1.0, $noinline$longToIntToDouble(Long.MAX_VALUE));
+    assertLongEquals(0x0000000012345678L, $noinline$longToIntToLong(0x1234567812345678L));
+    assertLongEquals(0xffffffff87654321L, $noinline$longToIntToLong(0x1234567887654321L));
+    assertLongEquals(0L, $noinline$longToIntToLong(Long.MIN_VALUE));
+    assertLongEquals(-1L, $noinline$longToIntToLong(Long.MAX_VALUE));
+    assertIntEquals((short) -5678, $noinline$shortToCharToShort((short) -5678));
+    assertIntEquals(Short.MIN_VALUE, $noinline$shortToCharToShort(Short.MIN_VALUE));
+    assertIntEquals(Short.MAX_VALUE, $noinline$shortToCharToShort(Short.MAX_VALUE));
+    assertIntEquals(5678, $noinline$shortToLongToInt((short) 5678));
+    assertIntEquals(Short.MIN_VALUE, $noinline$shortToLongToInt(Short.MIN_VALUE));
+    assertIntEquals(Short.MAX_VALUE, $noinline$shortToLongToInt(Short.MAX_VALUE));
+    assertIntEquals(0x34, $noinline$shortToCharToByte((short) 0x1234));
+    assertIntEquals(-0x10, $noinline$shortToCharToByte((short) 0x12f0));
+    assertIntEquals(0, $noinline$shortToCharToByte(Short.MIN_VALUE));
+    assertIntEquals(-1, $noinline$shortToCharToByte(Short.MAX_VALUE));
+    assertStringEquals("c=1025, b=1", $noinline$shortToCharToBytePrint((short) 1025));
+    assertStringEquals("c=1023, b=-1", $noinline$shortToCharToBytePrint((short) 1023));
+    assertStringEquals("c=65535, b=-1", $noinline$shortToCharToBytePrint((short) -1));
 
-    assertIntEquals(0x21, longAnd0xffToByte(0x1234432112344321L));
-    assertIntEquals(0, longAnd0xffToByte(Long.MIN_VALUE));
-    assertIntEquals(-1, longAnd0xffToByte(Long.MAX_VALUE));
-    assertIntEquals(0x1234, intAnd0x1ffffToChar(0x43211234));
-    assertIntEquals(0, intAnd0x1ffffToChar(Integer.MIN_VALUE));
-    assertIntEquals(Character.MAX_VALUE, intAnd0x1ffffToChar(Integer.MAX_VALUE));
-    assertIntEquals(0x4321, intAnd0x17fffToShort(0x87654321));
-    assertIntEquals(0x0888, intAnd0x17fffToShort(0x88888888));
-    assertIntEquals(0, intAnd0x17fffToShort(Integer.MIN_VALUE));
-    assertIntEquals(Short.MAX_VALUE, intAnd0x17fffToShort(Integer.MAX_VALUE));
+    assertIntEquals(0x21, $noinline$longAnd0xffToByte(0x1234432112344321L));
+    assertIntEquals(0, $noinline$longAnd0xffToByte(Long.MIN_VALUE));
+    assertIntEquals(-1, $noinline$longAnd0xffToByte(Long.MAX_VALUE));
+    assertIntEquals(0x1234, $noinline$intAnd0x1ffffToChar(0x43211234));
+    assertIntEquals(0, $noinline$intAnd0x1ffffToChar(Integer.MIN_VALUE));
+    assertIntEquals(Character.MAX_VALUE, $noinline$intAnd0x1ffffToChar(Integer.MAX_VALUE));
+    assertIntEquals(0x4321, $noinline$intAnd0x17fffToShort(0x87654321));
+    assertIntEquals(0x0888, $noinline$intAnd0x17fffToShort(0x88888888));
+    assertIntEquals(0, $noinline$intAnd0x17fffToShort(Integer.MIN_VALUE));
+    assertIntEquals(Short.MAX_VALUE, $noinline$intAnd0x17fffToShort(Integer.MAX_VALUE));
 
-    assertDoubleEquals(0.0, shortAnd0xffffToShortToDouble((short) 0));
-    assertDoubleEquals(1.0, shortAnd0xffffToShortToDouble((short) 1));
-    assertDoubleEquals(-2.0, shortAnd0xffffToShortToDouble((short) -2));
-    assertDoubleEquals(12345.0, shortAnd0xffffToShortToDouble((short) 12345));
-    assertDoubleEquals((double)Short.MAX_VALUE, shortAnd0xffffToShortToDouble(Short.MAX_VALUE));
-    assertDoubleEquals((double)Short.MIN_VALUE, shortAnd0xffffToShortToDouble(Short.MIN_VALUE));
+    assertDoubleEquals(0.0, $noinline$shortAnd0xffffToShortToDouble((short) 0));
+    assertDoubleEquals(1.0, $noinline$shortAnd0xffffToShortToDouble((short) 1));
+    assertDoubleEquals(-2.0, $noinline$shortAnd0xffffToShortToDouble((short) -2));
+    assertDoubleEquals(12345.0, $noinline$shortAnd0xffffToShortToDouble((short) 12345));
+    assertDoubleEquals((double)Short.MAX_VALUE,
+                       $noinline$shortAnd0xffffToShortToDouble(Short.MAX_VALUE));
+    assertDoubleEquals((double)Short.MIN_VALUE,
+                       $noinline$shortAnd0xffffToShortToDouble(Short.MIN_VALUE));
 
-    assertIntEquals(intReverseCondition(41), 13);
-    assertIntEquals(intReverseConditionNaN(-5), 13);
+    assertIntEquals(13, $noinline$intReverseCondition(41));
+    assertIntEquals(13, $noinline$intReverseConditionNaN(-5));
 
     for (String condition : new String[] { "Equal", "NotEqual" }) {
       for (String constant : new String[] { "True", "False" }) {
         for (String side : new String[] { "Rhs", "Lhs" }) {
           String name = condition + constant + side;
-          assertIntEquals(runSmaliTest(name, true), 5);
-          assertIntEquals(runSmaliTest(name, false), 3);
+          assertIntEquals(5, $noinline$runSmaliTest(name, true));
+          assertIntEquals(3, $noinline$runSmaliTest(name, false));
         }
       }
     }
+
+    assertIntEquals(0, $noinline$runSmaliTestConst("AddSubConst", 1));
+    assertIntEquals(3, $noinline$runSmaliTestConst("SubAddConst", 2));
+    assertIntEquals(-16, $noinline$runSmaliTestConst("SubSubConst1", 3));
+    assertIntEquals(-5, $noinline$runSmaliTestConst("SubSubConst2", 4));
+    assertIntEquals(26, $noinline$runSmaliTestConst("SubSubConst3", 5));
+    assertIntEquals(0x5e6f7808, $noinline$intUnnecessaryShiftMasking(0xabcdef01, 3));
+    assertIntEquals(0x5e6f7808, $noinline$intUnnecessaryShiftMasking(0xabcdef01, 3 + 32));
+    assertLongEquals(0xffffffffffffeaf3L, $noinline$longUnnecessaryShiftMasking(0xabcdef0123456789L, 50));
+    assertLongEquals(0xffffffffffffeaf3L, $noinline$longUnnecessaryShiftMasking(0xabcdef0123456789L, 50 + 64));
+    assertIntEquals(0x2af37b, $noinline$intUnnecessaryWiderShiftMasking(0xabcdef01, 10));
+    assertIntEquals(0x2af37b, $noinline$intUnnecessaryWiderShiftMasking(0xabcdef01, 10 + 128));
+    assertLongEquals(0xaf37bc048d159e24L, $noinline$longSmallerShiftMasking(0xabcdef0123456789L, 2));
+    assertLongEquals(0xaf37bc048d159e24L, $noinline$longSmallerShiftMasking(0xabcdef0123456789L, 2 + 256));
+    assertIntEquals(0xfffd5e7c, $noinline$otherUseOfUnnecessaryShiftMasking(0xabcdef01, 13));
+    assertIntEquals(0xfffd5e7c, $noinline$otherUseOfUnnecessaryShiftMasking(0xabcdef01, 13 + 512));
   }
 
   private static boolean $inline$true() { return true; }
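
For reference (not part of the patch): the new shift-masking tests above rely on Java's shift semantics, under which only the low five bits of an int shift count and the low six bits of a long shift count are used (JLS 15.19). That is why value << (shift & 31) and value >> (shift & 63) simplify to plain shifts, while a smaller mask such as shift & 3 in $noinline$longSmallerShiftMasking changes the result and must be kept. A minimal sketch of the equivalence; the $illustration$ name is made up:

  // Illustrative only. Java ignores all but the low 5 bits of an int shift
  // count and the low 6 bits of a long shift count, so the explicit masks
  // below never change the result.
  static boolean $illustration$shiftMaskIsRedundant(int value, long lvalue, int shift) {
    boolean intSame = (value << (shift & 31)) == (value << shift);
    boolean longSame = (lvalue >> (shift & 63)) == (lvalue >> shift);
    return intSame && longSame;  // true for every value and shift
  }
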
diff --git a/test/478-checker-clinit-check-pruning/expected.txt b/test/478-checker-clinit-check-pruning/expected.txt
index 7de097f..6f73b65 100644
--- a/test/478-checker-clinit-check-pruning/expected.txt
+++ b/test/478-checker-clinit-check-pruning/expected.txt
@@ -10,3 +10,4 @@
 Main$ClassWithClinit10's static initializer
 Main$ClassWithClinit11's static initializer
 Main$ClassWithClinit12's static initializer
+Main$ClassWithClinit13's static initializer
diff --git a/test/478-checker-clinit-check-pruning/src/Main.java b/test/478-checker-clinit-check-pruning/src/Main.java
index 7993513..6fc12f1 100644
--- a/test/478-checker-clinit-check-pruning/src/Main.java
+++ b/test/478-checker-clinit-check-pruning/src/Main.java
@@ -16,6 +16,8 @@
 
 public class Main {
 
+  static boolean doThrow = false;
+
   /*
    * Ensure an inlined static invoke explicitly triggers the
    * initialization check of the called method's declaring class, and
@@ -310,12 +312,12 @@
   /// CHECK-START: void Main.constClassAndInvokeStatic(java.lang.Iterable) liveness (before)
   /// CHECK-NOT:                           ClinitCheck
 
-  static void constClassAndInvokeStatic(Iterable it) {
+  static void constClassAndInvokeStatic(Iterable<?> it) {
     $opt$inline$ignoreClass(ClassWithClinit7.class);
     ClassWithClinit7.someStaticMethod(it);
   }
 
-  static void $opt$inline$ignoreClass(Class c) {
+  static void $opt$inline$ignoreClass(Class<?> c) {
   }
 
   static class ClassWithClinit7 {
@@ -324,7 +326,7 @@
     }
 
     // Note: not inlined from constClassAndInvokeStatic() but fully inlined from main().
-    static void someStaticMethod(Iterable it) {
+    static void someStaticMethod(Iterable<?> it) {
       // We're not inlining invoke-interface at the moment.
       it.iterator();
     }
@@ -341,7 +343,7 @@
   /// CHECK-START: void Main.sgetAndInvokeStatic(java.lang.Iterable) liveness (before)
   /// CHECK-NOT:                           ClinitCheck
 
-  static void sgetAndInvokeStatic(Iterable it) {
+  static void sgetAndInvokeStatic(Iterable<?> it) {
     $opt$inline$ignoreInt(ClassWithClinit8.value);
     ClassWithClinit8.someStaticMethod(it);
   }
@@ -356,7 +358,7 @@
     }
 
     // Note: not inlined from sgetAndInvokeStatic() but fully inlined from main().
-    static void someStaticMethod(Iterable it) {
+    static void someStaticMethod(Iterable<?> it) {
       // We're not inlining invoke-interface at the moment.
       it.iterator();
     }
@@ -372,7 +374,7 @@
   /// CHECK:                               ClinitCheck
   /// CHECK:                               InvokeStaticOrDirect clinit_check:none
 
-  static void constClassSgetAndInvokeStatic(Iterable it) {
+  static void constClassSgetAndInvokeStatic(Iterable<?> it) {
     $opt$inline$ignoreClass(ClassWithClinit9.class);
     $opt$inline$ignoreInt(ClassWithClinit9.value);
     ClassWithClinit9.someStaticMethod(it);
@@ -385,7 +387,7 @@
     }
 
     // Note: not inlined from constClassSgetAndInvokeStatic() but fully inlined from main().
-    static void someStaticMethod(Iterable it) {
+    static void someStaticMethod(Iterable<?> it) {
       // We're not inlining invoke-interface at the moment.
       it.iterator();
     }
@@ -403,12 +405,12 @@
   /// CHECK-START: void Main.inlinedInvokeStaticViaNonStatic(java.lang.Iterable) liveness (before)
   /// CHECK-NOT:                           ClinitCheck
 
-  static void inlinedInvokeStaticViaNonStatic(Iterable it) {
+  static void inlinedInvokeStaticViaNonStatic(Iterable<?> it) {
     inlinedInvokeStaticViaNonStaticHelper(null);
     inlinedInvokeStaticViaNonStaticHelper(it);
   }
 
-  static void inlinedInvokeStaticViaNonStaticHelper(Iterable it) {
+  static void inlinedInvokeStaticViaNonStaticHelper(Iterable<?> it) {
     ClassWithClinit10.inlinedForNull(it);
   }
 
@@ -418,7 +420,7 @@
       System.out.println("Main$ClassWithClinit10's static initializer");
     }
 
-    static void inlinedForNull(Iterable it) {
+    static void inlinedForNull(Iterable<?> it) {
       if (it != null) {
         // We're not inlining invoke-interface at the moment.
         it.iterator();
@@ -443,7 +445,7 @@
   /// CHECK-START: void Main.inlinedInvokeStaticViaStatic(java.lang.Iterable) liveness (before)
   /// CHECK-NOT:                           ClinitCheck
 
-  static void inlinedInvokeStaticViaStatic(Iterable it) {
+  static void inlinedInvokeStaticViaStatic(Iterable<?> it) {
     ClassWithClinit11.callInlinedForNull(it);
   }
 
@@ -453,11 +455,11 @@
       System.out.println("Main$ClassWithClinit11's static initializer");
     }
 
-    static void callInlinedForNull(Iterable it) {
+    static void callInlinedForNull(Iterable<?> it) {
       inlinedForNull(it);
     }
 
-    static void inlinedForNull(Iterable it) {
+    static void inlinedForNull(Iterable<?> it) {
       // We're not inlining invoke-interface at the moment.
       it.iterator();
     }
@@ -475,7 +477,7 @@
   /// CHECK-START: void Main.inlinedInvokeStaticViaStaticTwice(java.lang.Iterable) liveness (before)
   /// CHECK-NOT:                           ClinitCheck
 
-  static void inlinedInvokeStaticViaStaticTwice(Iterable it) {
+  static void inlinedInvokeStaticViaStaticTwice(Iterable<?> it) {
     ClassWithClinit12.callInlinedForNull(null);
     ClassWithClinit12.callInlinedForNull(it);
   }
@@ -486,11 +488,11 @@
       System.out.println("Main$ClassWithClinit12's static initializer");
     }
 
-    static void callInlinedForNull(Iterable it) {
+    static void callInlinedForNull(Iterable<?> it) {
       inlinedForNull(it);
     }
 
-    static void inlinedForNull(Iterable it) {
+    static void inlinedForNull(Iterable<?> it) {
       if (it != null) {
         // We're not inlining invoke-interface at the moment.
         it.iterator();
@@ -498,6 +500,28 @@
     }
   }
 
+  static class ClassWithClinit13 {
+    static {
+      System.out.println("Main$ClassWithClinit13's static initializer");
+    }
+
+    public static void $inline$forwardToGetIterator(Iterable<?> it) {
+      $noinline$getIterator(it);
+    }
+
+    public static void $noinline$getIterator(Iterable<?> it) {
+      // We're not inlining invoke-interface at the moment.
+      it.iterator();
+    }
+  }
+
+  // TODO: Write checker statements.
+  static Object $noinline$testInliningAndNewInstance(Iterable<?> it) {
+    if (doThrow) { throw new Error(); }
+    ClassWithClinit13.$inline$forwardToGetIterator(it);
+    return new ClassWithClinit13();
+  }
+
   // TODO: Add a test for the case of a static method whose declaring
   // class type index is not available (i.e. when `storage_index`
   // equals `DexFile::kDexNoIndex` in
@@ -517,5 +541,6 @@
     inlinedInvokeStaticViaNonStatic(it);
     inlinedInvokeStaticViaStatic(it);
     inlinedInvokeStaticViaStaticTwice(it);
+    $noinline$testInliningAndNewInstance(it);
   }
 }
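
For reference (not part of the patch): the new ClassWithClinit13 case combines an inlined static call with a NewInstance of the same class, and the extra expected.txt line checks that ClassWithClinit13's static initializer still runs. The underlying language guarantee, as a standalone sketch with made-up names:

  // Illustrative only. A static initializer runs at most once, on the
  // class's first active use -- here the first instance creation.
  static class InitOnce {
    static { System.out.println("InitOnce's static initializer"); }
  }

  static void $illustration$initializerRunsOnce() {
    new InitOnce();  // prints the line above
    new InitOnce();  // the initializer does not run again
  }
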
diff --git a/test/530-checker-loops1/src/Main.java b/test/530-checker-loops1/src/Main.java
index 948a7b7..dde4d62 100644
--- a/test/530-checker-loops1/src/Main.java
+++ b/test/530-checker-loops1/src/Main.java
@@ -562,7 +562,7 @@
   //
   /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  //  TODO: also CHECK-NOT: Deoptimize, see b/27151190
+  /// CHECK-NOT: Deoptimize
   private static void linearTriangularOnTwoArrayLengths(int n) {
     int[] a = new int[n];
     for (int i = 0; i < a.length; i++) {
@@ -604,7 +604,7 @@
   //
   /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  //  TODO: also CHECK-NOT: Deoptimize, see b/27151190
+  /// CHECK-NOT: Deoptimize
   private static void linearTriangularOnParameter(int n) {
     int[] a = new int[n];
     for (int i = 0; i < n; i++) {
@@ -619,56 +619,56 @@
     }
   }
 
-  /// CHECK-START: void Main.linearTriangularVariationsInnerStrict(int) BCE (before)
+  /// CHECK-START: void Main.linearTriangularStrictLower(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   //
-  /// CHECK-START: void Main.linearTriangularVariationsInnerStrict(int) BCE (after)
+  /// CHECK-START: void Main.linearTriangularStrictLower(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  //  TODO: also CHECK-NOT: Deoptimize, see b/27151190
-  private static void linearTriangularVariationsInnerStrict(int n) {
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularStrictLower(int n) {
     int[] a = new int[n];
     for (int i = 0; i < n; i++) {
       for (int j = 0; j < i; j++) {
         a[j] += 1;
       }
-      for (int j = i - 1; j > -1; j--) {
+      for (int j = i - 1; j >= 0; j--) {
         a[j] += 1;
       }
       for (int j = i; j < n; j++) {
         a[j] += 1;
       }
-      for (int j = n - 1; j > i - 1; j--) {
+      for (int j = n - 1; j >= i; j--) {
         a[j] += 1;
       }
     }
     verifyTriangular(a);
   }
 
-  /// CHECK-START: void Main.linearTriangularVariationsInnerNonStrict(int) BCE (before)
+  /// CHECK-START: void Main.linearTriangularStrictUpper(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   //
-  /// CHECK-START: void Main.linearTriangularVariationsInnerNonStrict(int) BCE (after)
+  /// CHECK-START: void Main.linearTriangularStrictUpper(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  //  TODO: also CHECK-NOT: Deoptimize, see b/27151190
-  private static void linearTriangularVariationsInnerNonStrict(int n) {
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularStrictUpper(int n) {
     int[] a = new int[n];
     for (int i = 0; i < n; i++) {
-      for (int j = 0; j <= i - 1; j++) {
+      for (int j = 0; j <= i; j++) {
         a[j] += 1;
       }
-      for (int j = i - 1; j >= 0; j--) {
+      for (int j = i; j >= 0; j--) {
         a[j] += 1;
       }
-      for (int j = i; j <= n - 1; j++) {
+      for (int j = i + 1; j < n; j++) {
         a[j] += 1;
       }
-      for (int j = n - 1; j >= i; j--) {
+      for (int j = n - 1; j >= i + 1; j--) {
         a[j] += 1;
       }
     }
@@ -802,8 +802,8 @@
     linearTriangularOnTwoArrayLengths(10);
     linearTriangularOnOneArrayLength(10);
     linearTriangularOnParameter(10);
-    linearTriangularVariationsInnerStrict(10);
-    linearTriangularVariationsInnerNonStrict(10);
+    linearTriangularStrictLower(10);
+    linearTriangularStrictUpper(10);
     {
       int[] t = linearTriangularOOB();
       for (int i = 0; i < 200; i++) {
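
For reference (not part of the patch): the renamed StrictLower/StrictUpper tests rewrite several inner-loop bounds into their strict or non-strict counterparts (for example, j >= 0 replaces j > -1), and the CHECK lines now require that BCE removes the bounds checks without adding a Deoptimize. A strict bound and its non-strict counterpart describe the same index range; a trivial sketch with made-up names:

  // Illustrative only. Both loops visit exactly j = 0, 1, ..., i - 1
  // (assuming i > Integer.MIN_VALUE, so that i - 1 does not wrap around).
  static int $illustration$strictVsNonStrict(int[] a, int i) {
    int sum = 0;
    for (int j = 0; j < i; j++) {       // strict upper bound
      sum += a[j];
    }
    for (int j = 0; j <= i - 1; j++) {  // equivalent non-strict upper bound
      sum += a[j];
    }
    return sum;
  }
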
diff --git a/test/530-checker-loops2/src/Main.java b/test/530-checker-loops2/src/Main.java
index b12fbd6..7acf008 100644
--- a/test/530-checker-loops2/src/Main.java
+++ b/test/530-checker-loops2/src/Main.java
@@ -31,7 +31,7 @@
   //
   /// CHECK-START: void Main.bubble(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
-  //  TODO: also CHECK-NOT: Deoptimize, see b/27151190
+  /// CHECK-NOT: Deoptimize
   private static void bubble(int[] a) {
     for (int i = a.length; --i >= 0;) {
       for (int j = 0; j < i; j++) {
@@ -301,6 +301,53 @@
     } while (-1 <= i);
   }
 
+  /// CHECK-START: void Main.justRightTriangular1() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.justRightTriangular1() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static void justRightTriangular1() {
+    int[] a = { 1 } ;
+    for (int i = Integer.MIN_VALUE + 5; i <= Integer.MIN_VALUE + 10; i++) {
+      for (int j = Integer.MIN_VALUE + 4; j < i - 5; j++) {
+        sResult += a[j - (Integer.MIN_VALUE + 4)];
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.justRightTriangular2() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.justRightTriangular2() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static void justRightTriangular2() {
+    int[] a = { 1 } ;
+    for (int i = Integer.MIN_VALUE + 5; i <= 10; i++) {
+      for (int j = 4; j < i - 5; j++) {
+        sResult += a[j - 4];
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.justOOBTriangular() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.justOOBTriangular() BCE (after)
+  /// CHECK-DAG: Deoptimize
+  //
+  /// CHECK-START: void Main.justOOBTriangular() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static void justOOBTriangular() {
+    int[] a = { 1 } ;
+    for (int i = Integer.MIN_VALUE + 4; i <= 10; i++) {
+      for (int j = 4; j < i - 5; j++) {
+        sResult += a[j - 4];
+      }
+    }
+  }
+
   /// CHECK-START: void Main.hiddenOOB1(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
   //
@@ -315,7 +362,6 @@
       // Dangerous loop where careless static range analysis would yield strict upper bound
       // on index j of 5. When, for instance, lo and thus i = -2147483648, the upper bound
       // becomes really positive due to arithmetic wrap-around, causing OOB.
-      // Dynamic BCE is feasible though, since it checks the range.
       for (int j = 4; j < i - 5; j++) {
         sResult += a[j - 4];
       }
@@ -336,13 +382,32 @@
       // Dangerous loop where careless static range analysis would yield strict lower bound
       // on index j of 5. When, for instance, hi and thus i = 2147483647, the upper bound
       // becomes really negative due to arithmetic wrap-around, causing OOB.
-      // Dynamic BCE is feasible though, since it checks the range.
       for (int j = 6; j > i + 5; j--) {
         sResult += a[j - 6];
       }
     }
   }
 
+  /// CHECK-START: void Main.hiddenOOB3(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: void Main.hiddenOOB3(int) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  //
+  /// CHECK-START: void Main.hiddenOOB3(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  private static void hiddenOOB3(int hi) {
+    int[] a = { 11 } ;
+    for (int i = -1; i <= hi; i++) {
+      // Dangerous loop where careless static range analysis would yield strict lower bound
+      // on index j of 0. For large i, the initial value of j becomes really negative due
+      // to arithmetic wrap-around, causing OOB.
+      for (int j = i + 1; j < 1; j++) {
+        sResult += a[j];
+      }
+    }
+  }
+
   /// CHECK-START: void Main.hiddenInfiniteOOB() BCE (before)
   /// CHECK-DAG: BoundsCheck
   //
@@ -376,7 +441,6 @@
     for (int i = -1; i <= 0; i++) {
       // Dangerous loop similar as above where the loop is now finite, but the
       // loop still goes out of bounds for i = -1 due to the large upper bound.
-      // Dynamic BCE is feasible though, since it checks the range.
       for (int j = -4; j < 2147483646 * i - 3; j++) {
         sResult += a[j + 4];
       }
@@ -432,6 +496,25 @@
     }
   }
 
+  /// CHECK-START: int Main.doNotHoist(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.doNotHoist(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  public static int doNotHoist(int[] a) {
+    int n = a.length;
+    int x = 0;
+    // BCE applies, but hoisting would crash the loop.
+    for (int i = -10000; i < 10000; i++) {
+      for (int j = 0; j <= 1; j++) {
+        if (0 <= i && i < n)
+          x += a[i];
+      }
+    }
+    return x;
+  }
+
+
   /// CHECK-START: int[] Main.add() BCE (before)
   /// CHECK-DAG: BoundsCheck
   //
@@ -687,7 +770,7 @@
   /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
   //  Order matters:
   /// CHECK:              Deoptimize loop:<<Loop:B\d+>>
-  //  CHECK-NOT:          Goto       loop:<<Loop>>
+  /// CHECK-NOT:          Goto       loop:<<Loop>>
   /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
   /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
   /// CHECK-DAG: {{l\d+}} ArrayGet   loop:<<Loop>>
@@ -839,6 +922,8 @@
     expectEquals(55, justRightDown1());
     expectEquals(55, justRightDown2());
     expectEquals(55, justRightDown3());
+
+    // Large bounds OOB.
     sResult = 0;
     try {
       justOOBUp();
@@ -890,6 +975,23 @@
     }
     expectEquals(1055, sResult);
 
+    // Triangular.
+    sResult = 0;
+    justRightTriangular1();
+    expectEquals(1, sResult);
+    if (HEAVY) {
+      sResult = 0;
+      justRightTriangular2();
+      expectEquals(1, sResult);
+    }
+    sResult = 0;
+    try {
+      justOOBTriangular();
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1001, sResult);
+
     // Hidden OOB.
     sResult = 0;
     try {
@@ -912,6 +1014,15 @@
       sResult += 1000;
     }
     expectEquals(1, sResult);
+    sResult = 0;
+    try {
+      hiddenOOB3(-1);  // no OOB
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(11, sResult);
+
+    // Expensive hidden OOB test.
     if (HEAVY) {
       sResult = 0;
       try {
@@ -920,7 +1031,16 @@
         sResult += 1000;
       }
       expectEquals(1002, sResult);
+      sResult = 0;
+      try {
+        hiddenOOB3(2147483647);  // OOB
+      } catch (ArrayIndexOutOfBoundsException e) {
+        sResult += 1000;
+      }
+      expectEquals(1011, sResult);
     }
+
+    // More hidden OOB.
     sResult = 0;
     try {
       hiddenInfiniteOOB();
@@ -966,6 +1086,9 @@
       expectEquals(i < 128 ? i : 0, a200[i]);
     }
 
+    // No hoisting after BCE.
+    expectEquals(110, doNotHoist(x));
+
     // Addition.
     {
       int[] e1 ={ 1, 2, 3, 4, 4, 4, 4, 3, 2, 1 };
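
For reference (not part of the patch): the hiddenOOB tests, together with the new justOOBTriangular and hiddenOOB3, all exercise the hazard named in the comments above: int index arithmetic wraps around, so a bound that looks harmless statically can go far out of range at the extremes, and the CHECK lines for those cases expect a Deoptimize to remain after BCE rather than outright removal of the check. The wrap-around itself is ordinary two's-complement arithmetic; a minimal sketch with made-up names:

  // Illustrative only. Near Integer.MIN_VALUE and Integer.MAX_VALUE, adding
  // or subtracting a small constant wraps around, which is why a bound such
  // as "j < i - 5" cannot be trusted by static range analysis alone.
  static void $illustration$wrapAround() {
    int lo = Integer.MIN_VALUE;
    int hi = Integer.MAX_VALUE;
    System.out.println(lo - 5);  // 2147483643: wraps to a large positive value
    System.out.println(hi + 5);  // -2147483644: wraps to a large negative value
  }
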
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index 24ed2fe..26475ae 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -30,6 +30,18 @@
     }
   }
 
+  public static void assertCharEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertStringContains(String searchTerm, String result) {
+    if (result == null || !result.contains(searchTerm)) {
+      throw new Error("Search term: " + searchTerm + ", not found in: " + result);
+    }
+  }
+
   public static void main(String[] args) {
     stringEqualsSame();
     stringArgumentNotNull("Foo");
@@ -41,6 +53,57 @@
     assertBooleanEquals(true, $opt$noinline$isStringEmpty(""));
     assertBooleanEquals(false, $opt$noinline$isStringEmpty("abc"));
     assertBooleanEquals(false, $opt$noinline$isStringEmpty("0123456789"));
+
+    assertCharEquals('a', $opt$noinline$stringCharAt("a", 0));
+    assertCharEquals('a', $opt$noinline$stringCharAt("abc", 0));
+    assertCharEquals('b', $opt$noinline$stringCharAt("abc", 1));
+    assertCharEquals('c', $opt$noinline$stringCharAt("abc", 2));
+    assertCharEquals('7', $opt$noinline$stringCharAt("0123456789", 7));
+
+    try {
+      $opt$noinline$stringCharAt("abc", -1);
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringCharAt", sioob.getStackTrace()[1].toString());
+    }
+    try {
+      $opt$noinline$stringCharAt("abc", 3);
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringCharAt", sioob.getStackTrace()[1].toString());
+    }
+    try {
+      $opt$noinline$stringCharAt("abc", Integer.MAX_VALUE);
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringCharAt", sioob.getStackTrace()[1].toString());
+    }
+
+    assertCharEquals('7', $opt$noinline$stringCharAtCatch("0123456789", 7));
+    assertCharEquals('\0', $opt$noinline$stringCharAtCatch("0123456789", 10));
+
+    assertIntEquals('a' + 'b' + 'c', $opt$noinline$stringSumChars("abc"));
+    assertIntEquals('a' + 'b' + 'c', $opt$noinline$stringSumLeadingChars("abcdef", 3));
+    try {
+      $opt$noinline$stringSumLeadingChars("abcdef", 7);
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringSumLeadingChars",
+                           sioob.getStackTrace()[1].toString());
+    }
+    assertIntEquals('a' + 'b' + 'c' + 'd', $opt$noinline$stringSum4LeadingChars("abcdef"));
+    try {
+      $opt$noinline$stringSum4LeadingChars("abc");
+      throw new Error("Should throw SIOOB.");
+    } catch (StringIndexOutOfBoundsException sioob) {
+      assertStringContains("java.lang.String.charAt", sioob.getStackTrace()[0].toString());
+      assertStringContains("Main.$opt$noinline$stringSum4LeadingChars",
+                           sioob.getStackTrace()[1].toString());
+    }
   }
 
   /// CHECK-START: int Main.$opt$noinline$getStringLength(java.lang.String) instruction_simplifier (before)
@@ -81,6 +144,144 @@
     return s.isEmpty();
   }
 
+  /// CHECK-START: char Main.$opt$noinline$stringCharAt(java.lang.String, int) instruction_simplifier (before)
+  /// CHECK-DAG:  <<Char:c\d+>>     InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    Return [<<Char>>]
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAt(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-DAG:  <<String:l\d+>>   ParameterValue
+  /// CHECK-DAG:  <<Pos:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<NullCk:l\d+>>   NullCheck [<<String>>]
+  /// CHECK-DAG:  <<Length:i\d+>>   ArrayLength [<<NullCk>>] is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck [<<Pos>>,<<Length>>] is_string_char_at:true
+  /// CHECK-DAG:  <<Char:c\d+>>     ArrayGet [<<NullCk>>,<<Pos>>] is_string_char_at:true
+  /// CHECK-DAG:                    Return [<<Char>>]
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAt(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  static public char $opt$noinline$stringCharAt(String s, int pos) {
+    if (doThrow) { throw new Error(); }
+    return s.charAt(pos);
+  }
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAtCatch(java.lang.String, int) instruction_simplifier (before)
+  /// CHECK-DAG:  <<Char:c\d+>>     InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    Return [<<Char>>]
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAtCatch(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-DAG:  <<String:l\d+>>   ParameterValue
+  /// CHECK-DAG:  <<Pos:i\d+>>      ParameterValue
+  /// CHECK-DAG:  <<NullCk:l\d+>>   NullCheck [<<String>>]
+  /// CHECK-DAG:  <<Length:i\d+>>   ArrayLength [<<NullCk>>] is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck [<<Pos>>,<<Length>>] is_string_char_at:true
+  /// CHECK-DAG:  <<Char:c\d+>>     ArrayGet [<<NullCk>>,<<Pos>>] is_string_char_at:true
+  /// CHECK-DAG:                    Return [<<Char>>]
+
+  /// CHECK-START: char Main.$opt$noinline$stringCharAtCatch(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  static public char $opt$noinline$stringCharAtCatch(String s, int pos) {
+    if (doThrow) { throw new Error(); }
+    try {
+      return s.charAt(pos);
+    } catch (StringIndexOutOfBoundsException ignored) {
+      return '\0';
+    }
+  }
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) instruction_simplifier (before)
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringLength
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) instruction_simplifier (after)
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringLength
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) GVN (after)
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-NOT:                    ArrayLength is_string_length:true
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumChars(java.lang.String) BCE (after)
+  /// CHECK-NOT:                    BoundsCheck
+
+  static public int $opt$noinline$stringSumChars(String s) {
+    if (doThrow) { throw new Error(); }
+    int sum = 0;
+    int len = s.length();
+    for (int i = 0; i < len; ++i) {
+      sum += s.charAt(i);
+    }
+    return sum;
+  }
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) instruction_simplifier (before)
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) BCE (after)
+  /// CHECK-DAG:                    Deoptimize env:[[{{[^\]]*}}]]
+
+  /// CHECK-START: int Main.$opt$noinline$stringSumLeadingChars(java.lang.String, int) BCE (after)
+  /// CHECK-NOT:                    BoundsCheck is_string_char_at:true
+
+  static public int $opt$noinline$stringSumLeadingChars(String s, int n) {
+    if (doThrow) { throw new Error(); }
+    int sum = 0;
+    for (int i = 0; i < n; ++i) {
+      sum += s.charAt(i);
+    }
+    return sum;
+  }
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) instruction_simplifier (before)
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+  /// CHECK-DAG:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) instruction_simplifier (after)
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+  /// CHECK-DAG:                    ArrayLength is_string_length:true
+  /// CHECK-DAG:                    BoundsCheck is_string_char_at:true
+  /// CHECK-DAG:                    ArrayGet is_string_char_at:true
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) instruction_simplifier (after)
+  /// CHECK-NOT:                    InvokeVirtual intrinsic:StringCharAt
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) BCE (after)
+  /// CHECK-DAG:                    Deoptimize env:[[{{[^\]]*}}]]
+
+  /// CHECK-START: int Main.$opt$noinline$stringSum4LeadingChars(java.lang.String) BCE (after)
+  /// CHECK-NOT:                    BoundsCheck is_string_char_at:true
+
+  static public int $opt$noinline$stringSum4LeadingChars(String s) {
+    if (doThrow) { throw new Error(); }
+    int sum = s.charAt(0) + s.charAt(1) + s.charAt(2) + s.charAt(3);
+    return sum;
+  }
+
   /// CHECK-START: boolean Main.stringEqualsSame() instruction_simplifier (before)
   /// CHECK:      InvokeStaticOrDirect
 
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
index 3d985bf..09a77ed 100644
--- a/test/552-checker-sharpening/src/Main.java
+++ b/test/552-checker-sharpening/src/Main.java
@@ -28,6 +28,12 @@
     }
   }
 
+  public static void assertClassEquals(Class<?> expected, Class<?> result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   public static boolean doThrow = false;
 
   private static int $noinline$foo(int x) {
@@ -251,6 +257,66 @@
     return "non-boot-image-string";
   }
 
+  /// CHECK-START: java.lang.Class Main.$noinline$getStringClass() sharpening (before)
+  /// CHECK:                LoadClass load_kind:DexCacheViaMethod class_name:java.lang.String
+
+  /// CHECK-START-X86: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
+  /// CHECK-START-X86_64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
+  /// CHECK-START-ARM: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
+  /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  // Note: load kind depends on PIC/non-PIC
+  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String
+
+  public static Class<?> $noinline$getStringClass() {
+    // Prevent inlining to avoid the class comparison being optimized away.
+    if (doThrow) { throw new Error(); }
+    // String class is known to be in the boot image.
+    return String.class;
+  }
+
+  /// CHECK-START: java.lang.Class Main.$noinline$getOtherClass() sharpening (before)
+  /// CHECK:                LoadClass load_kind:DexCacheViaMethod class_name:Other
+
+  /// CHECK-START-X86: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-X86: java.lang.Class Main.$noinline$getOtherClass() pc_relative_fixups_x86 (after)
+  /// CHECK-DAG:            X86ComputeBaseMethodAddress
+  /// CHECK-DAG:            LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-X86_64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-ARM: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-ARM: java.lang.Class Main.$noinline$getOtherClass() dex_cache_array_fixups_arm (after)
+  /// CHECK-DAG:            ArmDexCacheArraysBase
+  /// CHECK-DAG:            LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK:                LoadClass load_kind:DexCachePcRelative class_name:Other
+
+  public static Class<?> $noinline$getOtherClass() {
+    // Prevent inlining to avoid the class comparison being optimized away.
+    if (doThrow) { throw new Error(); }
+    // Other class is not in the boot image.
+    return Other.class;
+  }
+
   public static void main(String[] args) {
     assertIntEquals(1, testSimple(1));
     assertIntEquals(1, testDiamond(false, 1));
@@ -262,5 +328,10 @@
     assertIntEquals(-6, testLoopWithDiamond(new int[]{ 3, 4 }, true, 1));
     assertStringEquals("", $noinline$getBootImageString());
     assertStringEquals("non-boot-image-string", $noinline$getNonBootImageString());
+    assertClassEquals(String.class, $noinline$getStringClass());
+    assertClassEquals(Other.class, $noinline$getOtherClass());
   }
 }
+
+class Other {
+}
diff --git a/test/600-verifier-fails/expected.txt b/test/600-verifier-fails/expected.txt
index 8399969..eaa0c93 100644
--- a/test/600-verifier-fails/expected.txt
+++ b/test/600-verifier-fails/expected.txt
@@ -2,3 +2,4 @@
 passed B
 passed C
 passed D
+passed E
diff --git a/test/600-verifier-fails/src/Main.java b/test/600-verifier-fails/src/Main.java
index 64c3d5c..fa25d58 100644
--- a/test/600-verifier-fails/src/Main.java
+++ b/test/600-verifier-fails/src/Main.java
@@ -38,7 +38,6 @@
     test("B");
     test("C");
     test("D");
-    // TODO: enable again
-    // test("E");
+    test("E");
   }
 }
diff --git a/test/608-checker-unresolved-lse/expected.txt b/test/608-checker-unresolved-lse/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/608-checker-unresolved-lse/expected.txt
diff --git a/test/608-checker-unresolved-lse/info.txt b/test/608-checker-unresolved-lse/info.txt
new file mode 100644
index 0000000..466d5f4
--- /dev/null
+++ b/test/608-checker-unresolved-lse/info.txt
@@ -0,0 +1,3 @@
+Regression test for the load store elimination optimization,
+which used to wrongly remove field stores in the presence of
+unresolved accesses.
diff --git a/test/608-checker-unresolved-lse/run b/test/608-checker-unresolved-lse/run
new file mode 100644
index 0000000..226891f
--- /dev/null
+++ b/test/608-checker-unresolved-lse/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use the --secondary switch to add the secondary dex file to the class path.
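+# (The classes under src-dex2oat-unresolved, such as MissingSuperClass, presumably
+# end up in that secondary dex file, which dex2oat does not compile against, so
+# Main's superclass is unresolved at compile time but present at runtime.)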
+exec ${RUN} "${@}" --secondary
diff --git a/test/608-checker-unresolved-lse/src-dex2oat-unresolved/MissingSuperClass.java b/test/608-checker-unresolved-lse/src-dex2oat-unresolved/MissingSuperClass.java
new file mode 100644
index 0000000..b11b9be
--- /dev/null
+++ b/test/608-checker-unresolved-lse/src-dex2oat-unresolved/MissingSuperClass.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class MissingSuperClass {
+}
diff --git a/test/608-checker-unresolved-lse/src/Main.java b/test/608-checker-unresolved-lse/src/Main.java
new file mode 100644
index 0000000..c6f8854
--- /dev/null
+++ b/test/608-checker-unresolved-lse/src/Main.java
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// We make Main extend an unresolved super class. This leads to unresolved
+// accesses to Foo's fields, as the compiler cannot tell whether Main may
+// access a package-private field.
+public class Main extends MissingSuperClass {
+
+  public static void main(String[] args) {
+    instanceFieldTest();
+    staticFieldTest();
+    instanceFieldTest2();
+  }
+
+  /// CHECK-START: void Main.instanceFieldTest() inliner (before)
+  /// CHECK-NOT:    InstanceFieldSet
+
+  /// CHECK-START: void Main.instanceFieldTest() inliner (after)
+  /// CHECK:        InstanceFieldSet
+  /// CHECK:        UnresolvedInstanceFieldGet
+
+  // Load store elimination used to remove the InstanceFieldSet, assuming the
+  // UnresolvedInstanceFieldGet was unrelated. However, inlining can lead to a
+  // situation where the UnresolvedInstanceFieldGet resolves to the same field
+  // as the InstanceFieldSet, so the InstanceFieldSet must be preserved.
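+  //
+  // Illustrative sketch of instanceFieldTest() after inlining Foo's constructor
+  // (not checked by this test):
+  //   f = NewInstance Foo
+  //   InstanceFieldSet f.iField <- 42           (from the inlined constructor)
+  //   UnresolvedInstanceFieldGet f.iField       (may resolve to the same field)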
+
+  /// CHECK-START: void Main.instanceFieldTest() load_store_elimination (after)
+  /// CHECK:        InstanceFieldSet
+  /// CHECK:        UnresolvedInstanceFieldGet
+  public static void instanceFieldTest() {
+    Foo f = new Foo();
+    if (f.iField != 42) {
+      throw new Error("Expected 42, got " + f.iField);
+    }
+  }
+
+  /// CHECK-START: void Main.instanceFieldTest2() inliner (before)
+  /// CHECK-NOT:    InstanceFieldSet
+  /// CHECK-NOT:    InstanceFieldGet
+
+  /// CHECK-START: void Main.instanceFieldTest2() inliner (after)
+  /// CHECK:        InstanceFieldSet
+  /// CHECK:        InstanceFieldGet
+  /// CHECK:        UnresolvedInstanceFieldSet
+  /// CHECK:        InstanceFieldGet
+
+  // Load store elimination eliminates the first InstanceFieldGet because it
+  // directly follows an InstanceFieldSet. However, it must not eliminate the
+  // second InstanceFieldGet, as the UnresolvedInstanceFieldSet might resolve
+  // to the same field.
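+  //
+  // Illustrative sketch of instanceFieldTest2() after inlining (not checked by
+  // this test):
+  //   InstanceFieldSet f.iField <- 42           (inlined constructor)
+  //   InstanceFieldGet f.iField                 (first getter, removable by LSE)
+  //   UnresolvedInstanceFieldSet f.iField <- 43
+  //   InstanceFieldGet f.iField                 (second getter, must be kept)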
+
+  /// CHECK-START: void Main.instanceFieldTest2() load_store_elimination (after)
+  /// CHECK:        InstanceFieldSet
+  /// CHECK-NOT:    InstanceFieldGet
+  /// CHECK:        UnresolvedInstanceFieldSet
+  /// CHECK:        InstanceFieldGet
+  public static void instanceFieldTest2() {
+    Foo f = new Foo();
+    int a = f.$inline$GetInstanceField();
+    f.iField = 43;
+    a = f.$inline$GetInstanceField();
+    if (a != 43) {
+      throw new Error("Expected 43, got " + a);
+    }
+  }
+
+  /// CHECK-START: void Main.staticFieldTest() inliner (before)
+  /// CHECK-NOT:    StaticFieldSet
+
+  /// CHECK-START: void Main.staticFieldTest() inliner (after)
+  /// CHECK:        StaticFieldSet
+  /// CHECK:        StaticFieldSet
+  /// CHECK:        UnresolvedStaticFieldGet
+
+  /// CHECK-START: void Main.staticFieldTest() load_store_elimination (after)
+  /// CHECK:        StaticFieldSet
+  /// CHECK:        StaticFieldSet
+  /// CHECK:        UnresolvedStaticFieldGet
+  public static void staticFieldTest() {
+    // Ensure Foo is initialized.
+    Foo f = new Foo();
+    f.$inline$StaticSet42();
+    f.$inline$StaticSet43();
+    if (Foo.sField != 43) {
+      throw new Error("Expected 43, got " + Foo.sField);
+    }
+  }
+}
+
+class Foo {
+  // The fields need to be package-private to make the accesses in Main
+  // unresolved.
+  int iField;
+  static int sField;
+
+  public void $inline$StaticSet42() {
+    sField = 42;
+  }
+
+  public void $inline$StaticSet43() {
+    sField = 43;
+  }
+
+  public int $inline$GetInstanceField() {
+    return iField;
+  }
+
+  // The constructor needs to be public so that it is resolved in Main
+  // and can therefore be inlined.
+  public Foo() {
+    iField = 42;
+  }
+}
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index 8598474..01790ae 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -25,6 +25,7 @@
   004-SignalTest/signaltest.cc \
   004-ReferenceMap/stack_walk_refmap_jni.cc \
   004-StackWalk/stack_walk_jni.cc \
+  004-ThreadStress/thread_stress.cc \
   004-UnsafeTest/unsafe_test.cc \
   044-proxy/native_proxy.cc \
   051-thread/thread_test.cc \
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index ee651b5..dd6b6f3 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -209,9 +209,10 @@
         $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(ART_TEST_RUN_TEST_SKIP), $(ALL_ADDRESS_SIZES))
 
 
-# Disable 137-cfi (b/27391690).
+# Disable 149-suspend-all-stress; its output is flaky (b/28988206).
 # Disable 577-profile-foreign-dex (b/27454772).
 TEST_ART_BROKEN_ALL_TARGET_TESTS := \
+  149-suspend-all-stress \
   577-profile-foreign-dex \
 
 ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -293,7 +294,8 @@
   147-stripped-dex-fallback \
   554-jit-profile-file \
   529-checker-unresolved \
-  555-checker-regression-x86const
+  555-checker-regression-x86const \
+  608-checker-unresolved-lse
 
 ifneq (,$(filter no-prebuild,$(PREBUILD_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-prebuild, \
diff --git a/test/dexdump/all.dex b/test/dexdump/all.dex
new file mode 100644
index 0000000..caf678d
--- /dev/null
+++ b/test/dexdump/all.dex
Binary files differ
diff --git a/test/dexdump/all.lst b/test/dexdump/all.lst
new file mode 100644
index 0000000..17ab9ca
--- /dev/null
+++ b/test/dexdump/all.lst
@@ -0,0 +1,21 @@
+#all.dex
+0x0000043c 8 A <init> ()V (none) -1
+0x00000454 58 A arrays ()V (none) -1
+0x000004a0 130 A binary_ops ()V (none) -1
+0x00000534 66 A binary_ops_2addr ()V (none) -1
+0x00000588 34 A binary_ops_lit16 ()V (none) -1
+0x000005bc 46 A binary_ops_lit8 ()V (none) -1
+0x000005fc 22 A compares ()V (none) -1
+0x00000624 50 A conditionals ()V (none) -1
+0x00000668 56 A constants ()V (none) -1
+0x000006b0 108 A misc ()V (none) -1
+0x0000072c 46 A moves ()V (none) -1
+0x0000076c 32 A packed_switch ()V (none) -1
+0x0000079c 2 A return32 ()I (none) -1
+0x000007b0 2 A return64 ()I (none) -1
+0x000007c4 2 A return_object ()Ljava/lang/Object; (none) -1
+0x000007d8 44 A sparse_switch ()V (none) -1
+0x00000814 58 A static_fields ()V (none) -1
+0x00000860 44 A unary_ops ()V (none) -1
+0x0000089c 58 A instance_fields ()V (none) -1
+0x000008e8 30 A invokes ()V (none) -1
diff --git a/test/dexdump/all.txt b/test/dexdump/all.txt
new file mode 100644
index 0000000..af4fb4c
--- /dev/null
+++ b/test/dexdump/all.txt
@@ -0,0 +1,622 @@
+Processing 'all.dex'...
+Opened 'all.dex', DEX version '035'
+DEX file header:
+magic               : 'dex\n035\0'
+checksum            : d5134208
+signature           : 7af6...100f
+file_size           : 2572
+header_size         : 112
+link_size           : 0
+link_off            : 0 (0x000000)
+string_ids_size     : 46
+string_ids_off      : 112 (0x000070)
+type_ids_size       : 10
+type_ids_off        : 296 (0x000128)
+proto_ids_size      : 3
+proto_ids_off       : 336 (0x000150)
+field_ids_size      : 14
+field_ids_off       : 372 (0x000174)
+method_ids_size     : 21
+method_ids_off      : 484 (0x0001e4)
+class_defs_size     : 1
+class_defs_off      : 652 (0x00028c)
+data_size           : 1888
+data_off            : 684 (0x0002ac)
+
+Class #0 header:
+class_idx           : 4
+access_flags        : 1 (0x0001)
+superclass_idx      : 5
+interfaces_off      : 0 (0x000000)
+source_file_idx     : -1
+annotations_off     : 0 (0x000000)
+class_data_off      : 2310 (0x000906)
+static_fields_size  : 7
+instance_fields_size: 7
+direct_methods_size : 18
+virtual_methods_size: 2
+
+Class #0            -
+  Class descriptor  : 'LA;'
+  Access flags      : 0x0001 (PUBLIC)
+  Superclass        : 'Ljava/lang/Object;'
+  Interfaces        -
+  Static fields     -
+    #0              : (in LA;)
+      name          : 'sB'
+      type          : 'B'
+      access        : 0x000a (PRIVATE STATIC)
+    #1              : (in LA;)
+      name          : 'sC'
+      type          : 'C'
+      access        : 0x000a (PRIVATE STATIC)
+    #2              : (in LA;)
+      name          : 'sI'
+      type          : 'I'
+      access        : 0x000a (PRIVATE STATIC)
+    #3              : (in LA;)
+      name          : 'sJ'
+      type          : 'J'
+      access        : 0x000a (PRIVATE STATIC)
+    #4              : (in LA;)
+      name          : 'sO'
+      type          : 'LA;'
+      access        : 0x000a (PRIVATE STATIC)
+    #5              : (in LA;)
+      name          : 'sS'
+      type          : 'S'
+      access        : 0x000a (PRIVATE STATIC)
+    #6              : (in LA;)
+      name          : 'sZ'
+      type          : 'Z'
+      access        : 0x000a (PRIVATE STATIC)
+  Instance fields   -
+    #0              : (in LA;)
+      name          : 'mB'
+      type          : 'B'
+      access        : 0x0002 (PRIVATE)
+    #1              : (in LA;)
+      name          : 'mC'
+      type          : 'C'
+      access        : 0x0002 (PRIVATE)
+    #2              : (in LA;)
+      name          : 'mI'
+      type          : 'I'
+      access        : 0x0002 (PRIVATE)
+    #3              : (in LA;)
+      name          : 'mJ'
+      type          : 'J'
+      access        : 0x0002 (PRIVATE)
+    #4              : (in LA;)
+      name          : 'mO'
+      type          : 'LA;'
+      access        : 0x0002 (PRIVATE)
+    #5              : (in LA;)
+      name          : 'mS'
+      type          : 'S'
+      access        : 0x0002 (PRIVATE)
+    #6              : (in LA;)
+      name          : 'mZ'
+      type          : 'Z'
+      access        : 0x0002 (PRIVATE)
+  Direct methods    -
+    #0              : (in LA;)
+      name          : '<init>'
+      type          : '()V'
+      access        : 0x10001 (PUBLIC CONSTRUCTOR)
+      code          -
+      registers     : 1
+      ins           : 1
+      outs          : 1
+      insns size    : 4 16-bit code units
+00042c:                                        |[00042c] A.<init>:()V
+00043c: 7010 1400 0000                         |0000: invoke-direct {v0}, Ljava/lang/Object;.<init>:()V // method@0014
+000442: 0e00                                   |0003: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #1              : (in LA;)
+      name          : 'arrays'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 3
+      ins           : 0
+      outs          : 0
+      insns size    : 29 16-bit code units
+000444:                                        |[000444] A.arrays:()V
+000454: 4400 0102                              |0000: aget v0, v1, v2
+000458: 4500 0102                              |0002: aget-wide v0, v1, v2
+00045c: 4600 0102                              |0004: aget-object v0, v1, v2
+000460: 4700 0102                              |0006: aget-boolean v0, v1, v2
+000464: 4800 0102                              |0008: aget-byte v0, v1, v2
+000468: 4900 0102                              |000a: aget-char v0, v1, v2
+00046c: 4a00 0102                              |000c: aget-short v0, v1, v2
+000470: 4b00 0102                              |000e: aput v0, v1, v2
+000474: 4c00 0102                              |0010: aput-wide v0, v1, v2
+000478: 4d00 0102                              |0012: aput-object v0, v1, v2
+00047c: 4e00 0102                              |0014: aput-boolean v0, v1, v2
+000480: 4f00 0102                              |0016: aput-byte v0, v1, v2
+000484: 5000 0102                              |0018: aput-char v0, v1, v2
+000488: 5100 0102                              |001a: aput-short v0, v1, v2
+00048c: 0e00                                   |001c: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #2              : (in LA;)
+      name          : 'binary_ops'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 3
+      ins           : 0
+      outs          : 0
+      insns size    : 65 16-bit code units
+000490:                                        |[000490] A.binary_ops:()V
+0004a0: 9000 0102                              |0000: add-int v0, v1, v2
+0004a4: 9100 0102                              |0002: sub-int v0, v1, v2
+0004a8: 9200 0102                              |0004: mul-int v0, v1, v2
+0004ac: 9300 0102                              |0006: div-int v0, v1, v2
+0004b0: 9400 0102                              |0008: rem-int v0, v1, v2
+0004b4: 9500 0102                              |000a: and-int v0, v1, v2
+0004b8: 9600 0102                              |000c: or-int v0, v1, v2
+0004bc: 9700 0102                              |000e: xor-int v0, v1, v2
+0004c0: 9800 0102                              |0010: shl-int v0, v1, v2
+0004c4: 9900 0102                              |0012: shr-int v0, v1, v2
+0004c8: 9a00 0102                              |0014: ushr-int v0, v1, v2
+0004cc: 9b00 0102                              |0016: add-long v0, v1, v2
+0004d0: 9c00 0102                              |0018: sub-long v0, v1, v2
+0004d4: 9d00 0102                              |001a: mul-long v0, v1, v2
+0004d8: 9e00 0102                              |001c: div-long v0, v1, v2
+0004dc: 9f00 0102                              |001e: rem-long v0, v1, v2
+0004e0: a000 0102                              |0020: and-long v0, v1, v2
+0004e4: a100 0102                              |0022: or-long v0, v1, v2
+0004e8: a200 0102                              |0024: xor-long v0, v1, v2
+0004ec: a300 0102                              |0026: shl-long v0, v1, v2
+0004f0: a400 0102                              |0028: shr-long v0, v1, v2
+0004f4: a500 0102                              |002a: ushr-long v0, v1, v2
+0004f8: a600 0102                              |002c: add-float v0, v1, v2
+0004fc: a700 0102                              |002e: sub-float v0, v1, v2
+000500: a800 0102                              |0030: mul-float v0, v1, v2
+000504: a900 0102                              |0032: div-float v0, v1, v2
+000508: aa00 0102                              |0034: rem-float v0, v1, v2
+00050c: ab00 0102                              |0036: add-double v0, v1, v2
+000510: ac00 0102                              |0038: sub-double v0, v1, v2
+000514: ad00 0102                              |003a: mul-double v0, v1, v2
+000518: ae00 0102                              |003c: div-double v0, v1, v2
+00051c: af00 0102                              |003e: rem-double v0, v1, v2
+000520: 0e00                                   |0040: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #3              : (in LA;)
+      name          : 'binary_ops_2addr'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 33 16-bit code units
+000524:                                        |[000524] A.binary_ops_2addr:()V
+000534: b010                                   |0000: add-int/2addr v0, v1
+000536: b110                                   |0001: sub-int/2addr v0, v1
+000538: b210                                   |0002: mul-int/2addr v0, v1
+00053a: b310                                   |0003: div-int/2addr v0, v1
+00053c: b410                                   |0004: rem-int/2addr v0, v1
+00053e: b510                                   |0005: and-int/2addr v0, v1
+000540: b610                                   |0006: or-int/2addr v0, v1
+000542: b710                                   |0007: xor-int/2addr v0, v1
+000544: b810                                   |0008: shl-int/2addr v0, v1
+000546: b910                                   |0009: shr-int/2addr v0, v1
+000548: ba10                                   |000a: ushr-int/2addr v0, v1
+00054a: bb10                                   |000b: add-long/2addr v0, v1
+00054c: bc10                                   |000c: sub-long/2addr v0, v1
+00054e: bd10                                   |000d: mul-long/2addr v0, v1
+000550: be10                                   |000e: div-long/2addr v0, v1
+000552: bf10                                   |000f: rem-long/2addr v0, v1
+000554: c010                                   |0010: and-long/2addr v0, v1
+000556: c110                                   |0011: or-long/2addr v0, v1
+000558: c210                                   |0012: xor-long/2addr v0, v1
+00055a: c310                                   |0013: shl-long/2addr v0, v1
+00055c: c410                                   |0014: shr-long/2addr v0, v1
+00055e: c510                                   |0015: ushr-long/2addr v0, v1
+000560: c610                                   |0016: add-float/2addr v0, v1
+000562: c710                                   |0017: sub-float/2addr v0, v1
+000564: c810                                   |0018: mul-float/2addr v0, v1
+000566: c910                                   |0019: div-float/2addr v0, v1
+000568: ca10                                   |001a: rem-float/2addr v0, v1
+00056a: cb10                                   |001b: add-double/2addr v0, v1
+00056c: cc10                                   |001c: sub-double/2addr v0, v1
+00056e: cd10                                   |001d: mul-double/2addr v0, v1
+000570: ce10                                   |001e: div-double/2addr v0, v1
+000572: cf10                                   |001f: rem-double/2addr v0, v1
+000574: 0e00                                   |0020: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #4              : (in LA;)
+      name          : 'binary_ops_lit16'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 17 16-bit code units
+000578:                                        |[000578] A.binary_ops_lit16:()V
+000588: d010 3412                              |0000: add-int/lit16 v0, v1, #int 4660 // #1234
+00058c: d110 3412                              |0002: rsub-int v0, v1, #int 4660 // #1234
+000590: d210 3412                              |0004: mul-int/lit16 v0, v1, #int 4660 // #1234
+000594: d310 3412                              |0006: div-int/lit16 v0, v1, #int 4660 // #1234
+000598: d410 3412                              |0008: rem-int/lit16 v0, v1, #int 4660 // #1234
+00059c: d510 3412                              |000a: and-int/lit16 v0, v1, #int 4660 // #1234
+0005a0: d610 3412                              |000c: or-int/lit16 v0, v1, #int 4660 // #1234
+0005a4: d710 3412                              |000e: xor-int/lit16 v0, v1, #int 4660 // #1234
+0005a8: 0e00                                   |0010: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #5              : (in LA;)
+      name          : 'binary_ops_lit8'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 23 16-bit code units
+0005ac:                                        |[0005ac] A.binary_ops_lit8:()V
+0005bc: d800 0112                              |0000: add-int/lit8 v0, v1, #int 18 // #12
+0005c0: d900 0112                              |0002: rsub-int/lit8 v0, v1, #int 18 // #12
+0005c4: da00 0112                              |0004: mul-int/lit8 v0, v1, #int 18 // #12
+0005c8: db00 0112                              |0006: div-int/lit8 v0, v1, #int 18 // #12
+0005cc: dc00 0112                              |0008: rem-int/lit8 v0, v1, #int 18 // #12
+0005d0: dd00 0112                              |000a: and-int/lit8 v0, v1, #int 18 // #12
+0005d4: de00 0112                              |000c: or-int/lit8 v0, v1, #int 18 // #12
+0005d8: df00 0112                              |000e: xor-int/lit8 v0, v1, #int 18 // #12
+0005dc: e000 0112                              |0010: shl-int/lit8 v0, v1, #int 18 // #12
+0005e0: e100 0112                              |0012: shr-int/lit8 v0, v1, #int 18 // #12
+0005e4: e200 0112                              |0014: ushr-int/lit8 v0, v1, #int 18 // #12
+0005e8: 0e00                                   |0016: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #6              : (in LA;)
+      name          : 'compares'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 3
+      ins           : 0
+      outs          : 0
+      insns size    : 11 16-bit code units
+0005ec:                                        |[0005ec] A.compares:()V
+0005fc: 2d00 0102                              |0000: cmpl-float v0, v1, v2
+000600: 2e00 0102                              |0002: cmpg-float v0, v1, v2
+000604: 2f00 0102                              |0004: cmpl-double v0, v1, v2
+000608: 3000 0102                              |0006: cmpg-double v0, v1, v2
+00060c: 3100 0102                              |0008: cmp-long v0, v1, v2
+000610: 0e00                                   |000a: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #7              : (in LA;)
+      name          : 'conditionals'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 25 16-bit code units
+000614:                                        |[000614] A.conditionals:()V
+000624: 3210 1800                              |0000: if-eq v0, v1, 0018 // +0018
+000628: 3310 1600                              |0002: if-ne v0, v1, 0018 // +0016
+00062c: 3410 1400                              |0004: if-lt v0, v1, 0018 // +0014
+000630: 3510 1200                              |0006: if-ge v0, v1, 0018 // +0012
+000634: 3610 1000                              |0008: if-gt v0, v1, 0018 // +0010
+000638: 3710 0e00                              |000a: if-le v0, v1, 0018 // +000e
+00063c: 3800 0c00                              |000c: if-eqz v0, 0018 // +000c
+000640: 3900 0a00                              |000e: if-nez v0, 0018 // +000a
+000644: 3a00 0800                              |0010: if-ltz v0, 0018 // +0008
+000648: 3b00 0600                              |0012: if-gez v0, 0018 // +0006
+00064c: 3c00 0400                              |0014: if-gtz v0, 0018 // +0004
+000650: 3d00 0200                              |0016: if-lez v0, 0018 // +0002
+000654: 0e00                                   |0018: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #8              : (in LA;)
+      name          : 'constants'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 28 16-bit code units
+000658:                                        |[000658] A.constants:()V
+000668: 1210                                   |0000: const/4 v0, #int 1 // #1
+00066a: 1300 3412                              |0001: const/16 v0, #int 4660 // #1234
+00066e: 1400 7856 3412                         |0003: const v0, #float 5.69046e-28 // #12345678
+000674: 1500 3412                              |0006: const/high16 v0, #int 305397760 // #1234
+000678: 1600 3412                              |0008: const-wide/16 v0, #int 4660 // #1234
+00067c: 1700 7856 3412                         |000a: const-wide/32 v0, #float 5.69046e-28 // #12345678
+000682: 1800 efcd ab90 7856 3412               |000d: const-wide v0, #double 5.62635e-221 // #1234567890abcdef
+00068c: 1900 3412                              |0012: const-wide/high16 v0, #long 1311673391471656960 // #1234
+000690: 1a00 2c00                              |0014: const-string v0, "string" // string@002c
+000694: 1b00 2c00 0000                         |0016: const-string/jumbo v0, "string" // string@0000002c
+00069a: 1c00 0500                              |0019: const-class v0, Ljava/lang/Object; // type@0005
+00069e: 0e00                                   |001b: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #9              : (in LA;)
+      name          : 'misc'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 5
+      ins           : 0
+      outs          : 0
+      insns size    : 54 16-bit code units
+0006a0:                                        |[0006a0] A.misc:()V
+0006b0: 0000                                   |0000: nop // spacer
+0006b2: 1d00                                   |0001: monitor-enter v0
+0006b4: 1e00                                   |0002: monitor-exit v0
+0006b6: 1f00 0500                              |0003: check-cast v0, Ljava/lang/Object; // type@0005
+0006ba: 2010 0500                              |0005: instance-of v0, v1, Ljava/lang/Object; // type@0005
+0006be: 2110                                   |0007: array-length v0, v1
+0006c0: 2200 0500                              |0008: new-instance v0, Ljava/lang/Object; // type@0005
+0006c4: 2310 0500                              |000a: new-array v0, v1, Ljava/lang/Object; // type@0005
+0006c8: 2454 0900 1032                         |000c: filled-new-array {v0, v1, v2, v3, v4}, [Ljava/lang/Object; // type@0009
+0006ce: 2505 0900 0000                         |000f: filled-new-array/range {v0, v1, v2, v3, v4}, [Ljava/lang/Object; // type@0009
+0006d4: 2600 0c00 0000                         |0012: fill-array-data v0, 0000001e // +0000000c
+0006da: 2700                                   |0015: throw v0
+0006dc: 2806                                   |0016: goto 001c // +0006
+0006de: 2900 0500                              |0017: goto/16 001c // +0005
+0006e2: 2a00 0300 0000                         |0019: goto/32 #00000003
+0006e8: 0e00                                   |001c: return-void
+0006ea: 0000                                   |001d: nop // spacer
+0006ec: 0003 0400 0a00 0000 0100 0000 0200 ... |001e: array-data (24 units)
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #10              : (in LA;)
+      name          : 'moves'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 23 16-bit code units
+00071c:                                        |[00071c] A.moves:()V
+00072c: 0110                                   |0000: move v0, v1
+00072e: 0200 0100                              |0001: move/from16 v0, v1
+000732: 0300 0000 0100                         |0003: move/16 v0, v1
+000738: 0410                                   |0006: move-wide v0, v1
+00073a: 0500 0100                              |0007: move-wide/from16 v0, v1
+00073e: 0600 0000 0100                         |0009: move-wide/16 v0, v1
+000744: 0710                                   |000c: move-object v0, v1
+000746: 0800 0100                              |000d: move-object/from16 v0, v1
+00074a: 0900 0000 0100                         |000f: move-object/16 v0, v1
+000750: 0a00                                   |0012: move-result v0
+000752: 0b00                                   |0013: move-result-wide v0
+000754: 0c00                                   |0014: move-result-object v0
+000756: 0d00                                   |0015: move-exception v0
+000758: 0e00                                   |0016: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #11              : (in LA;)
+      name          : 'packed_switch'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 16 16-bit code units
+00075c:                                        |[00075c] A.packed_switch:()V
+00076c: 2b00 0800 0000                         |0000: packed-switch v0, 00000008 // +00000008
+000772: 0e00                                   |0003: return-void
+000774: 28ff                                   |0004: goto 0003 // -0001
+000776: 28fe                                   |0005: goto 0003 // -0002
+000778: 28fd                                   |0006: goto 0003 // -0003
+00077a: 0000                                   |0007: nop // spacer
+00077c: 0001 0200 feff ff7f 0500 0000 0600 ... |0008: packed-switch-data (8 units)
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #12              : (in LA;)
+      name          : 'return32'
+      type          : '()I'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 1 16-bit code units
+00078c:                                        |[00078c] A.return32:()I
+00079c: 0f00                                   |0000: return v0
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #13              : (in LA;)
+      name          : 'return64'
+      type          : '()I'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 1 16-bit code units
+0007a0:                                        |[0007a0] A.return64:()I
+0007b0: 1000                                   |0000: return-wide v0
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #14              : (in LA;)
+      name          : 'return_object'
+      type          : '()Ljava/lang/Object;'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 1 16-bit code units
+0007b4:                                        |[0007b4] A.return_object:()Ljava/lang/Object;
+0007c4: 1100                                   |0000: return-object v0
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #15              : (in LA;)
+      name          : 'sparse_switch'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 22 16-bit code units
+0007c8:                                        |[0007c8] A.sparse_switch:()V
+0007d8: 2c00 0400 0000                         |0000: sparse-switch v0, 00000004 // +00000004
+0007de: 0e00                                   |0003: return-void
+0007e0: 0002 0400 1111 0000 2222 0000 3333 ... |0004: sparse-switch-data (18 units)
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #16              : (in LA;)
+      name          : 'static_fields'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 1
+      ins           : 0
+      outs          : 0
+      insns size    : 29 16-bit code units
+000804:                                        |[000804] A.static_fields:()V
+000814: 6000 0900                              |0000: sget v0, LA;.sI:I // field@0009
+000818: 6100 0a00                              |0002: sget-wide v0, LA;.sJ:J // field@000a
+00081c: 6200 0b00                              |0004: sget-object v0, LA;.sO:LA; // field@000b
+000820: 6300 0d00                              |0006: sget-boolean v0, LA;.sZ:Z // field@000d
+000824: 6400 0700                              |0008: sget-byte v0, LA;.sB:B // field@0007
+000828: 6500 0800                              |000a: sget-char v0, LA;.sC:C // field@0008
+00082c: 6600 0c00                              |000c: sget-short v0, LA;.sS:S // field@000c
+000830: 6700 0900                              |000e: sput v0, LA;.sI:I // field@0009
+000834: 6800 0a00                              |0010: sput-wide v0, LA;.sJ:J // field@000a
+000838: 6900 0b00                              |0012: sput-object v0, LA;.sO:LA; // field@000b
+00083c: 6a00 0d00                              |0014: sput-boolean v0, LA;.sZ:Z // field@000d
+000840: 6b00 0700                              |0016: sput-byte v0, LA;.sB:B // field@0007
+000844: 6c00 0800                              |0018: sput-char v0, LA;.sC:C // field@0008
+000848: 6d00 0500                              |001a: sput-short v0, LA;.mS:S // field@0005
+00084c: 0e00                                   |001c: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #17              : (in LA;)
+      name          : 'unary_ops'
+      type          : '()V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 2
+      ins           : 0
+      outs          : 0
+      insns size    : 22 16-bit code units
+000850:                                        |[000850] A.unary_ops:()V
+000860: 7b10                                   |0000: neg-int v0, v1
+000862: 7c10                                   |0001: not-int v0, v1
+000864: 7d10                                   |0002: neg-long v0, v1
+000866: 7e10                                   |0003: not-long v0, v1
+000868: 7f10                                   |0004: neg-float v0, v1
+00086a: 8010                                   |0005: neg-double v0, v1
+00086c: 8110                                   |0006: int-to-long v0, v1
+00086e: 8210                                   |0007: int-to-float v0, v1
+000870: 8310                                   |0008: int-to-double v0, v1
+000872: 8410                                   |0009: long-to-int v0, v1
+000874: 8510                                   |000a: long-to-float v0, v1
+000876: 8610                                   |000b: long-to-double v0, v1
+000878: 8710                                   |000c: float-to-int v0, v1
+00087a: 8810                                   |000d: float-to-long v0, v1
+00087c: 8910                                   |000e: float-to-double v0, v1
+00087e: 8a10                                   |000f: double-to-int v0, v1
+000880: 8b10                                   |0010: double-to-long v0, v1
+000882: 8c10                                   |0011: double-to-float v0, v1
+000884: 8d10                                   |0012: int-to-byte v0, v1
+000886: 8e10                                   |0013: int-to-char v0, v1
+000888: 8f10                                   |0014: int-to-short v0, v1
+00088a: 0e00                                   |0015: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+  Virtual methods   -
+    #0              : (in LA;)
+      name          : 'instance_fields'
+      type          : '()V'
+      access        : 0x0001 (PUBLIC)
+      code          -
+      registers     : 2
+      ins           : 1
+      outs          : 0
+      insns size    : 29 16-bit code units
+00088c:                                        |[00088c] A.instance_fields:()V
+00089c: 5210 0900                              |0000: iget v0, v1, LA;.sI:I // field@0009
+0008a0: 5310 0a00                              |0002: iget-wide v0, v1, LA;.sJ:J // field@000a
+0008a4: 5410 0b00                              |0004: iget-object v0, v1, LA;.sO:LA; // field@000b
+0008a8: 5510 0d00                              |0006: iget-boolean v0, v1, LA;.sZ:Z // field@000d
+0008ac: 5610 0700                              |0008: iget-byte v0, v1, LA;.sB:B // field@0007
+0008b0: 5710 0800                              |000a: iget-char v0, v1, LA;.sC:C // field@0008
+0008b4: 5810 0c00                              |000c: iget-short v0, v1, LA;.sS:S // field@000c
+0008b8: 5910 0900                              |000e: iput v0, v1, LA;.sI:I // field@0009
+0008bc: 5a10 0a00                              |0010: iput-wide v0, v1, LA;.sJ:J // field@000a
+0008c0: 5b10 0b00                              |0012: iput-object v0, v1, LA;.sO:LA; // field@000b
+0008c4: 5c10 0d00                              |0014: iput-boolean v0, v1, LA;.sZ:Z // field@000d
+0008c8: 5d10 0700                              |0016: iput-byte v0, v1, LA;.sB:B // field@0007
+0008cc: 5e10 0800                              |0018: iput-char v0, v1, LA;.sC:C // field@0008
+0008d0: 5f10 0c00                              |001a: iput-short v0, v1, LA;.sS:S // field@000c
+0008d4: 0e00                                   |001c: return-void
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+    #1              : (in LA;)
+      name          : 'invokes'
+      type          : '()V'
+      access        : 0x0001 (PUBLIC)
+      code          -
+      registers     : 5
+      ins           : 1
+      outs          : 1
+      insns size    : 15 16-bit code units
+0008d8:                                        |[0008d8] A.invokes:()V
+0008e8: 6e54 0a00 1032                         |0000: invoke-virtual {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+0008ee: 6f54 0a00 1032                         |0003: invoke-super {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+0008f4: 7054 0a00 1032                         |0006: invoke-direct {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+0008fa: 7154 0a00 1032                         |0009: invoke-static {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+000900: 7254 0a00 1032                         |000c: invoke-interface {v0, v1, v2, v3, v4}, LA;.invokes:()V // method@000a
+      catches       : (none)
+      positions     : 
+      locals        : 
+
+  source_file_idx   : -1 (unknown)
+
diff --git a/test/dexdump/all.xml b/test/dexdump/all.xml
new file mode 100644
index 0000000..b623ecb
--- /dev/null
+++ b/test/dexdump/all.xml
@@ -0,0 +1,211 @@
+<api>
+<package name=""
+>
+<class name="A"
+ extends="java.lang.Object"
+ interface="false"
+ abstract="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+<constructor name="A"
+ type="A"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</constructor>
+<method name="arrays"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="binary_ops"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="binary_ops_2addr"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="binary_ops_lit16"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="binary_ops_lit8"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="compares"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="conditionals"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="constants"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="misc"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="moves"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="packed_switch"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="return32"
+ return="int"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="return64"
+ return="int"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="return_object"
+ return="java.lang.Object"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="sparse_switch"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="static_fields"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="unary_ops"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="instance_fields"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</method>
+<method name="invokes"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</method>
+</class>
+</package>
+</api>
diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java
index d088e8c..e6f8411 100644
--- a/tools/ahat/src/AhatSnapshot.java
+++ b/tools/ahat/src/AhatSnapshot.java
@@ -16,6 +16,7 @@
 
 package com.android.ahat;
 
+import com.android.tools.perflib.captures.MemoryMappedFileBuffer;
 import com.android.tools.perflib.heap.ClassObj;
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
@@ -24,9 +25,11 @@
 import com.android.tools.perflib.heap.Snapshot;
 import com.android.tools.perflib.heap.StackFrame;
 import com.android.tools.perflib.heap.StackTrace;
-import com.android.tools.perflib.captures.MemoryMappedFileBuffer;
+
 import com.google.common.collect.Lists;
+
 import gnu.trove.TObjectProcedure;
+
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
index 8defba2..3cdb40c 100644
--- a/tools/ahat/src/InstanceUtils.java
+++ b/tools/ahat/src/InstanceUtils.java
@@ -19,9 +19,10 @@
 import com.android.tools.perflib.heap.ArrayInstance;
 import com.android.tools.perflib.heap.ClassInstance;
 import com.android.tools.perflib.heap.ClassObj;
-import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.Heap;
+import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.Type;
+
 import java.awt.image.BufferedImage;
 
 /**
@@ -42,11 +43,11 @@
    * Returns null if the instance is not a byte array.
    */
   private static byte[] asByteArray(Instance inst) {
-    if (! (inst instanceof ArrayInstance)) {
+    if (!(inst instanceof ArrayInstance)) {
       return null;
     }
 
-    ArrayInstance array = (ArrayInstance)inst;
+    ArrayInstance array = (ArrayInstance) inst;
     if (array.getArrayType() != Type.BYTE) {
       return null;
     }
@@ -54,7 +55,7 @@
     Object[] objs = array.getValues();
     byte[] bytes = new byte[objs.length];
     for (int i = 0; i < objs.length; i++) {
-      Byte b = (Byte)objs[i];
+      Byte b = (Byte) objs[i];
       bytes[i] = b.byteValue();
     }
     return bytes;
@@ -143,10 +144,10 @@
     int[] abgr = new int[height * width];
     for (int i = 0; i < abgr.length; i++) {
       abgr[i] = (
-          (((int)buffer[i * 4 + 3] & 0xFF) << 24) +
-          (((int)buffer[i * 4 + 0] & 0xFF) << 16) +
-          (((int)buffer[i * 4 + 1] & 0xFF) << 8) +
-          ((int)buffer[i * 4 + 2] & 0xFF));
+          (((int) buffer[i * 4 + 3] & 0xFF) << 24)
+          + (((int) buffer[i * 4 + 0] & 0xFF) << 16)
+          + (((int) buffer[i * 4 + 1] & 0xFF) << 8)
+          + ((int) buffer[i * 4 + 2] & 0xFF));
     }
 
     BufferedImage bitmap = new BufferedImage(
@@ -185,7 +186,7 @@
     if (!(value instanceof Instance)) {
       return null;
     }
-    return (Instance)value;
+    return (Instance) value;
   }
 
   /**
@@ -199,7 +200,7 @@
     if (!(value instanceof Integer)) {
       return def;
     }
-    return (Integer)value;
+    return (Integer) value;
   }
 
   /**
@@ -213,7 +214,7 @@
     if (!(value instanceof Long)) {
       return def;
     }
-    return (Long)value;
+    return (Long) value;
   }
 
   /**
@@ -226,7 +227,7 @@
     if (!(value instanceof Instance)) {
       return null;
     }
-    return asByteArray((Instance)value);
+    return asByteArray((Instance) value);
   }
 
   // Return the bitmap instance associated with this object, or null if there
@@ -243,7 +244,7 @@
     }
 
     if (inst instanceof ArrayInstance) {
-      ArrayInstance array = (ArrayInstance)inst;
+      ArrayInstance array = (ArrayInstance) inst;
       if (array.getArrayType() == Type.BYTE && inst.getHardReverseReferences().size() == 1) {
         Instance ref = inst.getHardReverseReferences().get(0);
         ClassObj clsref = ref.getClassObj();
@@ -323,10 +324,10 @@
     // Note: We know inst as an instance of ClassInstance because we already
     // read the nativePtr field from it.
     Instance registry = null;
-    for (ClassInstance.FieldValue field : ((ClassInstance)inst).getValues()) {
+    for (ClassInstance.FieldValue field : ((ClassInstance) inst).getValues()) {
       Object fieldValue = field.getValue();
       if (fieldValue instanceof Instance) {
-        Instance fieldInst = (Instance)fieldValue;
+        Instance fieldInst = (Instance) fieldValue;
         if (isInstanceOfClass(fieldInst, "libcore.util.NativeAllocationRegistry")) {
           registry = fieldInst;
           break;
diff --git a/tools/ahat/src/Site.java b/tools/ahat/src/Site.java
index d504096..dbb84f6 100644
--- a/tools/ahat/src/Site.java
+++ b/tools/ahat/src/Site.java
@@ -20,6 +20,7 @@
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.StackFrame;
+
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
diff --git a/tools/ahat/src/Sort.java b/tools/ahat/src/Sort.java
index c5f89c3..8a3d9f2 100644
--- a/tools/ahat/src/Sort.java
+++ b/tools/ahat/src/Sort.java
@@ -16,13 +16,14 @@
 
 package com.android.ahat;
 
-import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.Heap;
+import com.android.tools.perflib.heap.Instance;
+
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
-import java.util.List;
 import java.util.Iterator;
+import java.util.List;
 
 /**
  * Provides Comparators and helper functions for sorting Instances, Sites, and