Merge "Fix (non-intrinsic) UnsafeCASObject for the read barrier config."
diff --git a/Android.mk b/Android.mk
index 3438beb..97a82e2 100644
--- a/Android.mk
+++ b/Android.mk
@@ -77,6 +77,7 @@
 # product rules
 
 include $(art_path)/runtime/Android.mk
+include $(art_path)/runtime/simulator/Android.mk
 include $(art_path)/compiler/Android.mk
 include $(art_path)/dexdump/Android.mk
 include $(art_path)/dexlist/Android.mk
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 529143d..fe83ba9 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -243,8 +243,8 @@
 TEST_F(CmdlineParserTest, TestLogVerbosity) {
   {
     const char* log_args = "-verbose:"
-        "class,compiler,gc,heap,jdwp,jni,monitor,profiler,signals,startup,third-party-jni,"
-        "threads,verifier";
+        "class,compiler,gc,heap,jdwp,jni,monitor,profiler,signals,simulator,startup,"
+        "third-party-jni,threads,verifier";
 
     LogVerbosity log_verbosity = LogVerbosity();
     log_verbosity.class_linker = true;
@@ -256,6 +256,7 @@
     log_verbosity.monitor = true;
     log_verbosity.profiler = true;
     log_verbosity.signals = true;
+    log_verbosity.simulator = true;
     log_verbosity.startup = true;
     log_verbosity.third_party_jni = true;
     log_verbosity.threads = true;
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 4751989..6c0a0e1 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -606,6 +606,8 @@
         log_verbosity.profiler = true;
       } else if (verbose_options[j] == "signals") {
         log_verbosity.signals = true;
+      } else if (verbose_options[j] == "simulator") {
+        log_verbosity.simulator = true;
       } else if (verbose_options[j] == "startup") {
         log_verbosity.startup = true;
       } else if (verbose_options[j] == "third-party-jni") {
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 39f8ee8..18ce563 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -1430,8 +1430,4 @@
                                  method_lowering_infos_.data(), count);
 }
 
-bool MIRGraph::SkipCompilationByName(const std::string& methodname) {
-  return cu_->compiler_driver->SkipCompilation(methodname);
-}
-
 }  // namespace art
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 2da8a98..3191fe9 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -564,11 +564,6 @@
   bool SkipCompilation(std::string* skip_message);
 
   /*
-   * Should we skip the compilation of this method based on its name?
-   */
-  bool SkipCompilationByName(const std::string& methodname);
-
-  /*
    * Parse dex method and add MIR at current insert point.  Returns id (which is
    * actually the index of the method in the m_units_ array).
    */
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 05dde9f..3260a7a 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -780,14 +780,6 @@
   PassDriverMEOpts pass_driver(GetPreOptPassManager(), GetPostOptPassManager(), &cu);
   pass_driver.Launch();
 
-  /* For non-leaf methods check if we should skip compilation when the profiler is enabled. */
-  if (cu.compiler_driver->ProfilePresent()
-      && !cu.mir_graph->MethodIsLeaf()
-      && cu.mir_graph->SkipCompilationByName(PrettyMethod(method_idx, dex_file))) {
-    cu.EndTiming();
-    return nullptr;
-  }
-
   if (cu.enable_debug & (1 << kDebugDumpCheckStats)) {
     cu.mir_graph->DumpCheckStats();
   }
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 76b57dd..ba8f1d0 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -80,6 +80,9 @@
 // given, too all compilations.
 static constexpr bool kRestrictCompilationFiltersToImage = true;
 
+// Print additional info during profile guided compilation.
+static constexpr bool kDebugProfileGuidedCompilation = false;
+
 static double Percentage(size_t x, size_t y) {
   return 100.0 * (static_cast<double>(x)) / (static_cast<double>(x + y));
 }
@@ -344,8 +347,7 @@
                                const std::string& dump_cfg_file_name, bool dump_cfg_append,
                                CumulativeLogger* timer, int swap_fd,
                                const std::string& profile_file)
-    : profile_present_(false),
-      compiler_options_(compiler_options),
+    : compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
       compiler_(Compiler::Create(this, compiler_kind)),
@@ -383,12 +385,8 @@
 
   // Read the profile file if one is provided.
   if (!profile_file.empty()) {
-    profile_present_ = profile_file_.LoadFile(profile_file);
-    if (profile_present_) {
-      LOG(INFO) << "Using profile data form file " << profile_file;
-    } else {
-      LOG(INFO) << "Failed to load profile file " << profile_file;
-    }
+    profile_compilation_info_.reset(new ProfileCompilationInfo(profile_file));
+    LOG(INFO) << "Using profile data from file " << profile_file;
   }
 }
 
@@ -569,7 +567,9 @@
         (verified_method->GetEncounteredVerificationFailures() &
             (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 &&
         // Is eligable for compilation by methods-to-compile filter.
-        driver->IsMethodToCompile(method_ref);
+        driver->IsMethodToCompile(method_ref) &&
+        driver->ShouldCompileBasedOnProfile(method_ref);
+
     if (compile) {
       // NOTE: if compiler declines to compile this method, it will return null.
       compiled_method = driver->GetCompiler()->Compile(code_item, access_flags, invoke_type,
@@ -766,6 +766,22 @@
   return methods_to_compile_->find(tmp.c_str()) != methods_to_compile_->end();
 }
 
+bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_ref) const {
+  if (profile_compilation_info_ == nullptr) {
+    // If we miss profile information it means that we don't do a profile guided compilation.
+    // Return true, and let the other filters decide if the method should be compiled.
+    return true;
+  }
+  bool result = profile_compilation_info_->ContainsMethod(method_ref);
+
+  if (kDebugProfileGuidedCompilation) {
+    LOG(INFO) << "[ProfileGuidedCompilation] "
+        << (result ? "Compiled" : "Skipped") << " method:"
+        << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file, true);
+  }
+  return result;
+}
+
 class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor {
  public:
   ResolveCatchBlockExceptionsClassVisitor(
@@ -2280,6 +2296,16 @@
 
 void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                              ThreadPool* thread_pool, TimingLogger* timings) {
+  if (profile_compilation_info_ != nullptr) {
+    if (!profile_compilation_info_->Load(dex_files)) {
+      LOG(WARNING) << "Failed to load offline profile info from "
+          << profile_compilation_info_->GetFilename()
+          << ". No methods will be compiled";
+    } else if (kDebugProfileGuidedCompilation) {
+      LOG(INFO) << "[ProfileGuidedCompilation] "
+          << profile_compilation_info_->DumpInfo();
+    }
+  }
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
@@ -2517,39 +2543,6 @@
   return freezing_constructor_classes_.count(ClassReference(dex_file, class_def_index)) != 0;
 }
 
-bool CompilerDriver::SkipCompilation(const std::string& method_name) {
-  if (!profile_present_) {
-    return false;
-  }
-  // First find the method in the profile file.
-  ProfileFile::ProfileData data;
-  if (!profile_file_.GetProfileData(&data, method_name)) {
-    // Not in profile, no information can be determined.
-    if (kIsDebugBuild) {
-      VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile";
-    }
-    return true;
-  }
-
-  // Methods that comprise top_k_threshold % of the total samples will be compiled.
-  // Compare against the start of the topK percentage bucket just in case the threshold
-  // falls inside a bucket.
-  bool compile = data.GetTopKUsedPercentage() - data.GetUsedPercent()
-                 <= compiler_options_->GetTopKProfileThreshold();
-  if (kIsDebugBuild) {
-    if (compile) {
-      LOG(INFO) << "compiling method " << method_name << " because its usage is part of top "
-          << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%"
-          << " (topKThreshold=" << compiler_options_->GetTopKProfileThreshold() << ")";
-    } else {
-      VLOG(compiler) << "not compiling method " << method_name
-          << " because it's not part of leading " << compiler_options_->GetTopKProfileThreshold()
-          << "% samples)";
-    }
-  }
-  return !compile;
-}
-
 std::string CompilerDriver::GetMemoryUsageString(bool extended) const {
   std::ostringstream oss;
   Runtime* const runtime = Runtime::Current();
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 5c5a63d..f0360ce 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -31,11 +31,11 @@
 #include "compiler.h"
 #include "dex_file.h"
 #include "driver/compiled_method_storage.h"
+#include "jit/offline_profiling_info.h"
 #include "invoke_type.h"
 #include "method_reference.h"
 #include "mirror/class.h"  // For mirror::Class::Status.
 #include "os.h"
-#include "profiler.h"
 #include "runtime.h"
 #include "safe_map.h"
 #include "thread_pool.h"
@@ -147,10 +147,6 @@
     return compiler_.get();
   }
 
-  bool ProfilePresent() const {
-    return profile_present_;
-  }
-
   // Are we compiling and creating an image file?
   bool IsBootImage() const {
     return boot_image_;
@@ -440,6 +436,10 @@
   // Checks whether the provided method should be compiled, i.e., is in method_to_compile_.
   bool IsMethodToCompile(const MethodReference& method_ref) const;
 
+  // Checks whether profile guided compilation is enabled and if the method should be compiled
+  // according to the profile file.
+  bool ShouldCompileBasedOnProfile(const MethodReference& method_ref) const;
+
   void RecordClassStatus(ClassReference ref, mirror::Class::Status status)
       REQUIRES(!compiled_classes_lock_);
 
@@ -449,9 +449,6 @@
                                        uint16_t class_def_idx,
                                        const DexFile& dex_file) const;
 
-  // Should the compiler run on this method given profile information?
-  bool SkipCompilation(const std::string& method_name);
-
   // Get memory usage during compilation.
   std::string GetMemoryUsageString(bool extended) const;
 
@@ -590,9 +587,6 @@
                       ThreadPool* thread_pool, TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
-  ProfileFile profile_file_;
-  bool profile_present_;
-
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
@@ -642,6 +636,9 @@
   // This option may be restricted to the boot image, depending on a flag in the implementation.
   std::unique_ptr<std::unordered_set<std::string>> methods_to_compile_;
 
+  // Info for profile guided compilation.
+  std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
+
   bool had_hard_verifier_failure_;
 
   size_t thread_count_;
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index a24c8a3..4d2d924 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -117,7 +117,7 @@
 }
 
 void CompilerOptions::ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage) {
-  ParseUintOption(option, "--inline-max-code-units=", &inline_max_code_units_, Usage);
+  ParseUintOption(option, "--inline-max-code-units", &inline_max_code_units_, Usage);
 }
 
 void CompilerOptions::ParseDisablePasses(const StringPiece& option,
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index e806acf..06553a6 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -301,40 +301,30 @@
     uint32_t high_pc_ = 0;
   };
 
-  struct LocalVariable {
-    uint16_t vreg;
-    uint32_t dex_pc_low;
-    uint32_t dex_pc_high;
-    const char* name;
-    const char* type;
-    const char* sig;
-  };
+  typedef std::vector<DexFile::LocalInfo> LocalInfos;
 
-  struct DebugInfoCallback {
-    static void NewLocal(void* ctx,
-                         uint16_t vreg,
-                         uint32_t start,
-                         uint32_t end,
-                         const char* name,
-                         const char* type,
-                         const char* sig) {
-      auto* context = static_cast<DebugInfoCallback*>(ctx);
-      if (name != nullptr && type != nullptr) {
-        context->local_variables_.push_back({vreg, start, end, name, type, sig});
-      }
-    }
-    std::vector<LocalVariable> local_variables_;
-  };
+  void LocalInfoCallback(void* ctx, const DexFile::LocalInfo& entry) {
+    static_cast<LocalInfos*>(ctx)->push_back(entry);
+  }
+
+  typedef std::vector<DexFile::PositionInfo> PositionInfos;
+
+  bool PositionInfoCallback(void* ctx, const DexFile::PositionInfo& entry) {
+    static_cast<PositionInfos*>(ctx)->push_back(entry);
+    return false;
+  }
 
   std::vector<const char*> GetParamNames(const MethodDebugInfo* mi) {
     std::vector<const char*> names;
-    const uint8_t* stream = mi->dex_file_->GetDebugInfoStream(mi->code_item_);
-    if (stream != nullptr) {
-      DecodeUnsignedLeb128(&stream);  // line.
-      uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
-      for (uint32_t i = 0; i < parameters_size; ++i) {
-        uint32_t id = DecodeUnsignedLeb128P1(&stream);
-        names.push_back(mi->dex_file_->StringDataByIdx(id));
+    if (mi->code_item_ != nullptr) {
+      const uint8_t* stream = mi->dex_file_->GetDebugInfoStream(mi->code_item_);
+      if (stream != nullptr) {
+        DecodeUnsignedLeb128(&stream);  // line.
+        uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
+        for (uint32_t i = 0; i < parameters_size; ++i) {
+          uint32_t id = DecodeUnsignedLeb128P1(&stream);
+          names.push_back(mi->dex_file_->StringDataByIdx(id));
+        }
       }
     }
     return names;
@@ -454,6 +444,7 @@
       const char* last_dex_class_desc = nullptr;
       for (auto mi : compilation_unit.methods_) {
         const DexFile* dex = mi->dex_file_;
+        const DexFile::CodeItem* dex_code = mi->code_item_;
         const DexFile::MethodId& dex_method = dex->GetMethodId(mi->dex_method_index_);
         const DexFile::ProtoId& dex_proto = dex->GetMethodPrototype(dex_method);
         const DexFile::TypeList* dex_params = dex->GetProtoParameters(dex_proto);
@@ -473,19 +464,6 @@
           last_dex_class_desc = dex_class_desc;
         }
 
-        // Collect information about local variables and parameters.
-        DebugInfoCallback debug_info_callback;
-        std::vector<const char*> param_names;
-        if (mi->code_item_ != nullptr) {
-          dex->DecodeDebugInfo(mi->code_item_,
-                               is_static,
-                               mi->dex_method_index_,
-                               nullptr,
-                               DebugInfoCallback::NewLocal,
-                               &debug_info_callback);
-          param_names = GetParamNames(mi);
-        }
-
         int start_depth = info_.Depth();
         info_.StartTag(DW_TAG_subprogram);
         WriteName(dex->GetMethodName(dex_method));
@@ -494,50 +472,70 @@
         uint8_t frame_base[] = { DW_OP_call_frame_cfa };
         info_.WriteExprLoc(DW_AT_frame_base, &frame_base, sizeof(frame_base));
         WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto));
-        uint32_t vreg = mi->code_item_ == nullptr ? 0 :
-            mi->code_item_->registers_size_ - mi->code_item_->ins_size_;
+
+        // Write parameters. DecodeDebugLocalInfo returns them as well, but it does not
+        // guarantee order or uniqueness so it is safer to iterate over them manually.
+        // DecodeDebugLocalInfo might not also be available if there is no debug info.
+        std::vector<const char*> param_names = GetParamNames(mi);
+        uint32_t arg_reg = 0;
         if (!is_static) {
           info_.StartTag(DW_TAG_formal_parameter);
           WriteName("this");
           info_.WriteFlag(DW_AT_artificial, true);
           WriteLazyType(dex_class_desc);
-          const bool is64bitValue = false;
-          WriteRegLocation(mi, vreg, is64bitValue, compilation_unit.low_pc_);
-          vreg++;
+          if (dex_code != nullptr) {
+            // Write the stack location of the parameter.
+            const uint32_t vreg = dex_code->registers_size_ - dex_code->ins_size_ + arg_reg;
+            const bool is64bitValue = false;
+            WriteRegLocation(mi, vreg, is64bitValue, compilation_unit.low_pc_);
+          }
+          arg_reg++;
           info_.EndTag();
         }
         if (dex_params != nullptr) {
           for (uint32_t i = 0; i < dex_params->Size(); ++i) {
             info_.StartTag(DW_TAG_formal_parameter);
             // Parameter names may not be always available.
-            if (i < param_names.size() && param_names[i] != nullptr) {
+            if (i < param_names.size()) {
               WriteName(param_names[i]);
             }
             // Write the type.
             const char* type_desc = dex->StringByTypeIdx(dex_params->GetTypeItem(i).type_idx_);
             WriteLazyType(type_desc);
-            // Write the stack location of the parameter.
             const bool is64bitValue = type_desc[0] == 'D' || type_desc[0] == 'J';
-            WriteRegLocation(mi, vreg, is64bitValue, compilation_unit.low_pc_);
-            vreg += is64bitValue ? 2 : 1;
+            if (dex_code != nullptr) {
+              // Write the stack location of the parameter.
+              const uint32_t vreg = dex_code->registers_size_ - dex_code->ins_size_ + arg_reg;
+              WriteRegLocation(mi, vreg, is64bitValue, compilation_unit.low_pc_);
+            }
+            arg_reg += is64bitValue ? 2 : 1;
             info_.EndTag();
           }
-          if (mi->code_item_ != nullptr) {
-            CHECK_EQ(vreg, mi->code_item_->registers_size_);
+          if (dex_code != nullptr) {
+            DCHECK_EQ(arg_reg, dex_code->ins_size_);
           }
         }
-        for (const LocalVariable& var : debug_info_callback.local_variables_) {
-          const uint32_t first_arg = mi->code_item_->registers_size_ - mi->code_item_->ins_size_;
-          if (var.vreg < first_arg) {
-            info_.StartTag(DW_TAG_variable);
-            WriteName(var.name);
-            WriteLazyType(var.type);
-            bool is64bitValue = var.type[0] == 'D' || var.type[0] == 'J';
-            WriteRegLocation(mi, var.vreg, is64bitValue, compilation_unit.low_pc_,
-                             var.dex_pc_low, var.dex_pc_high);
-            info_.EndTag();
+
+        // Write local variables.
+        LocalInfos local_infos;
+        if (dex->DecodeDebugLocalInfo(dex_code,
+                                      is_static,
+                                      mi->dex_method_index_,
+                                      LocalInfoCallback,
+                                      &local_infos)) {
+          for (const DexFile::LocalInfo& var : local_infos) {
+            if (var.reg_ < dex_code->registers_size_ - dex_code->ins_size_) {
+              info_.StartTag(DW_TAG_variable);
+              WriteName(var.name_);
+              WriteLazyType(var.descriptor_);
+              bool is64bitValue = var.descriptor_[0] == 'D' || var.descriptor_[0] == 'J';
+              WriteRegLocation(mi, var.reg_, is64bitValue, compilation_unit.low_pc_,
+                               var.start_address_, var.end_address_);
+              info_.EndTag();
+            }
           }
         }
+
         info_.EndTag();
         CHECK_EQ(info_.Depth(), start_depth);  // Balanced start/end.
       }
@@ -708,8 +706,7 @@
     // to be enclosed in the right set of namespaces. Therefore we
     // just define all types lazily at the end of compilation unit.
     void WriteLazyType(const char* type_descriptor) {
-      DCHECK(type_descriptor != nullptr);
-      if (type_descriptor[0] != 'V') {
+      if (type_descriptor != nullptr && type_descriptor[0] != 'V') {
         lazy_types_.emplace(type_descriptor, info_.size());
         info_.WriteRef4(DW_AT_type, 0);
       }
@@ -724,7 +721,9 @@
 
    private:
     void WriteName(const char* name) {
-      info_.WriteStrp(DW_AT_name, owner_->WriteString(name));
+      if (name != nullptr) {
+        info_.WriteStrp(DW_AT_name, owner_->WriteString(name));
+      }
     }
 
     // Helper which writes DWARF expression referencing a register.
@@ -976,29 +975,15 @@
         continue;
       }
 
-      // Create mapping table from dex to source line.
-      struct DebugInfoCallbacks {
-        static bool NewPosition(void* ctx, uint32_t address, uint32_t line) {
-          auto* context = static_cast<DebugInfoCallbacks*>(ctx);
-          context->dex2line_.push_back({address, static_cast<int32_t>(line)});
-          return false;
-        }
-        DefaultSrcMap dex2line_;
-      } debug_info_callbacks;
-
       Elf_Addr method_address = text_address + mi->low_pc_;
 
+      PositionInfos position_infos;
       const DexFile* dex = mi->dex_file_;
-      if (mi->code_item_ != nullptr) {
-        dex->DecodeDebugInfo(mi->code_item_,
-                             (mi->access_flags_ & kAccStatic) != 0,
-                             mi->dex_method_index_,
-                             DebugInfoCallbacks::NewPosition,
-                             nullptr,
-                             &debug_info_callbacks);
+      if (!dex->DecodeDebugPositionInfo(mi->code_item_, PositionInfoCallback, &position_infos)) {
+        continue;
       }
 
-      if (debug_info_callbacks.dex2line_.empty()) {
+      if (position_infos.empty()) {
         continue;
       }
 
@@ -1053,20 +1038,23 @@
       opcodes.SetFile(file_index);
 
       // Generate mapping opcodes from PC to Java lines.
-      const DefaultSrcMap& dex2line_map = debug_info_callbacks.dex2line_;
-      if (file_index != 0 && !dex2line_map.empty()) {
+      if (file_index != 0) {
         bool first = true;
         for (SrcMapElem pc2dex : src_mapping_table) {
           uint32_t pc = pc2dex.from_;
           int dex_pc = pc2dex.to_;
-          auto dex2line = dex2line_map.Find(static_cast<uint32_t>(dex_pc));
-          if (dex2line.first) {
-            int line = dex2line.second;
+          // Find mapping with address with is greater than our dex pc; then go back one step.
+          auto ub = std::upper_bound(position_infos.begin(), position_infos.end(), dex_pc,
+              [](uint32_t address, const DexFile::PositionInfo& entry) {
+                  return address < entry.address_;
+              });
+          if (ub != position_infos.begin()) {
+            int line = (--ub)->line_;
             if (first) {
               first = false;
               if (pc > 0) {
                 // Assume that any preceding code is prologue.
-                int first_line = dex2line_map.front().to_;
+                int first_line = position_infos.front().line_;
                 // Prologue is not a sensible place for a breakpoint.
                 opcodes.NegateStmt();
                 opcodes.AddRow(method_address, first_line);
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index f9f0eb8..afa4904 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1221,8 +1221,11 @@
     // Compiling the boot image, add null class loader.
     class_loaders_.insert(nullptr);
   }
-  if (!class_loaders_.empty()) {
-    CHECK_EQ(class_loaders_.size(), 1u) << "Should only have one real class loader in the image";
+  // class_loaders_ usually will not be empty, but may be empty if we attempt to create an image
+  // with no classes.
+  if (class_loaders_.size() == 1u) {
+    // Only write the class table if we have exactly one class loader. There may be cases where
+    // there are multiple class loaders if a class path is passed to dex2oat.
     ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
     for (mirror::ClassLoader* loader : class_loaders_) {
       ClassTable* table = class_linker->ClassTableForClassLoader(loader);
@@ -1272,6 +1275,8 @@
   auto* interned_strings_section = &sections[ImageHeader::kSectionInternedStrings];
   *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
   cur_pos = interned_strings_section->End();
+  // Round up to the alignment the class table expects. See HashSet::WriteToMemory.
+  cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
   // Calculate the size of the class table section.
   auto* class_table_section = &sections[ImageHeader::kSectionClassTable];
   *class_table_section = ImageSection(cur_pos, class_table_bytes_);
@@ -1424,28 +1429,32 @@
   CHECK_EQ(temp_intern_table.Size(), intern_table->Size());
   temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots);
 
-  // Write the class table(s) into the image.
-  ClassLinker* const class_linker = runtime->GetClassLinker();
-  const ImageSection& class_table_section = image_header->GetImageSection(
-      ImageHeader::kSectionClassTable);
-  uint8_t* const class_table_memory_ptr = image_->Begin() + class_table_section.Offset();
-  ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  size_t class_table_bytes = 0;
-  for (mirror::ClassLoader* loader : class_loaders_) {
-    ClassTable* table = class_linker->ClassTableForClassLoader(loader);
-    CHECK(table != nullptr);
-    uint8_t* memory_ptr = class_table_memory_ptr + class_table_bytes;
-    class_table_bytes += table->WriteToMemory(memory_ptr);
-    // Fixup the pointers in the newly written class table to contain image addresses. See
-    // above comment for intern tables.
-    ClassTable temp_class_table;
-    temp_class_table.ReadFromMemory(memory_ptr);
-    // CHECK_EQ(temp_class_table.NumNonZygoteClasses(), table->NumNonZygoteClasses());
-    BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
-                                                                    RootInfo(kRootUnknown));
-    temp_class_table.VisitRoots(buffered_visitor);
+  // Write the class table(s) into the image. class_table_bytes_ may be 0 if there are multiple
+  // class loaders. Writing multiple class tables into the image is currently unsupported.
+  if (class_table_bytes_ > 0u) {
+    ClassLinker* const class_linker = runtime->GetClassLinker();
+    const ImageSection& class_table_section = image_header->GetImageSection(
+        ImageHeader::kSectionClassTable);
+    uint8_t* const class_table_memory_ptr = image_->Begin() + class_table_section.Offset();
+    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+    size_t class_table_bytes = 0;
+    for (mirror::ClassLoader* loader : class_loaders_) {
+      ClassTable* table = class_linker->ClassTableForClassLoader(loader);
+      CHECK(table != nullptr);
+      uint8_t* memory_ptr = class_table_memory_ptr + class_table_bytes;
+      class_table_bytes += table->WriteToMemory(memory_ptr);
+      // Fixup the pointers in the newly written class table to contain image addresses. See
+      // above comment for intern tables.
+      ClassTable temp_class_table;
+      temp_class_table.ReadFromMemory(memory_ptr);
+      CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() +
+               table->NumZygoteClasses());
+      BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
+                                                                      RootInfo(kRootUnknown));
+      temp_class_table.VisitRoots(buffered_visitor);
+    }
+    CHECK_EQ(class_table_bytes, class_table_bytes_);
   }
-  CHECK_EQ(class_table_bytes, class_table_bytes_);
 }
 
 void ImageWriter::CopyAndFixupObjects() {
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index c20d836..8e930f0 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -457,7 +457,9 @@
   // Prune class memoization table to speed up ContainsBootClassLoaderNonImageClass.
   std::unordered_map<mirror::Class*, bool> prune_class_memo_;
 
-  // Class loaders with a class table to write out. Should only be one currently.
+  // Class loaders with a class table to write out. There should only be one class loader because
+  // dex2oat loads the dex files to be compiled into a single class loader. For the boot image,
+  // null is a valid entry.
   std::unordered_set<mirror::ClassLoader*> class_loaders_;
 
   // Number of image class table bytes.
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index f3bda2f..5ab55e0 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -219,7 +219,8 @@
   // calling through stub will link with &Java_MyClassNatives_bar
 
   std::string reason;
-  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->LoadNativeLibrary(env_, "", class_loader_, &reason))
+  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->
+                  LoadNativeLibrary(env_, "", class_loader_, nullptr, nullptr, &reason))
       << reason;
 
   jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 24);
@@ -233,7 +234,8 @@
   // calling through stub will link with &Java_MyClassNatives_sbar
 
   std::string reason;
-  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->LoadNativeLibrary(env_, "", class_loader_, &reason))
+  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->
+                  LoadNativeLibrary(env_, "", class_loader_, nullptr, nullptr, &reason))
       << reason;
 
   jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 42);
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index a2b29a3..eea5204 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -255,7 +255,7 @@
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(28U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(131 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(132 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index ca71c32..c3979f3 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -90,9 +90,8 @@
 
   static constexpr const char* kBuilderPassName = "builder";
 
-  // The number of entries in a packed switch before we use a jump table or specified
-  // compare/jump series.
-  static constexpr uint16_t kSmallSwitchThreshold = 3;
+  // The number of entries in a packed switch before we use a jump table.
+  static constexpr uint16_t kSmallSwitchThreshold = 5;
 
  private:
   // Analyzes the dex instruction and adds HInstruction to the graph
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index a941935..3630dbe 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,7 +59,7 @@
 // S registers. Therefore there is no need to block it.
 static constexpr DRegister DTMP = D31;
 
-static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
 
 #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
@@ -3363,15 +3363,16 @@
     }
     default:
       LOG(FATAL) << "Unexpected operation type " << type;
+      UNREACHABLE();
   }
 }
 
 void LocationsBuilderARM::VisitRor(HRor* op) {
-    HandleRotate(op);
+  HandleRotate(op);
 }
 
 void InstructionCodeGeneratorARM::VisitRor(HRor* op) {
-    HandleRotate(op);
+  HandleRotate(op);
 }
 
 void LocationsBuilderARM::HandleShift(HBinaryOperation* op) {
@@ -6249,7 +6250,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
+  if (switch_instr->GetNumEntries() >= kPackedSwitchJumpTableThreshold &&
       codegen_->GetAssembler()->IsThumb()) {
     locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
     if (switch_instr->GetStartValue() != 0) {
@@ -6265,30 +6266,12 @@
   Register value_reg = locations->InAt(0).AsRegister<Register>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  if (num_entries <= kPackedSwitchCompareJumpThreshold || !codegen_->GetAssembler()->IsThumb()) {
+  if (num_entries < kPackedSwitchJumpTableThreshold || !codegen_->GetAssembler()->IsThumb()) {
     // Create a series of compare/jumps.
-    Register temp_reg = IP;
-    // Note: It is fine for the below AddConstantSetFlags() using IP register to temporarily store
-    // the immediate, because IP is used as the destination register. For the other
-    // AddConstantSetFlags() and GenerateCompareWithImmediate(), the immediate values are constant,
-    // and they can be encoded in the instruction without making use of IP register.
-    __ AddConstantSetFlags(temp_reg, value_reg, -lower_bound);
-
     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-    // Jump to successors[0] if value == lower_bound.
-    __ b(codegen_->GetLabelOf(successors[0]), EQ);
-    int32_t last_index = 0;
-    for (; num_entries - last_index > 2; last_index += 2) {
-      __ AddConstantSetFlags(temp_reg, temp_reg, -2);
-      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
-      __ b(codegen_->GetLabelOf(successors[last_index + 1]), LO);
-      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
-      __ b(codegen_->GetLabelOf(successors[last_index + 2]), EQ);
-    }
-    if (num_entries - last_index == 2) {
-      // The last missing case_value.
-      GenerateCompareWithImmediate(temp_reg, 1);
-      __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ);
+    for (uint32_t i = 0; i < num_entries; i++) {
+      GenerateCompareWithImmediate(value_reg, lower_bound + i);
+      __ b(codegen_->GetLabelOf(successors[i]), EQ);
     }
 
     // And the default for any other value.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index b7efbe3..451470f 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -71,10 +71,10 @@
 using helpers::ArtVixlRegCodeCoherentForRegSet;
 
 static constexpr int kCurrentMethodStackOffset = 0;
-// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump
+// The compare/jump sequence will generate about (2 * num_entries + 1) instructions. While jump
 // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
 // generates less code/data with a small num_entries.
-static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
 
 inline Condition ARM64Condition(IfCondition cond) {
   switch (cond) {
@@ -546,7 +546,7 @@
 
 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
   uint32_t num_entries = switch_instr_->GetNumEntries();
-  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
+  DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold);
 
   // We are about to use the assembler to place literals directly. Make sure we have enough
   // underlying code buffer and we have generated the jump table with right size.
@@ -4243,6 +4243,7 @@
 
     default:
       LOG(FATAL) << "Unexpected rem type " << type;
+      UNREACHABLE();
   }
 }
 
@@ -4317,6 +4318,7 @@
 
     default:
       LOG(FATAL) << "Unimplemented local type " << field_type;
+      UNREACHABLE();
   }
 }
 
@@ -4580,29 +4582,20 @@
   // ranges and emit the tables only as required.
   static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction;
 
-  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
+  if (num_entries < kPackedSwitchJumpTableThreshold ||
       // Current instruction id is an upper bound of the number of HIRs in the graph.
       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
     // Create a series of compare/jumps.
-    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
-    Register temp = temps.AcquireW();
-    __ Subs(temp, value_reg, Operand(lower_bound));
-
     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-    // Jump to successors[0] if value == lower_bound.
-    __ B(eq, codegen_->GetLabelOf(successors[0]));
-    int32_t last_index = 0;
-    for (; num_entries - last_index > 2; last_index += 2) {
-      __ Subs(temp, temp, Operand(2));
-      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
-      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
-      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
-      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
-    }
-    if (num_entries - last_index == 2) {
-      // The last missing case_value.
-      __ Cmp(temp, Operand(1));
-      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
+    for (uint32_t i = 0; i < num_entries; i++) {
+      int32_t case_value = lower_bound + i;
+      vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
+      if (case_value == 0) {
+        __ Cbz(value_reg, succ);
+      } else {
+        __ Cmp(value_reg, Operand(case_value));
+        __ B(eq, succ);
+      }
     }
 
     // And the default for any other value.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index ce7cbcd..5dc101b 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1956,11 +1956,8 @@
 void LocationsBuilderMIPS::VisitCompare(HCompare* compare) {
   Primitive::Type in_type = compare->InputAt(0)->GetType();
 
-  LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type)
-      ? LocationSummary::kCall
-      : LocationSummary::kNoCall;
-
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
 
   switch (in_type) {
     case Primitive::kPrimLong:
@@ -1971,13 +1968,11 @@
       break;
 
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      InvokeRuntimeCallingConvention calling_convention;
-      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
-      locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
-      locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
-    }
 
     default:
       LOG(FATAL) << "Unexpected type for compare operation " << in_type;
@@ -1986,7 +1981,10 @@
 
 void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  Register res = locations->Out().AsRegister<Register>();
   Primitive::Type in_type = instruction->InputAt(0)->GetType();
+  bool gt_bias = instruction->IsGtBias();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   //  0 if: left == right
   //  1 if: left  > right
@@ -1994,7 +1992,6 @@
   switch (in_type) {
     case Primitive::kPrimLong: {
       MipsLabel done;
-      Register res = locations->Out().AsRegister<Register>();
       Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
       Register lhs_low  = locations->InAt(0).AsRegisterPairLow<Register>();
       Register rhs_high = locations->InAt(1).AsRegisterPairHigh<Register>();
@@ -2011,45 +2008,82 @@
       break;
     }
 
-    case Primitive::kPrimFloat:
+    case Primitive::kPrimFloat: {
+      FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+      FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+      MipsLabel done;
+      if (isR6) {
+        __ CmpEqS(FTMP, lhs, rhs);
+        __ LoadConst32(res, 0);
+        __ Bc1nez(FTMP, &done);
+        if (gt_bias) {
+          __ CmpLtS(FTMP, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Bc1nez(FTMP, &done);
+          __ LoadConst32(res, 1);
+        } else {
+          __ CmpLtS(FTMP, rhs, lhs);
+          __ LoadConst32(res, 1);
+          __ Bc1nez(FTMP, &done);
+          __ LoadConst32(res, -1);
+        }
+      } else {
+        if (gt_bias) {
+          __ ColtS(0, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Bc1t(0, &done);
+          __ CeqS(0, lhs, rhs);
+          __ LoadConst32(res, 1);
+          __ Movt(res, ZERO, 0);
+        } else {
+          __ ColtS(0, rhs, lhs);
+          __ LoadConst32(res, 1);
+          __ Bc1t(0, &done);
+          __ CeqS(0, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Movt(res, ZERO, 0);
+        }
+      }
+      __ Bind(&done);
+      break;
+    }
     case Primitive::kPrimDouble: {
-      int32_t entry_point_offset;
-      bool direct;
-      if (in_type == Primitive::kPrimFloat) {
-        if (instruction->IsGtBias()) {
-          entry_point_offset = QUICK_ENTRY_POINT(pCmpgFloat);
-          direct = IsDirectEntrypoint(kQuickCmpgFloat);
+      FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+      FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+      MipsLabel done;
+      if (isR6) {
+        __ CmpEqD(FTMP, lhs, rhs);
+        __ LoadConst32(res, 0);
+        __ Bc1nez(FTMP, &done);
+        if (gt_bias) {
+          __ CmpLtD(FTMP, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Bc1nez(FTMP, &done);
+          __ LoadConst32(res, 1);
         } else {
-          entry_point_offset = QUICK_ENTRY_POINT(pCmplFloat);
-          direct = IsDirectEntrypoint(kQuickCmplFloat);
+          __ CmpLtD(FTMP, rhs, lhs);
+          __ LoadConst32(res, 1);
+          __ Bc1nez(FTMP, &done);
+          __ LoadConst32(res, -1);
         }
       } else {
-        if (instruction->IsGtBias()) {
-          entry_point_offset = QUICK_ENTRY_POINT(pCmpgDouble);
-          direct = IsDirectEntrypoint(kQuickCmpgDouble);
+        if (gt_bias) {
+          __ ColtD(0, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Bc1t(0, &done);
+          __ CeqD(0, lhs, rhs);
+          __ LoadConst32(res, 1);
+          __ Movt(res, ZERO, 0);
         } else {
-          entry_point_offset = QUICK_ENTRY_POINT(pCmplDouble);
-          direct = IsDirectEntrypoint(kQuickCmplDouble);
+          __ ColtD(0, rhs, lhs);
+          __ LoadConst32(res, 1);
+          __ Bc1t(0, &done);
+          __ CeqD(0, lhs, rhs);
+          __ LoadConst32(res, -1);
+          __ Movt(res, ZERO, 0);
         }
       }
-      codegen_->InvokeRuntime(entry_point_offset,
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr,
-                              direct);
-      if (in_type == Primitive::kPrimFloat) {
-        if (instruction->IsGtBias()) {
-          CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>();
-        } else {
-          CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>();
-        }
-      } else {
-        if (instruction->IsGtBias()) {
-          CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>();
-        } else {
-          CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>();
-        }
-      }
+      __ Bind(&done);
       break;
     }
 
@@ -2060,8 +2094,19 @@
 
 void LocationsBuilderMIPS::VisitCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  switch (instruction->InputAt(0)->GetType()) {
+    default:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      break;
+  }
   if (instruction->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
@@ -2071,151 +2116,45 @@
   if (!instruction->NeedsMaterialization()) {
     return;
   }
-  // TODO: generalize to long
-  DCHECK_NE(instruction->InputAt(0)->GetType(), Primitive::kPrimLong);
 
+  Primitive::Type type = instruction->InputAt(0)->GetType();
   LocationSummary* locations = instruction->GetLocations();
   Register dst = locations->Out().AsRegister<Register>();
+  MipsLabel true_label;
 
-  Register lhs = locations->InAt(0).AsRegister<Register>();
-  Location rhs_location = locations->InAt(1);
+  switch (type) {
+    default:
+      // Integer case.
+      GenerateIntCompare(instruction->GetCondition(), locations);
+      return;
 
-  Register rhs_reg = ZERO;
-  int64_t rhs_imm = 0;
-  bool use_imm = rhs_location.IsConstant();
-  if (use_imm) {
-    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
-  } else {
-    rhs_reg = rhs_location.AsRegister<Register>();
-  }
-
-  IfCondition if_cond = instruction->GetCondition();
-
-  switch (if_cond) {
-    case kCondEQ:
-    case kCondNE:
-      if (use_imm && IsUint<16>(rhs_imm)) {
-        __ Xori(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Xor(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondEQ) {
-        __ Sltiu(dst, dst, 1);
-      } else {
-        __ Sltu(dst, ZERO, dst);
-      }
+    case Primitive::kPrimLong:
+      // TODO: don't use branches.
+      GenerateLongCompareAndBranch(instruction->GetCondition(), locations, &true_label);
       break;
 
-    case kCondLT:
-    case kCondGE:
-      if (use_imm && IsInt<16>(rhs_imm)) {
-        __ Slti(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Slt(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondGE) {
-        // Simulate lhs >= rhs via !(lhs < rhs) since there's
-        // only the slt instruction but no sge.
-        __ Xori(dst, dst, 1);
-      }
-      break;
-
-    case kCondLE:
-    case kCondGT:
-      if (use_imm && IsInt<16>(rhs_imm + 1)) {
-        // Simulate lhs <= rhs via lhs < rhs + 1.
-        __ Slti(dst, lhs, rhs_imm + 1);
-        if (if_cond == kCondGT) {
-          // Simulate lhs > rhs via !(lhs <= rhs) since there's
-          // only the slti instruction but no sgti.
-          __ Xori(dst, dst, 1);
-        }
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Slt(dst, rhs_reg, lhs);
-        if (if_cond == kCondLE) {
-          // Simulate lhs <= rhs via !(rhs < lhs) since there's
-          // only the slt instruction but no sle.
-          __ Xori(dst, dst, 1);
-        }
-      }
-      break;
-
-    case kCondB:
-    case kCondAE:
-      // Use sltiu instruction if rhs_imm is in range [0, 32767] or in
-      // [max_unsigned - 32767 = 0xffff8000, max_unsigned = 0xffffffff].
-      if (use_imm &&
-          (IsUint<15>(rhs_imm) ||
-              IsUint<15>(rhs_imm - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(15))))) {
-        if (IsUint<15>(rhs_imm)) {
-          __ Sltiu(dst, lhs, rhs_imm);
-        } else {
-          // 16-bit value (in range [0x8000, 0xffff]) passed to sltiu is sign-extended
-          // and then used as unsigned integer (range [0xffff8000, 0xffffffff]).
-          __ Sltiu(dst, lhs, rhs_imm - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(16)));
-        }
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Sltu(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondAE) {
-        // Simulate lhs >= rhs via !(lhs < rhs) since there's
-        // only the sltu instruction but no sgeu.
-        __ Xori(dst, dst, 1);
-      }
-      break;
-
-    case kCondBE:
-    case kCondA:
-      // Use sltiu instruction if rhs_imm is in range [0, 32766] or in
-      // [max_unsigned - 32767 - 1 = 0xffff7fff, max_unsigned - 1 = 0xfffffffe].
-      // lhs <= rhs is simulated via lhs < rhs + 1.
-      if (use_imm && (rhs_imm != -1) &&
-          (IsUint<15>(rhs_imm + 1) ||
-              IsUint<15>(rhs_imm + 1 - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(15))))) {
-        if (IsUint<15>(rhs_imm + 1)) {
-          // Simulate lhs <= rhs via lhs < rhs + 1.
-          __ Sltiu(dst, lhs, rhs_imm + 1);
-        } else {
-          // 16-bit value (in range [0x8000, 0xffff]) passed to sltiu is sign-extended
-          // and then used as unsigned integer (range [0xffff8000, 0xffffffff] where rhs_imm
-          // is in range [0xffff7fff, 0xfffffffe] since lhs <= rhs is simulated via lhs < rhs + 1).
-          __ Sltiu(dst, lhs, rhs_imm + 1 - (MaxInt<uint64_t>(32) - MaxInt<uint64_t>(16)));
-        }
-        if (if_cond == kCondA) {
-          // Simulate lhs > rhs via !(lhs <= rhs) since there's
-          // only the sltiu instruction but no sgtiu.
-          __ Xori(dst, dst, 1);
-        }
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Sltu(dst, rhs_reg, lhs);
-        if (if_cond == kCondBE) {
-          // Simulate lhs <= rhs via !(rhs < lhs) since there's
-          // only the sltu instruction but no sleu.
-          __ Xori(dst, dst, 1);
-        }
-      }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      // TODO: don't use branches.
+      GenerateFpCompareAndBranch(instruction->GetCondition(),
+                                 instruction->IsGtBias(),
+                                 type,
+                                 locations,
+                                 &true_label);
       break;
   }
+
+  // Convert the branches into the result.
+  MipsLabel done;
+
+  // False case: result = 0.
+  __ LoadConst32(dst, 0);
+  __ B(&done);
+
+  // True case: result = 1.
+  __ Bind(&true_label);
+  __ LoadConst32(dst, 1);
+  __ Bind(&done);
 }
 
 void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
@@ -2574,6 +2513,627 @@
   }
 }
 
+void InstructionCodeGeneratorMIPS::GenerateIntCompare(IfCondition cond,
+                                                      LocationSummary* locations) {
+  Register dst = locations->Out().AsRegister<Register>();
+  Register lhs = locations->InAt(0).AsRegister<Register>();
+  Location rhs_location = locations->InAt(1);
+  Register rhs_reg = ZERO;
+  int64_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+  } else {
+    rhs_reg = rhs_location.AsRegister<Register>();
+  }
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+      if (use_imm && IsUint<16>(rhs_imm)) {
+        __ Xori(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Xor(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondEQ) {
+        __ Sltiu(dst, dst, 1);
+      } else {
+        __ Sltu(dst, ZERO, dst);
+      }
+      break;
+
+    case kCondLT:
+    case kCondGE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        __ Slti(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondGE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the slt instruction but no sge.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondLE:
+    case kCondGT:
+      if (use_imm && IsInt<16>(rhs_imm + 1)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        __ Slti(dst, lhs, rhs_imm + 1);
+        if (cond == kCondGT) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the slti instruction but no sgti.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, rhs_reg, lhs);
+        if (cond == kCondLE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the slt instruction but no sle.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+
+    case kCondB:
+    case kCondAE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        // Sltiu sign-extends its 16-bit immediate operand before
+        // the comparison and thus lets us compare directly with
+        // unsigned values in the ranges [0, 0x7fff] and
+        // [0xffff8000, 0xffffffff].
+        __ Sltiu(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondAE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the sltu instruction but no sgeu.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondBE:
+    case kCondA:
+      if (use_imm && (rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        // Note that this only works if rhs + 1 does not overflow
+        // to 0, hence the check above.
+        // Sltiu sign-extends its 16-bit immediate operand before
+        // the comparison and thus lets us compare directly with
+        // unsigned values in the ranges [0, 0x7fff] and
+        // [0xffff8000, 0xffffffff].
+        __ Sltiu(dst, lhs, rhs_imm + 1);
+        if (cond == kCondA) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the sltiu instruction but no sgtiu.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, rhs_reg, lhs);
+        if (cond == kCondBE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the sltu instruction but no sleu.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond,
+                                                               LocationSummary* locations,
+                                                               MipsLabel* label) {
+  Register lhs = locations->InAt(0).AsRegister<Register>();
+  Location rhs_location = locations->InAt(1);
+  Register rhs_reg = ZERO;
+  int32_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+  } else {
+    rhs_reg = rhs_location.AsRegister<Register>();
+  }
+
+  if (use_imm && rhs_imm == 0) {
+    switch (cond) {
+      case kCondEQ:
+      case kCondBE:  // <= 0 if zero
+        __ Beqz(lhs, label);
+        break;
+      case kCondNE:
+      case kCondA:  // > 0 if non-zero
+        __ Bnez(lhs, label);
+        break;
+      case kCondLT:
+        __ Bltz(lhs, label);
+        break;
+      case kCondGE:
+        __ Bgez(lhs, label);
+        break;
+      case kCondLE:
+        __ Blez(lhs, label);
+        break;
+      case kCondGT:
+        __ Bgtz(lhs, label);
+        break;
+      case kCondB:  // always false
+        break;
+      case kCondAE:  // always true
+        __ B(label);
+        break;
+    }
+  } else {
+    if (use_imm) {
+      // TODO: more efficient comparison with 16-bit constants without loading them into TMP.
+      rhs_reg = TMP;
+      __ LoadConst32(rhs_reg, rhs_imm);
+    }
+    switch (cond) {
+      case kCondEQ:
+        __ Beq(lhs, rhs_reg, label);
+        break;
+      case kCondNE:
+        __ Bne(lhs, rhs_reg, label);
+        break;
+      case kCondLT:
+        __ Blt(lhs, rhs_reg, label);
+        break;
+      case kCondGE:
+        __ Bge(lhs, rhs_reg, label);
+        break;
+      case kCondLE:
+        __ Bge(rhs_reg, lhs, label);
+        break;
+      case kCondGT:
+        __ Blt(rhs_reg, lhs, label);
+        break;
+      case kCondB:
+        __ Bltu(lhs, rhs_reg, label);
+        break;
+      case kCondAE:
+        __ Bgeu(lhs, rhs_reg, label);
+        break;
+      case kCondBE:
+        __ Bgeu(rhs_reg, lhs, label);
+        break;
+      case kCondA:
+        __ Bltu(rhs_reg, lhs, label);
+        break;
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateLongCompareAndBranch(IfCondition cond,
+                                                                LocationSummary* locations,
+                                                                MipsLabel* label) {
+  Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>();
+  Location rhs_location = locations->InAt(1);
+  Register rhs_high = ZERO;
+  Register rhs_low = ZERO;
+  int64_t imm = 0;
+  uint32_t imm_high = 0;
+  uint32_t imm_low = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    imm = rhs_location.GetConstant()->AsLongConstant()->GetValue();
+    imm_high = High32Bits(imm);
+    imm_low = Low32Bits(imm);
+  } else {
+    rhs_high = rhs_location.AsRegisterPairHigh<Register>();
+    rhs_low = rhs_location.AsRegisterPairLow<Register>();
+  }
+
+  if (use_imm && imm == 0) {
+    switch (cond) {
+      case kCondEQ:
+      case kCondBE:  // <= 0 if zero
+        __ Or(TMP, lhs_high, lhs_low);
+        __ Beqz(TMP, label);
+        break;
+      case kCondNE:
+      case kCondA:  // > 0 if non-zero
+        __ Or(TMP, lhs_high, lhs_low);
+        __ Bnez(TMP, label);
+        break;
+      case kCondLT:
+        __ Bltz(lhs_high, label);
+        break;
+      case kCondGE:
+        __ Bgez(lhs_high, label);
+        break;
+      case kCondLE:
+        __ Or(TMP, lhs_high, lhs_low);
+        __ Sra(AT, lhs_high, 31);
+        __ Bgeu(AT, TMP, label);
+        break;
+      case kCondGT:
+        __ Or(TMP, lhs_high, lhs_low);
+        __ Sra(AT, lhs_high, 31);
+        __ Bltu(AT, TMP, label);
+        break;
+      case kCondB:  // always false
+        break;
+      case kCondAE:  // always true
+        __ B(label);
+        break;
+    }
+  } else if (use_imm) {
+    // TODO: more efficient comparison with constants without loading them into TMP/AT.
+    switch (cond) {
+      case kCondEQ:
+        __ LoadConst32(TMP, imm_high);
+        __ Xor(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Xor(AT, AT, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondNE:
+        __ LoadConst32(TMP, imm_high);
+        __ Xor(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Xor(AT, AT, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Bnez(TMP, label);
+        break;
+      case kCondLT:
+        __ LoadConst32(TMP, imm_high);
+        __ Blt(lhs_high, TMP, label);
+        __ Slt(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, lhs_low, AT);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondGE:
+        __ LoadConst32(TMP, imm_high);
+        __ Blt(TMP, lhs_high, label);
+        __ Slt(TMP, lhs_high, TMP);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, lhs_low, AT);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondLE:
+        __ LoadConst32(TMP, imm_high);
+        __ Blt(lhs_high, TMP, label);
+        __ Slt(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, AT, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondGT:
+        __ LoadConst32(TMP, imm_high);
+        __ Blt(TMP, lhs_high, label);
+        __ Slt(TMP, lhs_high, TMP);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, AT, lhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondB:
+        __ LoadConst32(TMP, imm_high);
+        __ Bltu(lhs_high, TMP, label);
+        __ Sltu(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, lhs_low, AT);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondAE:
+        __ LoadConst32(TMP, imm_high);
+        __ Bltu(TMP, lhs_high, label);
+        __ Sltu(TMP, lhs_high, TMP);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, lhs_low, AT);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondBE:
+        __ LoadConst32(TMP, imm_high);
+        __ Bltu(lhs_high, TMP, label);
+        __ Sltu(TMP, TMP, lhs_high);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, AT, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondA:
+        __ LoadConst32(TMP, imm_high);
+        __ Bltu(TMP, lhs_high, label);
+        __ Sltu(TMP, lhs_high, TMP);
+        __ LoadConst32(AT, imm_low);
+        __ Sltu(AT, AT, lhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+    }
+  } else {
+    switch (cond) {
+      case kCondEQ:
+        __ Xor(TMP, lhs_high, rhs_high);
+        __ Xor(AT, lhs_low, rhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondNE:
+        __ Xor(TMP, lhs_high, rhs_high);
+        __ Xor(AT, lhs_low, rhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Bnez(TMP, label);
+        break;
+      case kCondLT:
+        __ Blt(lhs_high, rhs_high, label);
+        __ Slt(TMP, rhs_high, lhs_high);
+        __ Sltu(AT, lhs_low, rhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondGE:
+        __ Blt(rhs_high, lhs_high, label);
+        __ Slt(TMP, lhs_high, rhs_high);
+        __ Sltu(AT, lhs_low, rhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondLE:
+        __ Blt(lhs_high, rhs_high, label);
+        __ Slt(TMP, rhs_high, lhs_high);
+        __ Sltu(AT, rhs_low, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondGT:
+        __ Blt(rhs_high, lhs_high, label);
+        __ Slt(TMP, lhs_high, rhs_high);
+        __ Sltu(AT, rhs_low, lhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondB:
+        __ Bltu(lhs_high, rhs_high, label);
+        __ Sltu(TMP, rhs_high, lhs_high);
+        __ Sltu(AT, lhs_low, rhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+      case kCondAE:
+        __ Bltu(rhs_high, lhs_high, label);
+        __ Sltu(TMP, lhs_high, rhs_high);
+        __ Sltu(AT, lhs_low, rhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondBE:
+        __ Bltu(lhs_high, rhs_high, label);
+        __ Sltu(TMP, rhs_high, lhs_high);
+        __ Sltu(AT, rhs_low, lhs_low);
+        __ Or(TMP, TMP, AT);
+        __ Beqz(TMP, label);
+        break;
+      case kCondA:
+        __ Bltu(rhs_high, lhs_high, label);
+        __ Sltu(TMP, lhs_high, rhs_high);
+        __ Sltu(AT, rhs_low, lhs_low);
+        __ Blt(TMP, AT, label);
+        break;
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond,
+                                                              bool gt_bias,
+                                                              Primitive::Type type,
+                                                              LocationSummary* locations,
+                                                              MipsLabel* label) {
+  FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+  if (type == Primitive::kPrimFloat) {
+    if (isR6) {
+      switch (cond) {
+        case kCondEQ:
+          __ CmpEqS(FTMP, lhs, rhs);
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondNE:
+          __ CmpEqS(FTMP, lhs, rhs);
+          __ Bc1eqz(FTMP, label);
+          break;
+        case kCondLT:
+          if (gt_bias) {
+            __ CmpLtS(FTMP, lhs, rhs);
+          } else {
+            __ CmpUltS(FTMP, lhs, rhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondLE:
+          if (gt_bias) {
+            __ CmpLeS(FTMP, lhs, rhs);
+          } else {
+            __ CmpUleS(FTMP, lhs, rhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondGT:
+          if (gt_bias) {
+            __ CmpUltS(FTMP, rhs, lhs);
+          } else {
+            __ CmpLtS(FTMP, rhs, lhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondGE:
+          if (gt_bias) {
+            __ CmpUleS(FTMP, rhs, lhs);
+          } else {
+            __ CmpLeS(FTMP, rhs, lhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition";
+      }
+    } else {
+      switch (cond) {
+        case kCondEQ:
+          __ CeqS(0, lhs, rhs);
+          __ Bc1t(0, label);
+          break;
+        case kCondNE:
+          __ CeqS(0, lhs, rhs);
+          __ Bc1f(0, label);
+          break;
+        case kCondLT:
+          if (gt_bias) {
+            __ ColtS(0, lhs, rhs);
+          } else {
+            __ CultS(0, lhs, rhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondLE:
+          if (gt_bias) {
+            __ ColeS(0, lhs, rhs);
+          } else {
+            __ CuleS(0, lhs, rhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondGT:
+          if (gt_bias) {
+            __ CultS(0, rhs, lhs);
+          } else {
+            __ ColtS(0, rhs, lhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondGE:
+          if (gt_bias) {
+            __ CuleS(0, rhs, lhs);
+          } else {
+            __ ColeS(0, rhs, lhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition";
+      }
+    }
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimDouble);
+    if (isR6) {
+      switch (cond) {
+        case kCondEQ:
+          __ CmpEqD(FTMP, lhs, rhs);
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondNE:
+          __ CmpEqD(FTMP, lhs, rhs);
+          __ Bc1eqz(FTMP, label);
+          break;
+        case kCondLT:
+          if (gt_bias) {
+            __ CmpLtD(FTMP, lhs, rhs);
+          } else {
+            __ CmpUltD(FTMP, lhs, rhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondLE:
+          if (gt_bias) {
+            __ CmpLeD(FTMP, lhs, rhs);
+          } else {
+            __ CmpUleD(FTMP, lhs, rhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondGT:
+          if (gt_bias) {
+            __ CmpUltD(FTMP, rhs, lhs);
+          } else {
+            __ CmpLtD(FTMP, rhs, lhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        case kCondGE:
+          if (gt_bias) {
+            __ CmpUleD(FTMP, rhs, lhs);
+          } else {
+            __ CmpLeD(FTMP, rhs, lhs);
+          }
+          __ Bc1nez(FTMP, label);
+          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition";
+      }
+    } else {
+      switch (cond) {
+        case kCondEQ:
+          __ CeqD(0, lhs, rhs);
+          __ Bc1t(0, label);
+          break;
+        case kCondNE:
+          __ CeqD(0, lhs, rhs);
+          __ Bc1f(0, label);
+          break;
+        case kCondLT:
+          if (gt_bias) {
+            __ ColtD(0, lhs, rhs);
+          } else {
+            __ CultD(0, lhs, rhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondLE:
+          if (gt_bias) {
+            __ ColeD(0, lhs, rhs);
+          } else {
+            __ CuleD(0, lhs, rhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondGT:
+          if (gt_bias) {
+            __ CultD(0, rhs, lhs);
+          } else {
+            __ ColtD(0, rhs, lhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        case kCondGE:
+          if (gt_bias) {
+            __ CuleD(0, rhs, lhs);
+          } else {
+            __ ColeD(0, rhs, lhs);
+          }
+          __ Bc1t(0, label);
+          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition";
+      }
+    }
+  }
+}
+
 void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instruction,
                                                          size_t condition_input_index,
                                                          MipsLabel* true_target,
@@ -2610,7 +3170,7 @@
     // The condition instruction has been materialized, compare the output to 0.
     Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
     DCHECK(cond_val.IsRegister());
-      if (true_target == nullptr) {
+    if (true_target == nullptr) {
       __ Beqz(cond_val.AsRegister<Register>(), false_target);
     } else {
       __ Bnez(cond_val.AsRegister<Register>(), true_target);
@@ -2619,98 +3179,27 @@
     // The condition instruction has not been materialized, use its inputs as
     // the comparison and its condition as the branch condition.
     HCondition* condition = cond->AsCondition();
+    Primitive::Type type = condition->InputAt(0)->GetType();
+    LocationSummary* locations = cond->GetLocations();
+    IfCondition if_cond = condition->GetCondition();
+    MipsLabel* branch_target = true_target;
 
-    Register lhs = condition->GetLocations()->InAt(0).AsRegister<Register>();
-    Location rhs_location = condition->GetLocations()->InAt(1);
-    Register rhs_reg = ZERO;
-    int32_t rhs_imm = 0;
-    bool use_imm = rhs_location.IsConstant();
-    if (use_imm) {
-      rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
-    } else {
-      rhs_reg = rhs_location.AsRegister<Register>();
-    }
-
-    IfCondition if_cond;
-    MipsLabel* non_fallthrough_target;
     if (true_target == nullptr) {
       if_cond = condition->GetOppositeCondition();
-      non_fallthrough_target = false_target;
-    } else {
-      if_cond = condition->GetCondition();
-      non_fallthrough_target = true_target;
+      branch_target = false_target;
     }
 
-    if (use_imm && rhs_imm == 0) {
-      switch (if_cond) {
-        case kCondEQ:
-          __ Beqz(lhs, non_fallthrough_target);
-          break;
-        case kCondNE:
-          __ Bnez(lhs, non_fallthrough_target);
-          break;
-        case kCondLT:
-          __ Bltz(lhs, non_fallthrough_target);
-          break;
-        case kCondGE:
-          __ Bgez(lhs, non_fallthrough_target);
-          break;
-        case kCondLE:
-          __ Blez(lhs, non_fallthrough_target);
-          break;
-        case kCondGT:
-          __ Bgtz(lhs, non_fallthrough_target);
-          break;
-        case kCondB:
-          break;  // always false
-        case kCondBE:
-          __ Beqz(lhs, non_fallthrough_target);  // <= 0 if zero
-          break;
-        case kCondA:
-          __ Bnez(lhs, non_fallthrough_target);  // > 0 if non-zero
-          break;
-        case kCondAE:
-          __ B(non_fallthrough_target);  // always true
-          break;
-      }
-    } else {
-      if (use_imm) {
-        // TODO: more efficient comparison with 16-bit constants without loading them into TMP.
-        rhs_reg = TMP;
-        __ LoadConst32(rhs_reg, rhs_imm);
-      }
-      switch (if_cond) {
-        case kCondEQ:
-          __ Beq(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondNE:
-          __ Bne(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondLT:
-          __ Blt(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondGE:
-          __ Bge(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondLE:
-          __ Bge(rhs_reg, lhs, non_fallthrough_target);
-          break;
-        case kCondGT:
-          __ Blt(rhs_reg, lhs, non_fallthrough_target);
-          break;
-        case kCondB:
-          __ Bltu(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondAE:
-          __ Bgeu(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondBE:
-          __ Bgeu(rhs_reg, lhs, non_fallthrough_target);
-          break;
-        case kCondA:
-          __ Bltu(rhs_reg, lhs, non_fallthrough_target);
-          break;
-      }
+    switch (type) {
+      default:
+        GenerateIntCompareAndBranch(if_cond, locations, branch_target);
+        break;
+      case Primitive::kPrimLong:
+        GenerateLongCompareAndBranch(if_cond, locations, branch_target);
+        break;
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
+        GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target);
+        break;
     }
   }
 
@@ -3413,9 +3902,9 @@
 }
 
 void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
+  LocationSummary::CallKind call_kind = load->IsInDexCache()
+      ? LocationSummary::kNoCall
+      : LocationSummary::kCallOnSlowPath;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
@@ -4408,31 +4897,19 @@
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
   // Create a set of compare/jumps.
-  Register temp_reg = TMP;
-  __ Addiu32(temp_reg, value_reg, -lower_bound);
-  // Jump to default if index is negative
-  // Note: We don't check the case that index is positive while value < lower_bound, because in
-  // this case, index >= num_entries must be true. So that we can save one branch instruction.
-  __ Bltz(temp_reg, codegen_->GetLabelOf(default_block));
-
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  // Jump to successors[0] if value == lower_bound.
-  __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0]));
-  int32_t last_index = 0;
-  for (; num_entries - last_index > 2; last_index += 2) {
-    __ Addiu(temp_reg, temp_reg, -2);
-    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
-    __ Bltz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
-    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
-    __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
-  }
-  if (num_entries - last_index == 2) {
-    // The last missing case_value.
-    __ Addiu(temp_reg, temp_reg, -1);
-    __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+  for (int32_t i = 0; i < num_entries; ++i) {
+    int32_t case_value = lower_bound + i;
+    MipsLabel* successor_label = codegen_->GetLabelOf(successors[i]);
+    if (case_value == 0) {
+      __ Beqz(value_reg, successor_label);
+    } else {
+      __ LoadConst32(TMP, case_value);
+      __ Beq(value_reg, TMP, successor_label);
+    }
   }
 
-  // And the default for any other value.
+  // Insert the default branch for every other value.
   if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
     __ B(codegen_->GetLabelOf(default_block));
   }
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index caf3174..1ee6bde 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -225,6 +225,18 @@
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
+  void GenerateIntCompare(IfCondition cond, LocationSummary* locations);
+  void GenerateIntCompareAndBranch(IfCondition cond,
+                                   LocationSummary* locations,
+                                   MipsLabel* label);
+  void GenerateLongCompareAndBranch(IfCondition cond,
+                                    LocationSummary* locations,
+                                    MipsLabel* label);
+  void GenerateFpCompareAndBranch(IfCondition cond,
+                                  bool gt_bias,
+                                  Primitive::Type type,
+                                  LocationSummary* locations,
+                                  MipsLabel* label);
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
                              MipsLabel* true_target,
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 1a9de15..99f58dd 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3051,7 +3051,8 @@
   } else {
     __ LoadFromOffset(kLoadDoubleword, out, current_method,
                       ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value());
-    __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+    __ LoadFromOffset(
+        kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
     // TODO: We will need a read barrier here.
     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
       DCHECK(cls->CanCallRuntime());
@@ -3105,9 +3106,9 @@
 }
 
 void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier)
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
+  LocationSummary::CallKind call_kind = load->IsInDexCache()
+      ? LocationSummary::kNoCall
+      : LocationSummary::kCallOnSlowPath;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
@@ -3120,7 +3121,8 @@
   __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
                     ArtMethod::DeclaringClassOffset().Int32Value());
   __ LoadFromOffset(kLoadDoubleword, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-  __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+  __ LoadFromOffset(
+      kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
   // TODO: We will need a read barrier here.
 
   if (!load->IsInDexCache()) {
@@ -3989,34 +3991,17 @@
   GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  // Create a set of compare/jumps.
-  GpuRegister temp_reg = TMP;
-  if (IsInt<16>(-lower_bound)) {
-    __ Addiu(temp_reg, value_reg, -lower_bound);
-  } else {
-    __ LoadConst32(AT, -lower_bound);
-    __ Addu(temp_reg, value_reg, AT);
-  }
-  // Jump to default if index is negative
-  // Note: We don't check the case that index is positive while value < lower_bound, because in
-  // this case, index >= num_entries must be true. So that we can save one branch instruction.
-  __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block));
-
+  // Create a series of compare/jumps.
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  // Jump to successors[0] if value == lower_bound.
-  __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0]));
-  int32_t last_index = 0;
-  for (; num_entries - last_index > 2; last_index += 2) {
-    __ Addiu(temp_reg, temp_reg, -2);
-    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
-    __ Bltzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
-    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
-    __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
-  }
-  if (num_entries - last_index == 2) {
-    // The last missing case_value.
-    __ Addiu(temp_reg, temp_reg, -1);
-    __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+  for (int32_t i = 0; i < num_entries; i++) {
+    int32_t case_value = lower_bound + i;
+    Mips64Label* succ = codegen_->GetLabelOf(successors[i]);
+    if (case_value == 0) {
+      __ Beqzc(value_reg, succ);
+    } else {
+      __ LoadConst32(TMP, case_value);
+      __ Beqc(value_reg, TMP, succ);
+    }
   }
 
   // And the default for any other value.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0fb552a..bc3256e 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -42,6 +42,7 @@
 
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = EAX;
+
 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
 
 static constexpr int kC2ConditionMask = 0x400;
@@ -433,6 +434,56 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
 };
 
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
+ public:
+  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location out, Location obj)
+      : instruction_(instruction), out_(out), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast())
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
+    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
+};
+
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
  public:
@@ -454,7 +505,7 @@
     // to be instrumented, e.g.:
     //
     //   __ movl(out, Address(out, offset));
-    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
     //
     // In that case, we have lost the information about the original
     // object, and the emitted read barrier cannot work properly.
@@ -470,7 +521,9 @@
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()));
+            instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -612,14 +665,18 @@
 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
  public:
   ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
-      : instruction_(instruction), out_(out), root_(root) {}
+      : instruction_(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     Register reg_out = out_.AsRegister<Register>();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
-    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -1831,7 +1888,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
@@ -4092,7 +4149,7 @@
   LOG(FATAL) << "Unreachable";
 }
 
-void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
   /*
    * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
@@ -4346,9 +4403,14 @@
 
   if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) {
     // Long values can be loaded atomically into an XMM using movsd.
-    // So we use an XMM register as a temp to achieve atomicity (first load the temp into the XMM
-    // and then copy the XMM into the output 32bits at a time).
+    // So we use an XMM register as a temp to achieve atomicity (first
+    // load the temp into the XMM and then copy the XMM into the
+    // output, 32 bits at a time).
     locations->AddTemp(Location::RequiresFpuRegister());
+  } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
@@ -4386,9 +4448,32 @@
     }
 
     case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
       __ movl(out.AsRegister<Register>(), Address(base, offset));
       break;
+
+    case Primitive::kPrimNot: {
+      // /* HeapReference<Object> */ out = *(base + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ movl(out.AsRegister<Register>(), Address(base, offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+      }
+      break;
     }
 
     case Primitive::kPrimLong: {
@@ -4423,17 +4508,20 @@
       UNREACHABLE();
   }
 
-  // Longs are handled in the switch.
-  if (field_type != Primitive::kPrimLong) {
+  if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimLong) {
+    // Potential implicit null checks, in the case of reference or
+    // long fields, are handled in the previous switch statement.
+  } else {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
-  }
-
-  if (field_type == Primitive::kPrimNot) {
-    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
+    if (field_type == Primitive::kPrimNot) {
+      // Memory barriers, in the case of references, are also handled
+      // in the previous switch statement.
+    } else {
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+    }
   }
 }
 
@@ -4498,7 +4586,7 @@
       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
   bool maybe_record_implicit_null_check_done = false;
@@ -4603,7 +4691,7 @@
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
 }
 
@@ -4784,6 +4872,11 @@
             Location::kOutputOverlap :
             Location::kNoOutputOverlap);
   }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
@@ -4791,12 +4884,13 @@
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
+  Location out_loc = locations->Out();
 
   Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4808,7 +4902,7 @@
 
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4820,7 +4914,7 @@
 
     case Primitive::kPrimShort: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4832,7 +4926,7 @@
 
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4842,13 +4936,9 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      static_assert(
-          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    case Primitive::kPrimInt: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4858,20 +4948,56 @@
       break;
     }
 
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(
+            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+      } else {
+        Register out = out_loc.AsRegister<Register>();
+        if (index.IsConstant()) {
+          uint32_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ movl(out, Address(obj, offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ movl(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(
+              instruction, out_loc, out_loc, obj_loc, data_offset, index);
+        }
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      Location out = locations->Out();
-      DCHECK_NE(obj, out.AsRegisterPairLow<Register>());
+      DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ movl(out.AsRegisterPairLow<Register>(), Address(obj, offset));
+        __ movl(out_loc.AsRegisterPairLow<Register>(), Address(obj, offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ movl(out.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
+        __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
       } else {
-        __ movl(out.AsRegisterPairLow<Register>(),
+        __ movl(out_loc.AsRegisterPairLow<Register>(),
                 Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ movl(out.AsRegisterPairHigh<Register>(),
+        __ movl(out_loc.AsRegisterPairHigh<Register>(),
                 Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize));
       }
       break;
@@ -4879,7 +5005,7 @@
 
     case Primitive::kPrimFloat: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4891,7 +5017,7 @@
 
     case Primitive::kPrimDouble: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4906,23 +5032,12 @@
       UNREACHABLE();
   }
 
-  if (type != Primitive::kPrimLong) {
+  if (type == Primitive::kPrimNot || type == Primitive::kPrimLong) {
+    // Potential implicit null checks, in the case of reference or
+    // long arrays, are handled in the previous switch statement.
+  } else {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
-
-  if (type == Primitive::kPrimNot) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-    Location out = locations->Out();
-    if (index.IsConstant()) {
-      uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
-    } else {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
-    }
-  }
 }
 
 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
@@ -5054,12 +5169,12 @@
           //   __ movl(temp2, temp);
           //   // /* HeapReference<Class> */ temp = temp->component_type_
           //   __ movl(temp, Address(temp, component_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
           //
           //   // /* HeapReference<Class> */ temp2 = register_value->klass_
           //   __ movl(temp2, Address(register_value, class_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
           //
           //   __ cmpl(temp, temp2);
@@ -5340,8 +5455,8 @@
     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   }
 
-  __ fs()->cmpw(Address::Absolute(
-      Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0));
+  __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()),
+                Immediate(0));
   if (successor == nullptr) {
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -5622,32 +5737,16 @@
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-      __ leal(out, Address(current_method, declaring_class_offset));
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      __ movl(out, Address(current_method, declaring_class_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    GenerateGcRootFieldLoad(
+        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ movl(out, Address(current_method,
                          ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
-
-    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
-      __ leal(out, Address(out, cache_offset));
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      __ movl(out, Address(out, cache_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = out[type_index]
+    GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
 
     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
       DCHECK(cls->CanCallRuntime());
@@ -5711,30 +5810,14 @@
   Register out = out_loc.AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
 
-  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-    __ leal(out, Address(current_method, declaring_class_offset));
-    // /* mirror::Class* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    __ movl(out, Address(current_method, declaring_class_offset));
-  }
-
+  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+  GenerateGcRootFieldLoad(
+      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
-
-  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::String>* */ out = &out[string_index]
-    __ leal(out, Address(out, cache_offset));
-    // /* mirror::String* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::String> */ out = out[string_index]
-    __ movl(out, Address(out, cache_offset));
-  }
+  // /* GcRoot<mirror::String> */ out = out[string_index]
+  GenerateGcRootFieldLoad(
+      load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
 
   if (!load->IsInDexCache()) {
     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
@@ -5782,6 +5865,14 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -5807,21 +5898,22 @@
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for
   // some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Location cls = locations->InAt(1);
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
+  Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5837,10 +5929,9 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  __ movl(out, Address(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<Register>());
@@ -5861,17 +5952,8 @@
       // object to avoid doing a comparison we know will fail.
       NearLabel loop;
       __ Bind(&loop);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ movl(out, Address(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5900,17 +5982,8 @@
         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
       }
       __ j(kEqual, &success);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ movl(out, Address(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -5934,17 +6007,8 @@
       }
       __ j(kEqual, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp = temp_loc.AsRegister<Register>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->component_type_
-      __ movl(out, Address(out, component_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5988,6 +6052,13 @@
       // HInstanceOf instruction (following the runtime calling
       // convention), which might be cluttered by the potential first
       // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
                                                                     /* is_fatal */ false);
@@ -6040,27 +6111,27 @@
   locations->AddTemp(Location::RequiresRegister());
   // When read barriers are enabled, we need an additional temporary
   // register for some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   Register obj = obj_loc.AsRegister<Register>();
   Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   Register temp = temp_loc.AsRegister<Register>();
+  Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
 
-  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   bool is_type_check_slow_path_fatal =
       (type_check_kind == TypeCheckKind::kExactCheck ||
        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -6080,8 +6151,7 @@
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  __ movl(temp, Address(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -6103,18 +6173,8 @@
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, compare_classes;
       __ Bind(&loop);
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ movl(temp, Address(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // to the `compare_classes` label to compare it with the checked
@@ -6127,8 +6187,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -6154,18 +6213,8 @@
       }
       __ j(kEqual, &done);
 
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ movl(temp, Address(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
@@ -6177,8 +6226,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6195,19 +6243,8 @@
       __ j(kEqual, &done);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        Register temp2 = temp2_loc.AsRegister<Register>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->component_type_
-      __ movl(temp, Address(temp, component_offset));
-      codegen_->MaybeGenerateReadBarrier(
-          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
@@ -6221,8 +6258,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -6230,8 +6266,7 @@
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -6248,6 +6283,13 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
   }
@@ -6409,14 +6451,226 @@
   }
 }
 
-void CodeGeneratorX86::GenerateReadBarrier(HInstruction* instruction,
-                                           Location out,
-                                           Location ref,
-                                           Location obj,
-                                           uint32_t offset,
-                                           Location index) {
+void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                   Location out,
+                                                                   uint32_t offset,
+                                                                   Location temp) {
+  Register out_reg = out.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ movl(temp.AsRegister<Register>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ movl(out_reg, Address(out_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ movl(out_reg, Address(out_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                    Location out,
+                                                                    Location obj,
+                                                                    uint32_t offset,
+                                                                    Location temp) {
+  Register out_reg = out.AsRegister<Register>();
+  Register obj_reg = obj.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ movl(out_reg, Address(obj_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ movl(out_reg, Address(obj_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                          Location root,
+                                                          Register obj,
+                                                          uint32_t offset) {
+  Register root_reg = root.AsRegister<Register>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barrier are used:
+      //
+      //   root = obj.field;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ movl(root_reg, Address(obj, offset));
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCode* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, root, root);
+      codegen_->AddSlowPath(slow_path);
+
+      __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86WordSize>().Int32Value()),
+                    Immediate(0));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ leal(root_reg, Address(obj, offset));
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ movl(root_reg, Address(obj, offset));
+  }
+}
+
+void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t offset,
+                                                             Location temp,
+                                                             bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Address src(obj, offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                             Location ref,
+                                                             Register obj,
+                                                             uint32_t data_offset,
+                                                             Location index,
+                                                             Location temp,
+                                                             bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  Address src = index.IsConstant() ?
+      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
+      Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                 Location ref,
+                                                                 Register obj,
+                                                                 const Address& src,
+                                                                 Location temp,
+                                                                 bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as:
+  // - it implements the load-load fence using a data dependency on
+  //   the high-bits of rb_state, which are expected to be all zeroes;
+  // - it performs additional checks that we do not do here for
+  //   performance reasons.
+
+  Register ref_reg = ref.AsRegister<Register>();
+  Register temp_reg = temp.AsRegister<Register>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ movl(temp_reg, Address(obj, monitor_offset));
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
+  __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
+  static_assert(
+      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+  // Load fence to prevent load-load reordering.
+  // Note that this is a no-op, thanks to the x86 memory model.
+  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+  // The actual reference load.
+  // /* HeapReference<Object> */ ref = *src
+  __ movl(ref_reg, src);
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(instruction, ref, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
+  __ j(kEqual, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
+                                               Location out,
+                                               Location ref,
+                                               Location obj,
+                                               uint32_t offset,
+                                               Location index) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the reference load.
+  //
   // If heap poisoning is enabled, the unpoisoning of the loaded
   // reference will be carried out by the runtime within the slow
   // path.
@@ -6430,57 +6684,41 @@
       ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
   AddSlowPath(slow_path);
 
-  // TODO: When read barrier has a fast path, add it here.
-  /* Currently the read barrier call is inserted after the original load.
-   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
-   * original load. This load-load ordering is required by the read barrier.
-   * The fast path/slow path (for Baker's algorithm) should look like:
-   *
-   * bool isGray = obj.LockWord & kReadBarrierMask;
-   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
-   * ref = obj.field;    // this is the original load
-   * if (isGray) {
-   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
-   * }
-   */
-
   __ jmp(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void CodeGeneratorX86::MaybeGenerateReadBarrier(HInstruction* instruction,
-                                                Location out,
-                                                Location ref,
-                                                Location obj,
-                                                uint32_t offset,
-                                                Location index) {
+void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                    Location out,
+                                                    Location ref,
+                                                    Location obj,
+                                                    uint32_t offset,
+                                                    Location index) {
   if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
     // If heap poisoning is enabled, unpoisoning will be taken care of
     // by the runtime within the slow path.
-    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   } else if (kPoisonHeapReferences) {
     __ UnpoisonHeapReference(out.AsRegister<Register>());
   }
 }
 
-void CodeGeneratorX86::GenerateReadBarrierForRoot(HInstruction* instruction,
-                                                  Location out,
-                                                  Location root) {
+void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                      Location out,
+                                                      Location root) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
   // Note that GC roots are not affected by heap poisoning, so we do
   // not need to do anything special for this here.
   SlowPathCode* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root);
   AddSlowPath(slow_path);
 
-  // TODO: Implement a fast path for ReadBarrierForRoot, performing
-  // the following operation (for Baker's algorithm):
-  //
-  //   if (thread.tls32_.is_gc_marking) {
-  //     root = Mark(root);
-  //   }
-
   __ jmp(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -6514,67 +6752,31 @@
   locations->SetInAt(0, Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
-                                                              int32_t lower_bound,
-                                                              uint32_t num_entries,
-                                                              HBasicBlock* switch_block,
-                                                              HBasicBlock* default_block) {
-  // Figure out the correct compare values and jump conditions.
-  // Handle the first compare/branch as a special case because it might
-  // jump to the default case.
-  DCHECK_GT(num_entries, 2u);
-  Condition first_condition;
-  uint32_t index;
-  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
-  if (lower_bound != 0) {
-    first_condition = kLess;
-    __ cmpl(value_reg, Immediate(lower_bound));
-    __ j(first_condition, codegen_->GetLabelOf(default_block));
-    __ j(kEqual, codegen_->GetLabelOf(successors[0]));
+void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  int32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  Register value_reg = locations->InAt(0).AsRegister<Register>();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-    index = 1;
-  } else {
-    // Handle all the compare/jumps below.
-    first_condition = kBelow;
-    index = 0;
-  }
-
-  // Handle the rest of the compare/jumps.
-  for (; index + 1 < num_entries; index += 2) {
-    int32_t compare_to_value = lower_bound + index + 1;
-    __ cmpl(value_reg, Immediate(compare_to_value));
-    // Jump to successors[index] if value < case_value[index].
-    __ j(first_condition, codegen_->GetLabelOf(successors[index]));
-    // Jump to successors[index + 1] if value == case_value[index + 1].
-    __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
-  }
-
-  if (index != num_entries) {
-    // There are an odd number of entries. Handle the last one.
-    DCHECK_EQ(index + 1, num_entries);
-    __ cmpl(value_reg, Immediate(lower_bound + index));
-    __ j(kEqual, codegen_->GetLabelOf(successors[index]));
+  // Create a series of compare/jumps.
+  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+  for (int i = 0; i < num_entries; i++) {
+    int32_t case_value = lower_bound + i;
+    if (case_value == 0) {
+      __ testl(value_reg, value_reg);
+    } else {
+      __ cmpl(value_reg, Immediate(case_value));
+    }
+    __ j(kEqual, codegen_->GetLabelOf(successors[i]));
   }
 
   // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
-    __ jmp(codegen_->GetLabelOf(default_block));
+  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+      __ jmp(codegen_->GetLabelOf(default_block));
   }
 }
 
-void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
-  int32_t lower_bound = switch_instr->GetStartValue();
-  uint32_t num_entries = switch_instr->GetNumEntries();
-  LocationSummary* locations = switch_instr->GetLocations();
-  Register value_reg = locations->InAt(0).AsRegister<Register>();
-
-  GenPackedSwitchWithCompares(value_reg,
-                              lower_bound,
-                              num_entries,
-                              switch_instr->GetBlock(),
-                              switch_instr->GetDefaultBlock());
-}
-
 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
@@ -6589,20 +6791,11 @@
 
 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  uint32_t num_entries = switch_instr->GetNumEntries();
+  int32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
   Register value_reg = locations->InAt(0).AsRegister<Register>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  if (num_entries <= kPackedSwitchJumpTableThreshold) {
-    GenPackedSwitchWithCompares(value_reg,
-                                lower_bound,
-                                num_entries,
-                                switch_instr->GetBlock(),
-                                default_block);
-    return;
-  }
-
   // Optimizing has a jump area.
   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
   Register constant_area = locations->InAt(1).AsRegister<Register>();
@@ -6614,7 +6807,7 @@
   }
 
   // Is the value in range?
-  DCHECK_GE(num_entries, 1u);
+  DCHECK_GE(num_entries, 1);
   __ cmpl(value_reg, Immediate(num_entries - 1));
   __ j(kAbove, codegen_->GetLabelOf(default_block));
 
@@ -6839,7 +7032,7 @@
 // TODO: target as memory.
 void CodeGeneratorX86::MoveFromReturnRegister(Location target, Primitive::Type type) {
   if (!target.IsValid()) {
-    DCHECK(type == Primitive::kPrimVoid);
+    DCHECK_EQ(type, Primitive::kPrimVoid);
     return;
   }
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f9403a6..7c292fa 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -195,11 +195,6 @@
 
   X86Assembler* GetAssembler() const { return assembler_; }
 
-  // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
-  // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
-  // generates less code/data with a small num_entries.
-  static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
-
  private:
   // Generate code for the given suspend check. If not null, `successor`
   // is the block to branch to if the suspend check is not needed, and after
@@ -219,11 +214,44 @@
   void GenerateShlLong(const Location& loc, int shift);
   void GenerateShrLong(const Location& loc, int shift);
   void GenerateUShrLong(const Location& loc, int shift);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a read barrier.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a Baker's read barrier.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               Register obj,
+                               uint32_t offset);
+
   // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
   // `is_wide` specifies whether it is long/double or not.
   void PushOntoFPStack(Location source, uint32_t temp_offset,
@@ -241,11 +269,6 @@
   void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
-  void GenPackedSwitchWithCompares(Register value_reg,
-                                   int32_t lower_bound,
-                                   uint32_t num_entries,
-                                   HBasicBlock* switch_block,
-                                   HBasicBlock* default_block);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
@@ -364,6 +387,8 @@
                   Register value,
                   bool value_can_be_null);
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   Label* GetLabelOf(HBasicBlock* block) const {
     return CommonGetLabelOf<Label>(block_labels_, block);
   }
@@ -405,7 +430,26 @@
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
 
-  // Generate a read barrier for a heap reference within `instruction`.
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             Register obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             Register obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
   //
   // A read barrier for an object reference read from the heap is
   // implemented as a call to the artReadBarrierSlow runtime entry
@@ -422,23 +466,25 @@
   // When `index` is provided (i.e. for array accesses), the offset
   // value passed to artReadBarrierSlow is adjusted to take `index`
   // into account.
-  void GenerateReadBarrier(HInstruction* instruction,
-                           Location out,
-                           Location ref,
-                           Location obj,
-                           uint32_t offset,
-                           Location index = Location::NoLocation());
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
 
-  // If read barriers are enabled, generate a read barrier for a heap reference.
-  // If heap poisoning is enabled, also unpoison the reference in `out`.
-  void MaybeGenerateReadBarrier(HInstruction* instruction,
-                                Location out,
-                                Location ref,
-                                Location obj,
-                                uint32_t offset,
-                                Location index = Location::NoLocation());
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
 
-  // Generate a read barrier for a GC root within `instruction`.
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
   //
   // A read barrier for an object reference GC root is implemented as
   // a call to the artReadBarrierForRootSlow runtime entry point,
@@ -448,9 +494,18 @@
   //
   // The `out` location contains the value returned by
   // artReadBarrierForRootSlow.
-  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
 
  private:
+  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 const Address& src,
+                                                 Location temp,
+                                                 bool needs_null_check);
+
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
   struct PcRelativeDexCacheAccessInfo {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 610ce67..92cef5f 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -41,10 +41,6 @@
 
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = RDI;
-// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
-// table version generates 7 instructions and num_entries literals. Compare/jump sequence will
-// generates less code/data with a small num_entries.
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
 
 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
@@ -456,6 +452,56 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
 };
 
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
+ public:
+  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
+      : instruction_(instruction), out_(out), obj_(obj) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast())
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location obj_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
+};
+
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
  public:
@@ -477,7 +523,7 @@
     // reference load to be instrumented, e.g.:
     //
     //   __ movl(out, Address(out, offset));
-    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
     //
     // In that case, we have lost the information about the original
     // object, and the emitted read barrier cannot work properly.
@@ -493,7 +539,9 @@
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
     DCHECK(!instruction_->IsInvoke() ||
            (instruction_->IsInvokeStaticOrDirect() &&
-            instruction_->GetLocations()->Intrinsified()));
+            instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -634,13 +682,17 @@
 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
  public:
   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
-      : instruction_(instruction), out_(out), root_(root) {}
+      : instruction_(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(locations->CanCall());
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
-    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -731,7 +783,7 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
       // temp = thread->string_init_entrypoint
       __ gs()->movl(temp.AsRegister<CpuRegister>(),
-                    Address::Absolute(invoke->GetStringInitOffset(), true));
+                    Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
@@ -748,7 +800,7 @@
       pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
                                                   invoke->GetDexCacheArrayOffset());
       __ movq(temp.AsRegister<CpuRegister>(),
-              Address::Absolute(kDummy32BitOffset, false /* no_rip */));
+              Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
       // Bind the label at the end of the "movl" insn.
       __ Bind(&pc_relative_dex_cache_patches_.back().label);
       break;
@@ -907,7 +959,7 @@
                                         uint32_t dex_pc,
                                         SlowPathCode* slow_path) {
   ValidateInvokeRuntime(instruction, slow_path);
-  __ gs()->call(Address::Absolute(entry_point_offset, true));
+  __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
@@ -1939,7 +1991,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
-  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
 }
 
 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
@@ -2667,7 +2719,8 @@
           } else {
             DCHECK(in.GetConstant()->IsIntConstant());
             __ movl(out.AsRegister<CpuRegister>(),
-                    Immediate(static_cast<uint16_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+                    Immediate(static_cast<uint16_t>(
+                        in.GetConstant()->AsIntConstant()->GetValue())));
           }
           break;
 
@@ -2911,7 +2964,8 @@
         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ addss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ addss(first.AsFpuRegister<XmmRegister>(),
@@ -2925,7 +2979,8 @@
         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ addsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ addsd(first.AsFpuRegister<XmmRegister>(),
@@ -3000,7 +3055,8 @@
         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ subss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ subss(first.AsFpuRegister<XmmRegister>(),
@@ -3014,7 +3070,8 @@
         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ subsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ subsd(first.AsFpuRegister<XmmRegister>(),
@@ -3121,7 +3178,8 @@
         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ mulss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ mulss(first.AsFpuRegister<XmmRegister>(),
@@ -3136,7 +3194,8 @@
         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ mulsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ mulsd(first.AsFpuRegister<XmmRegister>(),
@@ -3542,7 +3601,8 @@
         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ divss(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+                 codegen_->LiteralFloatAddress(
+                     second.GetConstant()->AsFloatConstant()->GetValue()));
       } else {
         DCHECK(second.IsStackSlot());
         __ divss(first.AsFpuRegister<XmmRegister>(),
@@ -3556,7 +3616,8 @@
         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
         __ divsd(first.AsFpuRegister<XmmRegister>(),
-                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+                 codegen_->LiteralDoubleAddress(
+                     second.GetConstant()->AsDoubleConstant()->GetValue()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ divsd(first.AsFpuRegister<XmmRegister>(),
@@ -3755,6 +3816,7 @@
     }
     default:
       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+      UNREACHABLE();
   }
 }
 
@@ -3959,10 +4021,10 @@
   LOG(FATAL) << "Unimplemented";
 }
 
-void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
   /*
    * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
-   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
+   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
    */
   switch (kind) {
@@ -4002,6 +4064,11 @@
         Location::RequiresRegister(),
         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
+  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
@@ -4037,12 +4104,36 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
+    case Primitive::kPrimInt: {
       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
       break;
     }
 
+    case Primitive::kPrimNot: {
+      // /* HeapReference<Object> */ out = *(base + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
       break;
@@ -4063,14 +4154,20 @@
       UNREACHABLE();
   }
 
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
-
-  if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+  if (field_type == Primitive::kPrimNot) {
+    // Potential implicit null checks, in the case of reference
+    // fields, are handled in the previous switch statement.
+  } else {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
-  if (field_type == Primitive::kPrimNot) {
-    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
+  if (is_volatile) {
+    if (field_type == Primitive::kPrimNot) {
+      // Memory barriers, in the case of references, are also handled
+      // in the previous switch statement.
+    } else {
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+    }
   }
 }
 
@@ -4124,7 +4221,7 @@
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
   bool maybe_record_implicit_null_check_done = false;
@@ -4230,7 +4327,7 @@
   }
 
   if (is_volatile) {
-    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
 }
 
@@ -4407,6 +4504,11 @@
         Location::RequiresRegister(),
         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
@@ -4414,12 +4516,13 @@
   Location obj_loc = locations->InAt(0);
   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location index = locations->InAt(1);
-  Primitive::Type type = instruction->GetType();
+  Location out_loc = locations->Out();
 
+  Primitive::Type type = instruction->GetType();
   switch (type) {
     case Primitive::kPrimBoolean: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4431,7 +4534,7 @@
 
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4443,7 +4546,7 @@
 
     case Primitive::kPrimShort: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4455,7 +4558,7 @@
 
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4465,13 +4568,9 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      static_assert(
-          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    case Primitive::kPrimInt: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4481,9 +4580,46 @@
       break;
     }
 
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(
+            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+      } else {
+        CpuRegister out = out_loc.AsRegister<CpuRegister>();
+        if (index.IsConstant()) {
+          uint32_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ movl(out, Address(obj, offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(
+              instruction, out_loc, out_loc, obj_loc, data_offset, index);
+        }
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+      CpuRegister out = out_loc.AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movq(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4495,7 +4631,7 @@
 
     case Primitive::kPrimFloat: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4507,7 +4643,7 @@
 
     case Primitive::kPrimDouble: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4521,20 +4657,12 @@
       LOG(FATAL) << "Unreachable type " << type;
       UNREACHABLE();
   }
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
 
   if (type == Primitive::kPrimNot) {
-    static_assert(
-        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
-    uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-    Location out = locations->Out();
-    if (index.IsConstant()) {
-      uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
-    } else {
-      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
-    }
+    // Potential implicit null checks, in the case of reference
+    // arrays, are handled in the previous switch statement.
+  } else {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 }
 
@@ -4658,12 +4786,12 @@
           //   __ movl(temp2, temp);
           //   // /* HeapReference<Class> */ temp = temp->component_type_
           //   __ movl(temp, Address(temp, component_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
           //
           //   // /* HeapReference<Class> */ temp2 = register_value->klass_
           //   __ movl(temp2, Address(register_value, class_offset));
-          //   codegen_->GenerateReadBarrier(
+          //   codegen_->GenerateReadBarrierSlow(
           //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
           //
           //   __ cmpl(temp, temp2);
@@ -4889,8 +5017,8 @@
     __ testl(value, value);
     __ j(kEqual, &is_null);
   }
-  __ gs()->movq(card, Address::Absolute(
-      Thread::CardTableOffset<kX86_64WordSize>().Int32Value(), true));
+  __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(),
+                                        /* no_rip */ true));
   __ movq(temp, object);
   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
   __ movb(Address(temp, card, TIMES_1, 0), card);
@@ -4949,8 +5077,9 @@
     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   }
 
-  __ gs()->cmpw(Address::Absolute(
-      Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0));
+  __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(),
+                                  /* no_rip */ true),
+                Immediate(0));
   if (successor == nullptr) {
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
@@ -5174,7 +5303,7 @@
           Immediate(mirror::Class::kStatusInitialized));
   __ j(kLess, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
-  // No need for memory fence, thanks to the X86_64 memory model.
+  // No need for memory fence, thanks to the x86-64 memory model.
 }
 
 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
@@ -5205,32 +5334,16 @@
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-      __ leaq(out, Address(current_method, declaring_class_offset));
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-      __ movl(out, Address(current_method, declaring_class_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    GenerateGcRootFieldLoad(
+        cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
     // /* GcRoot<mirror::Class>[] */ out =
     //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ movq(out, Address(current_method,
                          ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
-
-    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
-    if (kEmitCompilerReadBarrier) {
-      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
-      __ leaq(out, Address(out, cache_offset));
-      // /* mirror::Class* */ out = out->Read()
-      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
-    } else {
-      // /* GcRoot<mirror::Class> */ out = out[type_index]
-      __ movl(out, Address(out, cache_offset));
-    }
+    // /* GcRoot<mirror::Class> */ out = out[type_index]
+    GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
 
     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
       DCHECK(cls->CanCallRuntime());
@@ -5283,30 +5396,14 @@
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
   CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
 
-  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
-    __ leaq(out, Address(current_method, declaring_class_offset));
-    // /* mirror::Class* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
-    __ movl(out, Address(current_method, declaring_class_offset));
-  }
-
+  // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+  GenerateGcRootFieldLoad(
+      load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
-
-  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
-  if (kEmitCompilerReadBarrier) {
-    // /* GcRoot<mirror::String>* */ out = &out[string_index]
-    __ leaq(out, Address(out, cache_offset));
-    // /* mirror::String* */ out = out->Read()
-    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
-  } else {
-    // /* GcRoot<mirror::String> */ out = out[string_index]
-    __ movl(out, Address(out, cache_offset));
-  }
+  // /* GcRoot<mirror::String> */ out = out[string_index]
+  GenerateGcRootFieldLoad(
+      load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
 
   if (!load->IsInDexCache()) {
     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
@@ -5318,7 +5415,8 @@
 }
 
 static Address GetExceptionTlsAddress() {
-  return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(), true);
+  return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(),
+                           /* no_rip */ true);
 }
 
 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
@@ -5354,6 +5452,14 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -5379,21 +5485,22 @@
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for
   // some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location cls = locations->InAt(1);
   Location out_loc =  locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
+  Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5409,10 +5516,9 @@
   }
 
   // /* HeapReference<Class> */ out = obj->klass_
-  __ movl(out, Address(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<CpuRegister>());
@@ -5438,17 +5544,8 @@
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, success;
       __ Bind(&loop);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ movl(out, Address(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5477,17 +5574,8 @@
         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kEqual, &success);
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->super_class_
-      __ movl(out, Address(out, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -5511,17 +5599,8 @@
       }
       __ j(kEqual, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `out` into `temp` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-        __ movl(temp, out);
-      }
       // /* HeapReference<Class> */ out = out->component_type_
-      __ movl(out, Address(out, component_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5565,6 +5644,13 @@
       // HInstanceOf instruction (following the runtime calling
       // convention), which might be cluttered by the potential first
       // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
                                                                        /* is_fatal */ false);
@@ -5617,27 +5703,27 @@
   locations->AddTemp(Location::RequiresRegister());
   // When read barriers are enabled, we need an additional temporary
   // register for some cases.
-  if (kEmitCompilerReadBarrier &&
-      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
   Location obj_loc = locations->InAt(0);
   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+  Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
 
-  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   bool is_type_check_slow_path_fatal =
       (type_check_kind == TypeCheckKind::kExactCheck ||
        type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5649,7 +5735,7 @@
                                                            is_type_check_slow_path_fatal);
   codegen_->AddSlowPath(type_check_slow_path);
 
-  NearLabel done;
+  Label done;
   // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
     __ testl(obj, obj);
@@ -5657,8 +5743,7 @@
   }
 
   // /* HeapReference<Class> */ temp = obj->klass_
-  __ movl(temp, Address(obj, class_offset));
-  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
 
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
@@ -5680,18 +5765,8 @@
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, compare_classes;
       __ Bind(&loop);
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ movl(temp, Address(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // to the `compare_classes` label to compare it with the checked
@@ -5704,8 +5779,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&compare_classes);
@@ -5731,18 +5805,8 @@
       }
       __ j(kEqual, &done);
 
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ movl(temp, Address(temp, super_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
@@ -5754,8 +5818,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -5772,19 +5835,8 @@
       __ j(kEqual, &done);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
-      Location temp2_loc =
-          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
-      if (kEmitCompilerReadBarrier) {
-        // Save the value of `temp` into `temp2` before overwriting it
-        // in the following move operation, as we will need it for the
-        // read barrier below.
-        CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
-        __ movl(temp2, temp);
-      }
       // /* HeapReference<Class> */ temp = temp->component_type_
-      __ movl(temp, Address(temp, component_offset));
-      codegen_->MaybeGenerateReadBarrier(
-          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
@@ -5798,8 +5850,7 @@
       // going into the slow path, as it has been overwritten in the
       // meantime.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
 
       __ Bind(&check_non_primitive_component_type);
@@ -5807,8 +5858,7 @@
       __ j(kEqual, &done);
       // Same comment as above regarding `temp` and the slow path.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ movl(temp, Address(obj, class_offset));
-      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
@@ -5825,6 +5875,13 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
   }
@@ -5968,14 +6025,227 @@
   }
 }
 
-void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction,
-                                              Location out,
-                                              Location ref,
-                                              Location obj,
-                                              uint32_t offset,
-                                              Location index) {
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                                                      Location out,
+                                                                      uint32_t offset,
+                                                                      Location temp) {
+  CpuRegister out_reg = out.AsRegister<CpuRegister>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ movl(temp.AsRegister<CpuRegister>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ movl(out_reg, Address(out_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ movl(out_reg, Address(out_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                                                       Location out,
+                                                                       Location obj,
+                                                                       uint32_t offset,
+                                                                       Location temp) {
+  CpuRegister out_reg = out.AsRegister<CpuRegister>();
+  CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ movl(out_reg, Address(obj_reg, offset));
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ movl(out_reg, Address(obj_reg, offset));
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                             Location root,
+                                                             CpuRegister obj,
+                                                             uint32_t offset) {
+  CpuRegister root_reg = root.AsRegister<CpuRegister>();
+  if (kEmitCompilerReadBarrier) {
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barrier are used:
+      //
+      //   root = obj.field;
+      //   if (Thread::Current()->GetIsGcMarking()) {
+      //     root = ReadBarrier::Mark(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ movl(root_reg, Address(obj, offset));
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path used to mark the GC root `root`.
+      SlowPathCode* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
+      codegen_->AddSlowPath(slow_path);
+
+      __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
+                                      /* no_rip */ true),
+                    Immediate(0));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ leaq(root_reg, Address(obj, offset));
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    __ movl(root_reg, Address(obj, offset));
+  }
+}
+
+void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                CpuRegister obj,
+                                                                uint32_t offset,
+                                                                Location temp,
+                                                                bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Address src(obj, offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                CpuRegister obj,
+                                                                uint32_t data_offset,
+                                                                Location index,
+                                                                Location temp,
+                                                                bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  Address src = index.IsConstant() ?
+      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
+      Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
+  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                    Location ref,
+                                                                    CpuRegister obj,
+                                                                    const Address& src,
+                                                                    Location temp,
+                                                                    bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as:
+  // - it implements the load-load fence using a data dependency on
+  //   the high-bits of rb_state, which are expected to be all zeroes;
+  // - it performs additional checks that we do not do here for
+  //   performance reasons.
+
+  CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
+  CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ movl(temp_reg, Address(obj, monitor_offset));
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
+  __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
+  static_assert(
+      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+  // Load fence to prevent load-load reordering.
+  // Note that this is a no-op, thanks to the x86-64 memory model.
+  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+  // The actual reference load.
+  // /* HeapReference<Object> */ ref = *src
+  __ movl(ref_reg, src);
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
+  __ j(kEqual, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
+                                                  Location out,
+                                                  Location ref,
+                                                  Location obj,
+                                                  uint32_t offset,
+                                                  Location index) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the reference load.
+  //
   // If heap poisoning is enabled, the unpoisoning of the loaded
   // reference will be carried out by the runtime within the slow
   // path.
@@ -5989,57 +6259,41 @@
       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
   AddSlowPath(slow_path);
 
-  // TODO: When read barrier has a fast path, add it here.
-  /* Currently the read barrier call is inserted after the original load.
-   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
-   * original load. This load-load ordering is required by the read barrier.
-   * The fast path/slow path (for Baker's algorithm) should look like:
-   *
-   * bool isGray = obj.LockWord & kReadBarrierMask;
-   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
-   * ref = obj.field;    // this is the original load
-   * if (isGray) {
-   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
-   * }
-   */
-
   __ jmp(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void CodeGeneratorX86_64::MaybeGenerateReadBarrier(HInstruction* instruction,
-                                                   Location out,
-                                                   Location ref,
-                                                   Location obj,
-                                                   uint32_t offset,
-                                                   Location index) {
+void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                       Location out,
+                                                       Location ref,
+                                                       Location obj,
+                                                       uint32_t offset,
+                                                       Location index) {
   if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
     // If heap poisoning is enabled, unpoisoning will be taken care of
     // by the runtime within the slow path.
-    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   } else if (kPoisonHeapReferences) {
     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
   }
 }
 
-void CodeGeneratorX86_64::GenerateReadBarrierForRoot(HInstruction* instruction,
-                                                     Location out,
-                                                     Location root) {
+void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                         Location out,
+                                                         Location root) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
   // Note that GC roots are not affected by heap poisoning, so we do
   // not need to do anything special for this here.
   SlowPathCode* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
   AddSlowPath(slow_path);
 
-  // TODO: Implement a fast path for ReadBarrierForRoot, performing
-  // the following operation (for Baker's algorithm):
-  //
-  //   if (thread.tls32_.is_gc_marking) {
-  //     root = Mark(root);
-  //   }
-
   __ jmp(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -6077,58 +6331,11 @@
 
 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  uint32_t num_entries = switch_instr->GetNumEntries();
+  int32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
-  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
-
-  // Should we generate smaller inline compare/jumps?
-  if (num_entries <= kPackedSwitchJumpTableThreshold) {
-    // Figure out the correct compare values and jump conditions.
-    // Handle the first compare/branch as a special case because it might
-    // jump to the default case.
-    DCHECK_GT(num_entries, 2u);
-    Condition first_condition;
-    uint32_t index;
-    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-    if (lower_bound != 0) {
-      first_condition = kLess;
-      __ cmpl(value_reg_in, Immediate(lower_bound));
-      __ j(first_condition, codegen_->GetLabelOf(default_block));
-      __ j(kEqual, codegen_->GetLabelOf(successors[0]));
-
-      index = 1;
-    } else {
-      // Handle all the compare/jumps below.
-      first_condition = kBelow;
-      index = 0;
-    }
-
-    // Handle the rest of the compare/jumps.
-    for (; index + 1 < num_entries; index += 2) {
-      int32_t compare_to_value = lower_bound + index + 1;
-      __ cmpl(value_reg_in, Immediate(compare_to_value));
-      // Jump to successors[index] if value < case_value[index].
-      __ j(first_condition, codegen_->GetLabelOf(successors[index]));
-      // Jump to successors[index + 1] if value == case_value[index + 1].
-      __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
-    }
-
-    if (index != num_entries) {
-      // There are an odd number of entries. Handle the last one.
-      DCHECK_EQ(index + 1, num_entries);
-      __ cmpl(value_reg_in, Immediate(lower_bound + index));
-      __ j(kEqual, codegen_->GetLabelOf(successors[index]));
-    }
-
-    // And the default for any other value.
-    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-      __ jmp(codegen_->GetLabelOf(default_block));
-    }
-    return;
-  }
 
   // Remove the bias, if needed.
   Register value_reg_out = value_reg_in.AsRegister();
@@ -6139,6 +6346,7 @@
   CpuRegister value_reg(value_reg_out);
 
   // Is the value in range?
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
   __ cmpl(value_reg, Immediate(num_entries - 1));
   __ j(kAbove, codegen_->GetLabelOf(default_block));
 
@@ -6288,7 +6496,7 @@
 // TODO: trg as memory.
 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
-    DCHECK(type == Primitive::kPrimVoid);
+    DCHECK_EQ(type, Primitive::kPrimVoid);
     return;
   }
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 7351fed..dda9ea2 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -213,11 +213,44 @@
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleShift(HBinaryOperation* operation);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a read barrier.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a Baker's read barrier.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               CpuRegister obj,
+                               uint32_t offset);
+
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
   void PushOntoFPStack(Location source, uint32_t temp_offset,
@@ -324,6 +357,8 @@
                   CpuRegister value,
                   bool value_can_be_null);
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   // Helper method to move a value between two locations.
   void Move(Location destination, Location source);
 
@@ -356,7 +391,26 @@
     return isa_features_;
   }
 
-  // Generate a read barrier for a heap reference within `instruction`.
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             CpuRegister obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             CpuRegister obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
   //
   // A read barrier for an object reference read from the heap is
   // implemented as a call to the artReadBarrierSlow runtime entry
@@ -373,23 +427,25 @@
   // When `index` provided (i.e., when it is different from
   // Location::NoLocation()), the offset value passed to
   // artReadBarrierSlow is adjusted to take `index` into account.
-  void GenerateReadBarrier(HInstruction* instruction,
-                           Location out,
-                           Location ref,
-                           Location obj,
-                           uint32_t offset,
-                           Location index = Location::NoLocation());
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
 
-  // If read barriers are enabled, generate a read barrier for a heap reference.
-  // If heap poisoning is enabled, also unpoison the reference in `out`.
-  void MaybeGenerateReadBarrier(HInstruction* instruction,
-                                Location out,
-                                Location ref,
-                                Location obj,
-                                uint32_t offset,
-                                Location index = Location::NoLocation());
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
 
-  // Generate a read barrier for a GC root within `instruction`.
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
   //
   // A read barrier for an object reference GC root is implemented as
   // a call to the artReadBarrierForRootSlow runtime entry point,
@@ -399,7 +455,7 @@
   //
   // The `out` location contains the value returned by
   // artReadBarrierForRootSlow.
-  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
 
   int ConstantAreaStart() const {
     return constant_area_start_;
@@ -424,6 +480,15 @@
                           HInstruction* instruction);
 
  private:
+  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 CpuRegister obj,
+                                                 const Address& src,
+                                                 Location temp,
+                                                 bool needs_null_check);
+
   struct PcRelativeDexCacheAccessInfo {
     PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
         : target_dex_file(dex_file), element_offset(element_off), label() { }
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 57de41f..d970704 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -35,6 +35,7 @@
 #include "code_generator_mips64.h"
 #include "code_generator_x86.h"
 #include "code_generator_x86_64.h"
+#include "code_simulator_container.h"
 #include "common_compiler_test.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
@@ -124,26 +125,85 @@
   DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
 };
 
+static bool CanExecuteOnHardware(InstructionSet target_isa) {
+  return (target_isa == kRuntimeISA)
+      // Handle the special case of ARM, with two instructions sets (ARM32 and Thumb-2).
+      || (kRuntimeISA == kArm && target_isa == kThumb2);
+}
+
+static bool CanExecute(InstructionSet target_isa) {
+  CodeSimulatorContainer simulator(target_isa);
+  return CanExecuteOnHardware(target_isa) || simulator.CanSimulate();
+}
+
+template <typename Expected>
+static Expected SimulatorExecute(CodeSimulator* simulator, Expected (*f)());
+
+template <>
+bool SimulatorExecute<bool>(CodeSimulator* simulator, bool (*f)()) {
+  simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+  return simulator->GetCReturnBool();
+}
+
+template <>
+int32_t SimulatorExecute<int32_t>(CodeSimulator* simulator, int32_t (*f)()) {
+  simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+  return simulator->GetCReturnInt32();
+}
+
+template <>
+int64_t SimulatorExecute<int64_t>(CodeSimulator* simulator, int64_t (*f)()) {
+  simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+  return simulator->GetCReturnInt64();
+}
+
+template <typename Expected>
+static void VerifyGeneratedCode(InstructionSet target_isa,
+                                Expected (*f)(),
+                                bool has_result,
+                                Expected expected) {
+  ASSERT_TRUE(CanExecute(target_isa)) << "Target isa is not executable.";
+
+  // Verify on simulator.
+  CodeSimulatorContainer simulator(target_isa);
+  if (simulator.CanSimulate()) {
+    Expected result = SimulatorExecute<Expected>(simulator.Get(), f);
+    if (has_result) {
+      ASSERT_EQ(expected, result);
+    }
+  }
+
+  // Verify on hardware.
+  if (CanExecuteOnHardware(target_isa)) {
+    Expected result = f();
+    if (has_result) {
+      ASSERT_EQ(expected, result);
+    }
+  }
+}
+
 template <typename Expected>
 static void Run(const InternalCodeAllocator& allocator,
                 const CodeGenerator& codegen,
                 bool has_result,
                 Expected expected) {
+  InstructionSet target_isa = codegen.GetInstructionSet();
+
   typedef Expected (*fptr)();
   CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
   fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
-  if (codegen.GetInstructionSet() == kThumb2) {
+  if (target_isa == kThumb2) {
     // For thumb we need the bottom bit set.
     f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
   }
-  Expected result = f();
-  if (has_result) {
-    ASSERT_EQ(expected, result);
-  }
+  VerifyGeneratedCode(target_isa, f, has_result, expected);
 }
 
 template <typename Expected>
-static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
+static void RunCodeBaseline(InstructionSet target_isa,
+                            HGraph* graph,
+                            bool has_result,
+                            Expected expected) {
   InternalCodeAllocator allocator;
 
   CompilerOptions compiler_options;
@@ -153,7 +213,7 @@
   // We avoid doing a stack overflow check that requires the runtime being setup,
   // by making sure the compiler knows the methods we are running are leaf methods.
   codegenX86.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kX86) {
+  if (target_isa == kX86) {
     Run(allocator, codegenX86, has_result, expected);
   }
 
@@ -161,7 +221,7 @@
       ArmInstructionSetFeatures::FromCppDefines());
   TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
   codegenARM.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
+  if (target_isa == kArm || target_isa == kThumb2) {
     Run(allocator, codegenARM, has_result, expected);
   }
 
@@ -169,7 +229,7 @@
       X86_64InstructionSetFeatures::FromCppDefines());
   x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
   codegenX86_64.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kX86_64) {
+  if (target_isa == kX86_64) {
     Run(allocator, codegenX86_64, has_result, expected);
   }
 
@@ -177,7 +237,7 @@
       Arm64InstructionSetFeatures::FromCppDefines());
   arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
   codegenARM64.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kArm64) {
+  if (target_isa == kArm64) {
     Run(allocator, codegenARM64, has_result, expected);
   }
 
@@ -193,7 +253,7 @@
       Mips64InstructionSetFeatures::FromCppDefines());
   mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
   codegenMIPS64.CompileBaseline(&allocator, true);
-  if (kRuntimeISA == kMips64) {
+  if (target_isa == kMips64) {
     Run(allocator, codegenMIPS64, has_result, expected);
   }
 }
@@ -221,37 +281,38 @@
 }
 
 template <typename Expected>
-static void RunCodeOptimized(HGraph* graph,
+static void RunCodeOptimized(InstructionSet target_isa,
+                             HGraph* graph,
                              std::function<void(HGraph*)> hook_before_codegen,
                              bool has_result,
                              Expected expected) {
   CompilerOptions compiler_options;
-  if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
-    TestCodeGeneratorARM codegenARM(graph,
-                                    *ArmInstructionSetFeatures::FromCppDefines(),
-                                    compiler_options);
+  if (target_isa == kArm || target_isa == kThumb2) {
+    std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
+        ArmInstructionSetFeatures::FromCppDefines());
+    TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
     RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kArm64) {
-    arm64::CodeGeneratorARM64 codegenARM64(graph,
-                                           *Arm64InstructionSetFeatures::FromCppDefines(),
-                                           compiler_options);
+  } else if (target_isa == kArm64) {
+    std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
+        Arm64InstructionSetFeatures::FromCppDefines());
+    arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
     RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kX86) {
+  } else if (target_isa == kX86) {
     std::unique_ptr<const X86InstructionSetFeatures> features_x86(
         X86InstructionSetFeatures::FromCppDefines());
     x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
     RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kX86_64) {
+  } else if (target_isa == kX86_64) {
     std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
         X86_64InstructionSetFeatures::FromCppDefines());
     x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
     RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kMips) {
+  } else if (target_isa == kMips) {
     std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
         MipsInstructionSetFeatures::FromCppDefines());
     mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
     RunCodeOptimized(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
-  } else if (kRuntimeISA == kMips64) {
+  } else if (target_isa == kMips64) {
     std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
         Mips64InstructionSetFeatures::FromCppDefines());
     mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
@@ -259,7 +320,10 @@
   }
 }
 
-static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) {
+static void TestCode(InstructionSet target_isa,
+                     const uint16_t* data,
+                     bool has_result = false,
+                     int32_t expected = 0) {
   ArenaPool pool;
   ArenaAllocator arena(&pool);
   HGraph* graph = CreateGraph(&arena);
@@ -269,10 +333,13 @@
   ASSERT_TRUE(graph_built);
   // Remove suspend checks, they cannot be executed in this context.
   RemoveSuspendChecks(graph);
-  RunCodeBaseline(graph, has_result, expected);
+  RunCodeBaseline(target_isa, graph, has_result, expected);
 }
 
-static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) {
+static void TestCodeLong(InstructionSet target_isa,
+                         const uint16_t* data,
+                         bool has_result,
+                         int64_t expected) {
   ArenaPool pool;
   ArenaAllocator arena(&pool);
   HGraph* graph = CreateGraph(&arena);
@@ -282,108 +349,110 @@
   ASSERT_TRUE(graph_built);
   // Remove suspend checks, they cannot be executed in this context.
   RemoveSuspendChecks(graph);
-  RunCodeBaseline(graph, has_result, expected);
+  RunCodeBaseline(target_isa, graph, has_result, expected);
 }
 
-TEST(CodegenTest, ReturnVoid) {
+class CodegenTest: public ::testing::TestWithParam<InstructionSet> {};
+
+TEST_P(CodegenTest, ReturnVoid) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID);
-  TestCode(data);
+  TestCode(GetParam(), data);
 }
 
-TEST(CodegenTest, CFG1) {
+TEST_P(CodegenTest, CFG1) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(data);
+  TestCode(GetParam(), data);
 }
 
-TEST(CodegenTest, CFG2) {
+TEST_P(CodegenTest, CFG2) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(data);
+  TestCode(GetParam(), data);
 }
 
-TEST(CodegenTest, CFG3) {
+TEST_P(CodegenTest, CFG3) {
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x200,
     Instruction::RETURN_VOID,
     Instruction::GOTO | 0xFF00);
 
-  TestCode(data1);
+  TestCode(GetParam(), data1);
 
   const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO_16, 3,
     Instruction::RETURN_VOID,
     Instruction::GOTO_16, 0xFFFF);
 
-  TestCode(data2);
+  TestCode(GetParam(), data2);
 
   const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO_32, 4, 0,
     Instruction::RETURN_VOID,
     Instruction::GOTO_32, 0xFFFF, 0xFFFF);
 
-  TestCode(data3);
+  TestCode(GetParam(), data3);
 }
 
-TEST(CodegenTest, CFG4) {
+TEST_P(CodegenTest, CFG4) {
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::RETURN_VOID,
     Instruction::GOTO | 0x100,
     Instruction::GOTO | 0xFE00);
 
-  TestCode(data);
+  TestCode(GetParam(), data);
 }
 
-TEST(CodegenTest, CFG5) {
+TEST_P(CodegenTest, CFG5) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::IF_EQ, 3,
     Instruction::GOTO | 0x100,
     Instruction::RETURN_VOID);
 
-  TestCode(data);
+  TestCode(GetParam(), data);
 }
 
-TEST(CodegenTest, IntConstant) {
+TEST_P(CodegenTest, IntConstant) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN_VOID);
 
-  TestCode(data);
+  TestCode(GetParam(), data);
 }
 
-TEST(CodegenTest, Return1) {
+TEST_P(CodegenTest, Return1) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN | 0);
 
-  TestCode(data, true, 0);
+  TestCode(GetParam(), data, true, 0);
 }
 
-TEST(CodegenTest, Return2) {
+TEST_P(CodegenTest, Return2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 0 | 1 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(data, true, 0);
+  TestCode(GetParam(), data, true, 0);
 }
 
-TEST(CodegenTest, Return3) {
+TEST_P(CodegenTest, Return3) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(data, true, 1);
+  TestCode(GetParam(), data, true, 1);
 }
 
-TEST(CodegenTest, ReturnIf1) {
+TEST_P(CodegenTest, ReturnIf1) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -391,10 +460,10 @@
     Instruction::RETURN | 0 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(data, true, 1);
+  TestCode(GetParam(), data, true, 1);
 }
 
-TEST(CodegenTest, ReturnIf2) {
+TEST_P(CodegenTest, ReturnIf2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
     Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -402,12 +471,12 @@
     Instruction::RETURN | 0 << 8,
     Instruction::RETURN | 1 << 8);
 
-  TestCode(data, true, 0);
+  TestCode(GetParam(), data, true, 0);
 }
 
 // Exercise bit-wise (one's complement) not-int instruction.
 #define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \
-TEST(CodegenTest, TEST_NAME) {                          \
+TEST_P(CodegenTest, TEST_NAME) {                        \
   const int32_t input = INPUT;                          \
   const uint16_t input_lo = Low16Bits(input);           \
   const uint16_t input_hi = High16Bits(input);          \
@@ -416,7 +485,7 @@
       Instruction::NOT_INT | 1 << 8 | 0 << 12 ,         \
       Instruction::RETURN | 1 << 8);                    \
                                                         \
-  TestCode(data, true, EXPECTED_OUTPUT);                \
+  TestCode(GetParam(), data, true, EXPECTED_OUTPUT);    \
 }
 
 NOT_INT_TEST(ReturnNotIntMinus2, -2, 1)
@@ -432,7 +501,7 @@
 
 // Exercise bit-wise (one's complement) not-long instruction.
 #define NOT_LONG_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT)                 \
-TEST(CodegenTest, TEST_NAME) {                                           \
+TEST_P(CodegenTest, TEST_NAME) {                                         \
   const int64_t input = INPUT;                                           \
   const uint16_t word0 = Low16Bits(Low32Bits(input));   /* LSW. */       \
   const uint16_t word1 = High16Bits(Low32Bits(input));                   \
@@ -443,7 +512,7 @@
       Instruction::NOT_LONG | 2 << 8 | 0 << 12,                          \
       Instruction::RETURN_WIDE | 2 << 8);                                \
                                                                          \
-  TestCodeLong(data, true, EXPECTED_OUTPUT);                             \
+  TestCodeLong(GetParam(), data, true, EXPECTED_OUTPUT);                 \
 }
 
 NOT_LONG_TEST(ReturnNotLongMinus2, INT64_C(-2), INT64_C(1))
@@ -482,7 +551,7 @@
 
 #undef NOT_LONG_TEST
 
-TEST(CodegenTest, IntToLongOfLongToInt) {
+TEST_P(CodegenTest, IntToLongOfLongToInt) {
   const int64_t input = INT64_C(4294967296);             // 2^32
   const uint16_t word0 = Low16Bits(Low32Bits(input));    // LSW.
   const uint16_t word1 = High16Bits(Low32Bits(input));
@@ -496,48 +565,48 @@
       Instruction::INT_TO_LONG | 2 << 8 | 4 << 12,
       Instruction::RETURN_WIDE | 2 << 8);
 
-  TestCodeLong(data, true, 1);
+  TestCodeLong(GetParam(), data, true, 1);
 }
 
-TEST(CodegenTest, ReturnAdd1) {
+TEST_P(CodegenTest, ReturnAdd1) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::ADD_INT, 1 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(data, true, 7);
+  TestCode(GetParam(), data, true, 7);
 }
 
-TEST(CodegenTest, ReturnAdd2) {
+TEST_P(CodegenTest, ReturnAdd2) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::ADD_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(data, true, 7);
+  TestCode(GetParam(), data, true, 7);
 }
 
-TEST(CodegenTest, ReturnAdd3) {
+TEST_P(CodegenTest, ReturnAdd3) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::ADD_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(data, true, 7);
+  TestCode(GetParam(), data, true, 7);
 }
 
-TEST(CodegenTest, ReturnAdd4) {
+TEST_P(CodegenTest, ReturnAdd4) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::ADD_INT_LIT16, 3,
     Instruction::RETURN);
 
-  TestCode(data, true, 7);
+  TestCode(GetParam(), data, true, 7);
 }
 
-TEST(CodegenTest, NonMaterializedCondition) {
+TEST_P(CodegenTest, NonMaterializedCondition) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
 
@@ -583,30 +652,30 @@
     block->InsertInstructionBefore(move, block->GetLastInstruction());
   };
 
-  RunCodeOptimized(graph, hook_before_codegen, true, 0);
+  RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, 0);
 }
 
-TEST(CodegenTest, ReturnMulInt) {
+TEST_P(CodegenTest, ReturnMulInt) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::MUL_INT, 1 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(data, true, 12);
+  TestCode(GetParam(), data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulInt2addr) {
+TEST_P(CodegenTest, ReturnMulInt2addr) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 4 << 12 | 1 << 8,
     Instruction::MUL_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(data, true, 12);
+  TestCode(GetParam(), data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulLong) {
+TEST_P(CodegenTest, ReturnMulLong) {
   const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
     Instruction::CONST_4 | 0 << 12 | 1 << 8,
@@ -615,10 +684,10 @@
     Instruction::MUL_LONG, 2 << 8 | 0,
     Instruction::RETURN_WIDE);
 
-  TestCodeLong(data, true, 12);
+  TestCodeLong(GetParam(), data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulLong2addr) {
+TEST_P(CodegenTest, ReturnMulLong2addr) {
   const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0 << 8,
     Instruction::CONST_4 | 0 << 12 | 1 << 8,
@@ -627,28 +696,28 @@
     Instruction::MUL_LONG_2ADDR | 2 << 12,
     Instruction::RETURN_WIDE);
 
-  TestCodeLong(data, true, 12);
+  TestCodeLong(GetParam(), data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulIntLit8) {
+TEST_P(CodegenTest, ReturnMulIntLit8) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::MUL_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(data, true, 12);
+  TestCode(GetParam(), data, true, 12);
 }
 
-TEST(CodegenTest, ReturnMulIntLit16) {
+TEST_P(CodegenTest, ReturnMulIntLit16) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::MUL_INT_LIT16, 3,
     Instruction::RETURN);
 
-  TestCode(data, true, 12);
+  TestCode(GetParam(), data, true, 12);
 }
 
-TEST(CodegenTest, MaterializedCondition1) {
+TEST_P(CodegenTest, MaterializedCondition1) {
   // Check that condition are materialized correctly. A materialized condition
   // should yield `1` if it evaluated to true, and `0` otherwise.
   // We force the materialization of comparisons for different combinations of
@@ -689,11 +758,11 @@
       block->InsertInstructionBefore(move, block->GetLastInstruction());
     };
 
-    RunCodeOptimized(graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]);
   }
 }
 
-TEST(CodegenTest, MaterializedCondition2) {
+TEST_P(CodegenTest, MaterializedCondition2) {
   // Check that HIf correctly interprets a materialized condition.
   // We force the materialization of comparisons for different combinations of
   // inputs. An HIf takes the materialized combination as input and returns a
@@ -755,31 +824,35 @@
       block->InsertInstructionBefore(move, block->GetLastInstruction());
     };
 
-    RunCodeOptimized(graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+    RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]);
   }
 }
 
-TEST(CodegenTest, ReturnDivIntLit8) {
+TEST_P(CodegenTest, ReturnDivIntLit8) {
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::DIV_INT_LIT8, 3 << 8 | 0,
     Instruction::RETURN);
 
-  TestCode(data, true, 1);
+  TestCode(GetParam(), data, true, 1);
 }
 
-TEST(CodegenTest, ReturnDivInt2Addr) {
+TEST_P(CodegenTest, ReturnDivInt2Addr) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0,
     Instruction::CONST_4 | 2 << 12 | 1 << 8,
     Instruction::DIV_INT_2ADDR | 1 << 12,
     Instruction::RETURN);
 
-  TestCode(data, true, 2);
+  TestCode(GetParam(), data, true, 2);
 }
 
 // Helper method.
-static void TestComparison(IfCondition condition, int64_t i, int64_t j, Primitive::Type type) {
+static void TestComparison(IfCondition condition,
+                           int64_t i,
+                           int64_t j,
+                           Primitive::Type type,
+                           const InstructionSet target_isa) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = CreateGraph(&allocator);
@@ -862,46 +935,78 @@
 
   auto hook_before_codegen = [](HGraph*) {
   };
-  RunCodeOptimized(graph, hook_before_codegen, true, expected_result);
+  RunCodeOptimized(target_isa, graph, hook_before_codegen, true, expected_result);
 }
 
-TEST(CodegenTest, ComparisonsInt) {
+TEST_P(CodegenTest, ComparisonsInt) {
+  const InstructionSet target_isa = GetParam();
   for (int64_t i = -1; i <= 1; i++) {
     for (int64_t j = -1; j <= 1; j++) {
-      TestComparison(kCondEQ, i, j, Primitive::kPrimInt);
-      TestComparison(kCondNE, i, j, Primitive::kPrimInt);
-      TestComparison(kCondLT, i, j, Primitive::kPrimInt);
-      TestComparison(kCondLE, i, j, Primitive::kPrimInt);
-      TestComparison(kCondGT, i, j, Primitive::kPrimInt);
-      TestComparison(kCondGE, i, j, Primitive::kPrimInt);
-      TestComparison(kCondB,  i, j, Primitive::kPrimInt);
-      TestComparison(kCondBE, i, j, Primitive::kPrimInt);
-      TestComparison(kCondA,  i, j, Primitive::kPrimInt);
-      TestComparison(kCondAE, i, j, Primitive::kPrimInt);
+      TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondB,  i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondA,  i, j, Primitive::kPrimInt, target_isa);
+      TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa);
     }
   }
 }
 
-TEST(CodegenTest, ComparisonsLong) {
+TEST_P(CodegenTest, ComparisonsLong) {
   // TODO: make MIPS work for long
   if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
     return;
   }
 
+  const InstructionSet target_isa = GetParam();
+  if (target_isa == kMips || target_isa == kMips64) {
+    return;
+  }
+
   for (int64_t i = -1; i <= 1; i++) {
     for (int64_t j = -1; j <= 1; j++) {
-      TestComparison(kCondEQ, i, j, Primitive::kPrimLong);
-      TestComparison(kCondNE, i, j, Primitive::kPrimLong);
-      TestComparison(kCondLT, i, j, Primitive::kPrimLong);
-      TestComparison(kCondLE, i, j, Primitive::kPrimLong);
-      TestComparison(kCondGT, i, j, Primitive::kPrimLong);
-      TestComparison(kCondGE, i, j, Primitive::kPrimLong);
-      TestComparison(kCondB,  i, j, Primitive::kPrimLong);
-      TestComparison(kCondBE, i, j, Primitive::kPrimLong);
-      TestComparison(kCondA,  i, j, Primitive::kPrimLong);
-      TestComparison(kCondAE, i, j, Primitive::kPrimLong);
+      TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondB,  i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondA,  i, j, Primitive::kPrimLong, target_isa);
+      TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa);
     }
   }
 }
 
+static ::std::vector<InstructionSet> GetTargetISAs() {
+  ::std::vector<InstructionSet> v;
+  // Add all ISAs that are executable on hardware or on simulator.
+  const ::std::vector<InstructionSet> executable_isa_candidates = {
+    kArm,
+    kArm64,
+    kThumb2,
+    kX86,
+    kX86_64,
+    kMips,
+    kMips64
+  };
+
+  for (auto target_isa : executable_isa_candidates) {
+    if (CanExecute(target_isa)) {
+      v.push_back(target_isa);
+    }
+  }
+
+  return v;
+}
+
+INSTANTIATE_TEST_CASE_P(MultipleTargets,
+                        CodegenTest,
+                        ::testing::ValuesIn(GetTargetISAs()));
+
 }  // namespace art
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 0b7fdf8..19e6cbd 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -71,10 +71,10 @@
 }
 
 void HInductionVarAnalysis::Run() {
-  // Detects sequence variables (generalized induction variables) during an inner-loop-first
-  // traversal of all loops using Gerlek's algorithm. The order is only relevant if outer
-  // loops would use induction information of inner loops (not currently done).
-  for (HPostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) {
+  // Detects sequence variables (generalized induction variables) during an outer to inner
+  // traversal of all loops using Gerlek's algorithm. The order is important to enable
+  // range analysis on outer loop while visiting inner loops.
+  for (HReversePostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) {
     HBasicBlock* graph_block = it_graph.Current();
     if (graph_block->IsLoopHeader()) {
       VisitLoop(graph_block->GetLoopInformation());
@@ -745,8 +745,7 @@
       if (value == 1) {
         return b;
       } else if (value == -1) {
-        op = kNeg;
-        a = nullptr;
+        return CreateSimplifiedInvariant(kNeg, nullptr, b);
       }
     }
   }
@@ -763,26 +762,52 @@
       if (value == 1) {
         return a;
       } else if (value == -1) {
-        op = kNeg;
-        b = a;
-        a = nullptr;
+        return CreateSimplifiedInvariant(kNeg, nullptr, a);
       }
     }
   } else if (b->operation == kNeg) {
     // Simplify a + (-b) = a - b, a - (-b) = a + b, -(-b) = b.
     if (op == kAdd) {
-      op = kSub;
-      b = b->op_b;
+      return CreateSimplifiedInvariant(kSub, a, b->op_b);
     } else if (op == kSub) {
-      op = kAdd;
-      b = b->op_b;
+      return CreateSimplifiedInvariant(kAdd, a, b->op_b);
     } else if (op == kNeg) {
       return b->op_b;
     }
+  } else if (b->operation == kSub) {
+    // Simplify - (a - b) = b - a.
+    if (op == kNeg) {
+      return CreateSimplifiedInvariant(kSub, b->op_b, b->op_a);
+    }
   }
   return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr);
 }
 
+bool HInductionVarAnalysis::IsIntAndGet(InductionInfo* info, int64_t* value) {
+  if (info != nullptr && info->induction_class == kInvariant) {
+    // A direct constant fetch.
+    if (info->operation == kFetch) {
+      DCHECK(info->fetch);
+      if (info->fetch->IsIntConstant()) {
+        *value = info->fetch->AsIntConstant()->GetValue();
+        return true;
+      } else if (info->fetch->IsLongConstant()) {
+        *value = info->fetch->AsLongConstant()->GetValue();
+        return true;
+      }
+    }
+    // Use range analysis to resolve compound values.
+    InductionVarRange range(this);
+    int32_t min_val = 0;
+    int32_t max_val = 0;
+    if (range.IsConstantRange(info, &min_val, &max_val) && min_val == max_val) {
+      *value = min_val;
+      return true;
+    }
+  }
+  return false;
+}
+
 bool HInductionVarAnalysis::InductionEqual(InductionInfo* info1,
                                            InductionInfo* info2) {
   // Test structural equality only, without accounting for simplifications.
@@ -798,33 +823,9 @@
   return info1 == info2;
 }
 
-bool HInductionVarAnalysis::IsIntAndGet(InductionInfo* info, int64_t* value) {
-  if (info != nullptr && info->induction_class == kInvariant) {
-    // A direct constant fetch.
-    if (info->operation == kFetch) {
-      DCHECK(info->fetch);
-      if (info->fetch->IsIntConstant()) {
-        *value = info->fetch->AsIntConstant()->GetValue();
-        return true;
-      } else if (info->fetch->IsLongConstant()) {
-        *value = info->fetch->AsLongConstant()->GetValue();
-        return true;
-      }
-    }
-    // Use range analysis to resolve compound values.
-    int32_t range_value;
-    if (InductionVarRange::GetConstant(info, &range_value)) {
-      *value = range_value;
-      return true;
-    }
-  }
-  return false;
-}
-
 std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) {
   if (info != nullptr) {
     if (info->induction_class == kInvariant) {
-      int64_t value = -1;
       std::string inv = "(";
       inv += InductionToString(info->op_a);
       switch (info->operation) {
@@ -840,8 +841,10 @@
         case kGE:    inv += " >= "; break;
         case kFetch:
           DCHECK(info->fetch);
-          if (IsIntAndGet(info, &value)) {
-            inv += std::to_string(value);
+          if (info->fetch->IsIntConstant()) {
+            inv += std::to_string(info->fetch->AsIntConstant()->GetValue());
+          } else if (info->fetch->IsLongConstant()) {
+            inv += std::to_string(info->fetch->AsLongConstant()->GetValue());
           } else {
             inv += std::to_string(info->fetch->GetId()) + ":" + info->fetch->DebugName();
           }
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index cf35409..84d5d82 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -188,9 +188,11 @@
   InductionInfo* CreateConstant(int64_t value, Primitive::Type type);
   InductionInfo* CreateSimplifiedInvariant(InductionOp op, InductionInfo* a, InductionInfo* b);
 
+  // Constants.
+  bool IsIntAndGet(InductionInfo* info, int64_t* value);
+
   // Helpers.
   static bool InductionEqual(InductionInfo* info1, InductionInfo* info2);
-  static bool IsIntAndGet(InductionInfo* info, int64_t* value);
   static std::string InductionToString(InductionInfo* info);
 
   // TODO: fine tune the following data structures, only keep relevant data.
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 9d0cde7..ae15fcf 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -119,7 +119,7 @@
   }
 }
 
-bool InductionVarRange::RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) {
+bool InductionVarRange::RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) const {
   Value v1 = RefineOuter(*min_val, /* is_min */ true);
   Value v2 = RefineOuter(*max_val, /* is_min */ false);
   if (v1.instruction != min_val->instruction || v2.instruction != max_val->instruction) {
@@ -167,7 +167,7 @@
 // Private class methods.
 //
 
-bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) {
+bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const {
   if (info != nullptr) {
     if (info->induction_class == HInductionVarAnalysis::kLinear) {
       return true;
@@ -178,7 +178,7 @@
   return false;
 }
 
-bool InductionVarRange::IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+bool InductionVarRange::IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const {
   if (trip != nullptr) {
     if (trip->induction_class == HInductionVarAnalysis::kInvariant) {
       return trip->operation == HInductionVarAnalysis::kTripCountInBody ||
@@ -188,7 +188,7 @@
   return false;
 }
 
-bool InductionVarRange::IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+bool InductionVarRange::IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const {
   if (trip != nullptr) {
     if (trip->induction_class == HInductionVarAnalysis::kInvariant) {
       return trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe ||
@@ -198,10 +198,57 @@
   return false;
 }
 
+InductionVarRange::Value InductionVarRange::GetLinear(HInductionVarAnalysis::InductionInfo* info,
+                                                      HInductionVarAnalysis::InductionInfo* trip,
+                                                      bool in_body,
+                                                      bool is_min) const {
+  // Detect common situation where an offset inside the trip count cancels out during range
+  // analysis (finding max a * (TC - 1) + OFFSET for a == 1 and TC = UPPER - OFFSET or finding
+  // min a * (TC - 1) + OFFSET for a == -1 and TC = OFFSET - UPPER) to avoid losing information
+  // with intermediate results that only incorporate single instructions.
+  if (trip != nullptr) {
+    HInductionVarAnalysis::InductionInfo* trip_expr = trip->op_a;
+    if (trip_expr->operation == HInductionVarAnalysis::kSub) {
+      int32_t min_value = 0;
+      int32_t stride_value = 0;
+      if (IsConstantRange(info->op_a, &min_value, &stride_value) && min_value == stride_value) {
+        if (!is_min && stride_value == 1) {
+          // Test original trip's negative operand (trip_expr->op_b) against
+          // the offset of the linear induction.
+          if (HInductionVarAnalysis::InductionEqual(trip_expr->op_b, info->op_b)) {
+            // Analyze cancelled trip with just the positive operand (trip_expr->op_a).
+            HInductionVarAnalysis::InductionInfo cancelled_trip(
+                trip->induction_class, trip->operation, trip_expr->op_a, trip->op_b, nullptr);
+            return GetVal(&cancelled_trip, trip, in_body, is_min);
+          }
+        } else if (is_min && stride_value == -1) {
+          // Test original trip's positive operand (trip_expr->op_a) against
+          // the offset of the linear induction.
+          if (HInductionVarAnalysis::InductionEqual(trip_expr->op_a, info->op_b)) {
+            // Analyze cancelled trip with just the negative operand (trip_expr->op_b).
+            HInductionVarAnalysis::InductionInfo neg(
+                HInductionVarAnalysis::kInvariant,
+                HInductionVarAnalysis::kNeg,
+                nullptr,
+                trip_expr->op_b,
+                nullptr);
+            HInductionVarAnalysis::InductionInfo cancelled_trip(
+                trip->induction_class, trip->operation, &neg, trip->op_b, nullptr);
+            return SubValue(Value(0), GetVal(&cancelled_trip, trip, in_body, !is_min));
+          }
+        }
+      }
+    }
+  }
+  // General rule of linear induction a * i + b, for normalized 0 <= i < TC.
+  return AddValue(GetMul(info->op_a, trip, trip, in_body, is_min),
+                  GetVal(info->op_b, trip, in_body, is_min));
+}
+
 InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction,
                                                      HInductionVarAnalysis::InductionInfo* trip,
                                                      bool in_body,
-                                                     bool is_min) {
+                                                     bool is_min) const {
   // Detect constants and chase the fetch a bit deeper into the HIR tree, so that it becomes
   // more likely range analysis will compare the same instructions as terminal nodes.
   int32_t value;
@@ -227,7 +274,7 @@
 InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::InductionInfo* info,
                                                    HInductionVarAnalysis::InductionInfo* trip,
                                                    bool in_body,
-                                                   bool is_min) {
+                                                   bool is_min) const {
   if (info != nullptr) {
     switch (info->induction_class) {
       case HInductionVarAnalysis::kInvariant:
@@ -266,13 +313,11 @@
             break;
         }
         break;
-      case HInductionVarAnalysis::kLinear:
-        // Linear induction a * i + b, for normalized 0 <= i < TC.
-        return AddValue(GetMul(info->op_a, trip, trip, in_body, is_min),
-                        GetVal(info->op_b, trip, in_body, is_min));
+      case HInductionVarAnalysis::kLinear: {
+        return GetLinear(info, trip, in_body, is_min);
+      }
       case HInductionVarAnalysis::kWrapAround:
       case HInductionVarAnalysis::kPeriodic:
-        // Merge values in the wrap-around/periodic.
         return MergeVal(GetVal(info->op_a, trip, in_body, is_min),
                         GetVal(info->op_b, trip, in_body, is_min), is_min);
     }
@@ -284,11 +329,17 @@
                                                    HInductionVarAnalysis::InductionInfo* info2,
                                                    HInductionVarAnalysis::InductionInfo* trip,
                                                    bool in_body,
-                                                   bool is_min) {
+                                                   bool is_min) const {
   Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true);
   Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false);
   Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true);
   Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false);
+  // Try to refine certain failure.
+  if (v1_min.a_constant && v1_max.a_constant) {
+    v1_min = RefineOuter(v1_min, /* is_min */ true);
+    v1_max = RefineOuter(v1_max, /* is_min */ false);
+  }
+  // Positive or negative range?
   if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) {
     // Positive range vs. positive or negative range.
     if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
@@ -298,7 +349,7 @@
       return is_min ? MulValue(v1_max, v2_min)
                     : MulValue(v1_min, v2_max);
     }
-  } else if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant <= 0) {
+  } else if (v1_max.is_known && v1_max.a_constant == 0 && v1_max.b_constant <= 0) {
     // Negative range vs. positive or negative range.
     if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
       return is_min ? MulValue(v1_min, v2_max)
@@ -315,11 +366,12 @@
                                                    HInductionVarAnalysis::InductionInfo* info2,
                                                    HInductionVarAnalysis::InductionInfo* trip,
                                                    bool in_body,
-                                                   bool is_min) {
+                                                   bool is_min) const {
   Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true);
   Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false);
   Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true);
   Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false);
+  // Positive or negative range?
   if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) {
     // Positive range vs. positive or negative range.
     if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
@@ -329,7 +381,7 @@
       return is_min ? DivValue(v1_max, v2_max)
                     : DivValue(v1_min, v2_min);
     }
-  } else if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant <= 0) {
+  } else if (v1_max.is_known && v1_max.a_constant == 0 && v1_max.b_constant <= 0) {
     // Negative range vs. positive or negative range.
     if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) {
       return is_min ? DivValue(v1_min, v2_min)
@@ -342,19 +394,23 @@
   return Value();
 }
 
-bool InductionVarRange::GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t *value) {
-  Value v_min = GetVal(info, nullptr, false, /* is_min */ true);
-  Value v_max = GetVal(info, nullptr, false, /* is_min */ false);
-  if (v_min.is_known && v_max.is_known) {
-    if (v_min.a_constant == 0 && v_max.a_constant == 0 && v_min.b_constant == v_max.b_constant) {
-      *value = v_min.b_constant;
+bool InductionVarRange::IsConstantRange(HInductionVarAnalysis::InductionInfo* info,
+                                        int32_t *min_value,
+                                        int32_t *max_value) const {
+  bool in_body = true;  // no known trip count
+  Value v_min = GetVal(info, nullptr, in_body, /* is_min */ true);
+  Value v_max = GetVal(info, nullptr, in_body, /* is_min */ false);
+  do {
+    if (v_min.is_known && v_min.a_constant == 0 && v_max.is_known && v_max.a_constant == 0) {
+      *min_value = v_min.b_constant;
+      *max_value = v_max.b_constant;
       return true;
     }
-  }
+  } while (RefineOuter(&v_min, &v_max));
   return false;
 }
 
-InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) {
+InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) const {
   if (v1.is_known && v2.is_known && IsSafeAdd(v1.b_constant, v2.b_constant)) {
     const int32_t b = v1.b_constant + v2.b_constant;
     if (v1.a_constant == 0) {
@@ -368,7 +424,7 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2) {
+InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2) const {
   if (v1.is_known && v2.is_known && IsSafeSub(v1.b_constant, v2.b_constant)) {
     const int32_t b = v1.b_constant - v2.b_constant;
     if (v1.a_constant == 0 && IsSafeSub(0, v2.a_constant)) {
@@ -382,7 +438,7 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::MulValue(Value v1, Value v2) {
+InductionVarRange::Value InductionVarRange::MulValue(Value v1, Value v2) const {
   if (v1.is_known && v2.is_known) {
     if (v1.a_constant == 0) {
       if (IsSafeMul(v1.b_constant, v2.a_constant) && IsSafeMul(v1.b_constant, v2.b_constant)) {
@@ -397,7 +453,7 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::DivValue(Value v1, Value v2) {
+InductionVarRange::Value InductionVarRange::DivValue(Value v1, Value v2) const {
   if (v1.is_known && v2.is_known && v1.a_constant == 0 && v2.a_constant == 0) {
     if (IsSafeDiv(v1.b_constant, v2.b_constant)) {
       return Value(v1.b_constant / v2.b_constant);
@@ -406,7 +462,7 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is_min) {
+InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is_min) const {
   if (v1.is_known && v2.is_known) {
     if (v1.instruction == v2.instruction && v1.a_constant == v2.a_constant) {
       return Value(v1.instruction, v1.a_constant,
@@ -417,7 +473,7 @@
   return Value();
 }
 
-InductionVarRange::Value InductionVarRange::RefineOuter(Value v, bool is_min) {
+InductionVarRange::Value InductionVarRange::RefineOuter(Value v, bool is_min) const {
   if (v.instruction != nullptr) {
     HLoopInformation* loop =
         v.instruction->GetBlock()->GetLoopInformation();  // closest enveloping loop
@@ -444,7 +500,7 @@
                                      /*out*/HInstruction** upper,
                                      /*out*/HInstruction** taken_test,
                                      /*out*/bool* needs_finite_test,
-                                     /*out*/bool* needs_taken_test) {
+                                     /*out*/bool* needs_taken_test) const {
   HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
   if (loop != nullptr) {
     // Set up loop information.
@@ -492,7 +548,7 @@
                                      HBasicBlock* block,
                                      /*out*/HInstruction** result,
                                      bool in_body,
-                                     bool is_min) {
+                                     bool is_min) const {
   if (info != nullptr) {
     // Handle current operation.
     Primitive::Type type = Primitive::kPrimInt;
@@ -586,8 +642,9 @@
       case HInductionVarAnalysis::kLinear: {
         // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
         // to avoid arithmetic wrap-around situations that are hard to guard against.
+        int32_t min_value = 0;
         int32_t stride_value = 0;
-        if (GetConstant(info->op_a, &stride_value)) {
+        if (IsConstantRange(info->op_a, &min_value, &stride_value) && min_value == stride_value) {
           if (stride_value == 1 || stride_value == -1) {
             const bool is_min_a = stride_value == 1 ? is_min : !is_min;
             if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 71b0b1b..974b8fb 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -69,7 +69,7 @@
                          /*out*/bool* needs_finite_test);
 
   /** Refines the values with induction of next outer loop. Returns true on change. */
-  bool RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val);
+  bool RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) const;
 
   /**
    * Returns true if range analysis is able to generate code for the lower and upper
@@ -116,46 +116,48 @@
                          /*out*/HInstruction** taken_test);
 
  private:
-  //
-  // Private helper methods.
-  //
+  bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const;
+  bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
+  bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const;
 
-  static bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info);
-  static bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip);
-  static bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip);
+  Value GetLinear(HInductionVarAnalysis::InductionInfo* info,
+                  HInductionVarAnalysis::InductionInfo* trip,
+                  bool in_body,
+                  bool is_min) const;
+  Value GetFetch(HInstruction* instruction,
+                 HInductionVarAnalysis::InductionInfo* trip,
+                 bool in_body,
+                 bool is_min) const;
+  Value GetVal(HInductionVarAnalysis::InductionInfo* info,
+               HInductionVarAnalysis::InductionInfo* trip,
+               bool in_body,
+               bool is_min) const;
+  Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
+               HInductionVarAnalysis::InductionInfo* info2,
+               HInductionVarAnalysis::InductionInfo* trip,
+               bool in_body,
+               bool is_min) const;
+  Value GetDiv(HInductionVarAnalysis::InductionInfo* info1,
+               HInductionVarAnalysis::InductionInfo* info2,
+               HInductionVarAnalysis::InductionInfo* trip,
+               bool in_body,
+               bool is_min) const;
 
-  static Value GetFetch(HInstruction* instruction,
-                        HInductionVarAnalysis::InductionInfo* trip,
-                        bool in_body,
-                        bool is_min);
-  static Value GetVal(HInductionVarAnalysis::InductionInfo* info,
-                      HInductionVarAnalysis::InductionInfo* trip,
-                      bool in_body,
-                      bool is_min);
-  static Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
-                      HInductionVarAnalysis::InductionInfo* info2,
-                      HInductionVarAnalysis::InductionInfo* trip,
-                      bool in_body,
-                      bool is_min);
-  static Value GetDiv(HInductionVarAnalysis::InductionInfo* info1,
-                      HInductionVarAnalysis::InductionInfo* info2,
-                      HInductionVarAnalysis::InductionInfo* trip,
-                      bool in_body,
-                      bool is_min);
+  bool IsConstantRange(HInductionVarAnalysis::InductionInfo* info,
+                       int32_t *min_value,
+                       int32_t *max_value) const;
 
-  static bool GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t *value);
-
-  static Value AddValue(Value v1, Value v2);
-  static Value SubValue(Value v1, Value v2);
-  static Value MulValue(Value v1, Value v2);
-  static Value DivValue(Value v1, Value v2);
-  static Value MergeVal(Value v1, Value v2, bool is_min);
+  Value AddValue(Value v1, Value v2) const;
+  Value SubValue(Value v1, Value v2) const;
+  Value MulValue(Value v1, Value v2) const;
+  Value DivValue(Value v1, Value v2) const;
+  Value MergeVal(Value v1, Value v2, bool is_min) const;
 
   /**
    * Returns refined value using induction of next outer loop or the input value if no
    * further refinement is possible.
    */
-  Value RefineOuter(Value val, bool is_min);
+  Value RefineOuter(Value val, bool is_min) const;
 
   /**
    * Generates code for lower/upper/taken-test in the HIR. Returns true on success.
@@ -170,15 +172,15 @@
                     /*out*/HInstruction** upper,
                     /*out*/HInstruction** taken_test,
                     /*out*/bool* needs_finite_test,
-                    /*out*/bool* needs_taken_test);
+                    /*out*/bool* needs_taken_test) const;
 
-  static bool GenerateCode(HInductionVarAnalysis::InductionInfo* info,
-                           HInductionVarAnalysis::InductionInfo* trip,
-                           HGraph* graph,
-                           HBasicBlock* block,
-                           /*out*/HInstruction** result,
-                           bool in_body,
-                           bool is_min);
+  bool GenerateCode(HInductionVarAnalysis::InductionInfo* info,
+                    HInductionVarAnalysis::InductionInfo* trip,
+                    HGraph* graph,
+                    HBasicBlock* block,
+                    /*out*/HInstruction** result,
+                    bool in_body,
+                    bool is_min) const;
 
   /** Results of prior induction variable analysis. */
   HInductionVarAnalysis *induction_analysis_;
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index 128b5bb..5c0bdd7 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -31,9 +31,12 @@
  */
 class InductionVarRangeTest : public testing::Test {
  public:
-  InductionVarRangeTest()  : pool_(), allocator_(&pool_) {
-    graph_ = CreateGraph(&allocator_);
-    iva_ = new (&allocator_) HInductionVarAnalysis(graph_);
+  InductionVarRangeTest()
+      : pool_(),
+        allocator_(&pool_),
+        graph_(CreateGraph(&allocator_)),
+        iva_(new (&allocator_) HInductionVarAnalysis(graph_)),
+        range_(iva_) {
     BuildGraph();
   }
 
@@ -59,6 +62,11 @@
     graph_->AddBlock(exit_block_);
     graph_->SetEntryBlock(entry_block_);
     graph_->SetExitBlock(exit_block_);
+    // Two parameters.
+    x_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+    entry_block_->AddInstruction(x_);
+    y_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+    entry_block_->AddInstruction(y_);
   }
 
   /** Constructs loop with given upper bound. */
@@ -103,7 +111,7 @@
     exit_block_->AddInstruction(new (&allocator_) HExit());
   }
 
-  /** Performs induction variable analysis. */
+  /** Constructs SSA and performs induction variable analysis. */
   void PerformInductionVarAnalysis() {
     ASSERT_TRUE(graph_->TryBuildingSsa());
     iva_->Run();
@@ -180,49 +188,51 @@
   //
 
   bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) {
-    return InductionVarRange::NeedsTripCount(info);
+    return range_.NeedsTripCount(info);
   }
 
   bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) {
-    return InductionVarRange::IsBodyTripCount(trip);
+    return range_.IsBodyTripCount(trip);
   }
 
   bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) {
-    return InductionVarRange::IsUnsafeTripCount(trip);
+    return range_.IsUnsafeTripCount(trip);
   }
 
   Value GetMin(HInductionVarAnalysis::InductionInfo* info,
                HInductionVarAnalysis::InductionInfo* induc) {
-    return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ true);
+    return range_.GetVal(info, induc, /* in_body */ true, /* is_min */ true);
   }
 
   Value GetMax(HInductionVarAnalysis::InductionInfo* info,
                HInductionVarAnalysis::InductionInfo* induc) {
-    return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ false);
+    return range_.GetVal(info, induc, /* in_body */ true, /* is_min */ false);
   }
 
   Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
                HInductionVarAnalysis::InductionInfo* info2,
                bool is_min) {
-    return InductionVarRange::GetMul(info1, info2, nullptr, /* in_body */ true, is_min);
+    return range_.GetMul(info1, info2, nullptr, /* in_body */ true, is_min);
   }
 
   Value GetDiv(HInductionVarAnalysis::InductionInfo* info1,
                HInductionVarAnalysis::InductionInfo* info2,
                bool is_min) {
-    return InductionVarRange::GetDiv(info1, info2, nullptr, /* in_body */ true, is_min);
+    return range_.GetDiv(info1, info2, nullptr, /* in_body */ true, is_min);
   }
 
-  bool GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t* value) {
-    return InductionVarRange::GetConstant(info, value);
+  bool IsConstantRange(HInductionVarAnalysis::InductionInfo* info,
+                       int32_t* min_value,
+                       int32_t* max_value) {
+    return range_.IsConstantRange(info, min_value, max_value);
   }
 
-  Value AddValue(Value v1, Value v2) { return InductionVarRange::AddValue(v1, v2); }
-  Value SubValue(Value v1, Value v2) { return InductionVarRange::SubValue(v1, v2); }
-  Value MulValue(Value v1, Value v2) { return InductionVarRange::MulValue(v1, v2); }
-  Value DivValue(Value v1, Value v2) { return InductionVarRange::DivValue(v1, v2); }
-  Value MinValue(Value v1, Value v2) { return InductionVarRange::MergeVal(v1, v2, true); }
-  Value MaxValue(Value v1, Value v2) { return InductionVarRange::MergeVal(v1, v2, false); }
+  Value AddValue(Value v1, Value v2) { return range_.AddValue(v1, v2); }
+  Value SubValue(Value v1, Value v2) { return range_.SubValue(v1, v2); }
+  Value MulValue(Value v1, Value v2) { return range_.MulValue(v1, v2); }
+  Value DivValue(Value v1, Value v2) { return range_.DivValue(v1, v2); }
+  Value MinValue(Value v1, Value v2) { return range_.MergeVal(v1, v2, true); }
+  Value MaxValue(Value v1, Value v2) { return range_.MergeVal(v1, v2, false); }
 
   // General building fields.
   ArenaPool pool_;
@@ -232,16 +242,17 @@
   HBasicBlock* exit_block_;
   HBasicBlock* loop_preheader_;
   HInductionVarAnalysis* iva_;
+  InductionVarRange range_;
 
   // Instructions.
   HInstruction* condition_;
   HInstruction* increment_;
-  HReturnVoid x_;
-  HReturnVoid y_;
+  HInstruction* x_;
+  HInstruction* y_;
 };
 
 //
-// Tests on static methods.
+// Tests on private methods.
 //
 
 TEST_F(InductionVarRangeTest, TripCountProperties) {
@@ -274,14 +285,14 @@
               GetMin(CreateInvariant('+', CreateConst(2), CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(22),
               GetMax(CreateInvariant('+', CreateConst(2), CreateRange(10, 20)), nullptr));
-  ExpectEqual(Value(&x_, 1, -20),
-              GetMin(CreateInvariant('+', CreateFetch(&x_), CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, 1, -10),
-              GetMax(CreateInvariant('+', CreateFetch(&x_), CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, 1, 10),
-              GetMin(CreateInvariant('+', CreateRange(10, 20), CreateFetch(&x_)), nullptr));
-  ExpectEqual(Value(&x_, 1, 20),
-              GetMax(CreateInvariant('+', CreateRange(10, 20), CreateFetch(&x_)), nullptr));
+  ExpectEqual(Value(x_, 1, -20),
+              GetMin(CreateInvariant('+', CreateFetch(x_), CreateRange(-20, -10)), nullptr));
+  ExpectEqual(Value(x_, 1, -10),
+              GetMax(CreateInvariant('+', CreateFetch(x_), CreateRange(-20, -10)), nullptr));
+  ExpectEqual(Value(x_, 1, 10),
+              GetMin(CreateInvariant('+', CreateRange(10, 20), CreateFetch(x_)), nullptr));
+  ExpectEqual(Value(x_, 1, 20),
+              GetMax(CreateInvariant('+', CreateRange(10, 20), CreateFetch(x_)), nullptr));
   ExpectEqual(Value(5),
               GetMin(CreateInvariant('+', CreateRange(-5, -1), CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(19),
@@ -293,14 +304,14 @@
               GetMin(CreateInvariant('-', CreateConst(2), CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(-8),
               GetMax(CreateInvariant('-', CreateConst(2), CreateRange(10, 20)), nullptr));
-  ExpectEqual(Value(&x_, 1, 10),
-              GetMin(CreateInvariant('-', CreateFetch(&x_), CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, 1, 20),
-              GetMax(CreateInvariant('-', CreateFetch(&x_), CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, -1, 10),
-              GetMin(CreateInvariant('-', CreateRange(10, 20), CreateFetch(&x_)), nullptr));
-  ExpectEqual(Value(&x_, -1, 20),
-              GetMax(CreateInvariant('-', CreateRange(10, 20), CreateFetch(&x_)), nullptr));
+  ExpectEqual(Value(x_, 1, 10),
+              GetMin(CreateInvariant('-', CreateFetch(x_), CreateRange(-20, -10)), nullptr));
+  ExpectEqual(Value(x_, 1, 20),
+              GetMax(CreateInvariant('-', CreateFetch(x_), CreateRange(-20, -10)), nullptr));
+  ExpectEqual(Value(x_, -1, 10),
+              GetMin(CreateInvariant('-', CreateRange(10, 20), CreateFetch(x_)), nullptr));
+  ExpectEqual(Value(x_, -1, 20),
+              GetMax(CreateInvariant('-', CreateRange(10, 20), CreateFetch(x_)), nullptr));
   ExpectEqual(Value(-25),
               GetMin(CreateInvariant('-', CreateRange(-5, -1), CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(-11),
@@ -312,8 +323,8 @@
   ExpectEqual(Value(-10), GetMax(CreateInvariant('n', nullptr, CreateRange(10, 20)), nullptr));
   ExpectEqual(Value(10), GetMin(CreateInvariant('n', nullptr, CreateRange(-20, -10)), nullptr));
   ExpectEqual(Value(20), GetMax(CreateInvariant('n', nullptr, CreateRange(-20, -10)), nullptr));
-  ExpectEqual(Value(&x_, -1, 0), GetMin(CreateInvariant('n', nullptr, CreateFetch(&x_)), nullptr));
-  ExpectEqual(Value(&x_, -1, 0), GetMax(CreateInvariant('n', nullptr, CreateFetch(&x_)), nullptr));
+  ExpectEqual(Value(x_, -1, 0), GetMin(CreateInvariant('n', nullptr, CreateFetch(x_)), nullptr));
+  ExpectEqual(Value(x_, -1, 0), GetMax(CreateInvariant('n', nullptr, CreateFetch(x_)), nullptr));
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxMul) {
@@ -336,8 +347,8 @@
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxFetch) {
-  ExpectEqual(Value(&x_, 1, 0), GetMin(CreateFetch(&x_), nullptr));
-  ExpectEqual(Value(&x_, 1, 0), GetMax(CreateFetch(&x_), nullptr));
+  ExpectEqual(Value(x_, 1, 0), GetMin(CreateFetch(x_), nullptr));
+  ExpectEqual(Value(x_, 1, 0), GetMax(CreateFetch(x_), nullptr));
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxLinear) {
@@ -364,45 +375,70 @@
 TEST_F(InductionVarRangeTest, GetMulMin) {
   ExpectEqual(Value(6), GetMul(CreateRange(2, 10), CreateRange(3, 5), true));
   ExpectEqual(Value(-50), GetMul(CreateRange(2, 10), CreateRange(-5, -3), true));
+  ExpectEqual(Value(), GetMul(CreateRange(2, 10), CreateRange(-1, 1), true));
   ExpectEqual(Value(-50), GetMul(CreateRange(-10, -2), CreateRange(3, 5), true));
   ExpectEqual(Value(6), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), true));
+  ExpectEqual(Value(), GetMul(CreateRange(-10, -2), CreateRange(-1, 1), true));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(2, 10), true));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-10, -2), true));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-1, 1), true));
 }
 
 TEST_F(InductionVarRangeTest, GetMulMax) {
   ExpectEqual(Value(50), GetMul(CreateRange(2, 10), CreateRange(3, 5), false));
   ExpectEqual(Value(-6), GetMul(CreateRange(2, 10), CreateRange(-5, -3), false));
+  ExpectEqual(Value(), GetMul(CreateRange(2, 10), CreateRange(-1, 1), false));
   ExpectEqual(Value(-6), GetMul(CreateRange(-10, -2), CreateRange(3, 5), false));
   ExpectEqual(Value(50), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), false));
+  ExpectEqual(Value(), GetMul(CreateRange(-10, -2), CreateRange(-1, 1), false));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(2, 10), false));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-10, -2), false));
+  ExpectEqual(Value(), GetMul(CreateRange(-1, 1), CreateRange(-1, 1), false));
 }
 
 TEST_F(InductionVarRangeTest, GetDivMin) {
   ExpectEqual(Value(10), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), true));
   ExpectEqual(Value(-500), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(40, 1000), CreateRange(-1, 1), true));
   ExpectEqual(Value(-500), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), true));
   ExpectEqual(Value(10), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1000, -40), CreateRange(-1, 1), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(40, 1000), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1000, -40), true));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1, 1), true));
 }
 
 TEST_F(InductionVarRangeTest, GetDivMax) {
   ExpectEqual(Value(500), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), false));
   ExpectEqual(Value(-10), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), false));
+  ExpectEqual(Value(), GetDiv(CreateRange(40, 1000), CreateRange(-1, 1), false));
   ExpectEqual(Value(-10), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), false));
   ExpectEqual(Value(500), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), false));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1000, -40), CreateRange(-1, 1), false));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(40, 1000), false));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1000, 40), false));
+  ExpectEqual(Value(), GetDiv(CreateRange(-1, 1), CreateRange(-1, 1), false));
 }
 
-TEST_F(InductionVarRangeTest, GetConstant) {
-  int32_t value;
-  ASSERT_TRUE(GetConstant(CreateConst(12345), &value));
-  EXPECT_EQ(12345, value);
-  EXPECT_FALSE(GetConstant(CreateRange(1, 2), &value));
+TEST_F(InductionVarRangeTest, IsConstantRange) {
+  int32_t min_value;
+  int32_t max_value;
+  ASSERT_TRUE(IsConstantRange(CreateConst(12345), &min_value, &max_value));
+  EXPECT_EQ(12345, min_value);
+  EXPECT_EQ(12345, max_value);
+  ASSERT_TRUE(IsConstantRange(CreateRange(1, 2), &min_value, &max_value));
+  EXPECT_EQ(1, min_value);
+  EXPECT_EQ(2, max_value);
+  EXPECT_FALSE(IsConstantRange(CreateFetch(x_), &min_value, &max_value));
 }
 
 TEST_F(InductionVarRangeTest, AddValue) {
   ExpectEqual(Value(110), AddValue(Value(10), Value(100)));
-  ExpectEqual(Value(-5), AddValue(Value(&x_, 1, -4), Value(&x_, -1, -1)));
-  ExpectEqual(Value(&x_, 3, -5), AddValue(Value(&x_, 2, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(), AddValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(&x_, 1, 23), AddValue(Value(&x_, 1, 20), Value(3)));
-  ExpectEqual(Value(&y_, 1, 5), AddValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(-5), AddValue(Value(x_, 1, -4), Value(x_, -1, -1)));
+  ExpectEqual(Value(x_, 3, -5), AddValue(Value(x_, 2, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(), AddValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(x_, 1, 23), AddValue(Value(x_, 1, 20), Value(3)));
+  ExpectEqual(Value(y_, 1, 5), AddValue(Value(55), Value(y_, 1, -50)));
   const int32_t max_value = std::numeric_limits<int32_t>::max();
   ExpectEqual(Value(max_value), AddValue(Value(max_value - 5), Value(5)));
   ExpectEqual(Value(), AddValue(Value(max_value - 5), Value(6)));  // unsafe
@@ -410,11 +446,11 @@
 
 TEST_F(InductionVarRangeTest, SubValue) {
   ExpectEqual(Value(-90), SubValue(Value(10), Value(100)));
-  ExpectEqual(Value(-3), SubValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(&x_, 2, -3), SubValue(Value(&x_, 3, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(), SubValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(&x_, 1, 17), SubValue(Value(&x_, 1, 20), Value(3)));
-  ExpectEqual(Value(&y_, -4, 105), SubValue(Value(55), Value(&y_, 4, -50)));
+  ExpectEqual(Value(-3), SubValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(x_, 2, -3), SubValue(Value(x_, 3, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(), SubValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(x_, 1, 17), SubValue(Value(x_, 1, 20), Value(3)));
+  ExpectEqual(Value(y_, -4, 105), SubValue(Value(55), Value(y_, 4, -50)));
   const int32_t min_value = std::numeric_limits<int32_t>::min();
   ExpectEqual(Value(min_value), SubValue(Value(min_value + 5), Value(5)));
   ExpectEqual(Value(), SubValue(Value(min_value + 5), Value(6)));  // unsafe
@@ -422,145 +458,140 @@
 
 TEST_F(InductionVarRangeTest, MulValue) {
   ExpectEqual(Value(1000), MulValue(Value(10), Value(100)));
-  ExpectEqual(Value(), MulValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(), MulValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(&x_, 9, 60), MulValue(Value(&x_, 3, 20), Value(3)));
-  ExpectEqual(Value(&y_, 55, -110), MulValue(Value(55), Value(&y_, 1, -2)));
+  ExpectEqual(Value(), MulValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(), MulValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(x_, 9, 60), MulValue(Value(x_, 3, 20), Value(3)));
+  ExpectEqual(Value(y_, 55, -110), MulValue(Value(55), Value(y_, 1, -2)));
   ExpectEqual(Value(), MulValue(Value(90000), Value(-90000)));  // unsafe
 }
 
 TEST_F(InductionVarRangeTest, DivValue) {
   ExpectEqual(Value(25), DivValue(Value(100), Value(4)));
-  ExpectEqual(Value(), DivValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(), DivValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(), DivValue(Value(&x_, 12, 24), Value(3)));
-  ExpectEqual(Value(), DivValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(), DivValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(), DivValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(), DivValue(Value(x_, 12, 24), Value(3)));
+  ExpectEqual(Value(), DivValue(Value(55), Value(y_, 1, -50)));
   ExpectEqual(Value(), DivValue(Value(1), Value(0)));  // unsafe
 }
 
 TEST_F(InductionVarRangeTest, MinValue) {
   ExpectEqual(Value(10), MinValue(Value(10), Value(100)));
-  ExpectEqual(Value(&x_, 1, -4), MinValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(&x_, 4, -4), MinValue(Value(&x_, 4, -4), Value(&x_, 4, -1)));
-  ExpectEqual(Value(), MinValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(), MinValue(Value(&x_, 1, 20), Value(3)));
-  ExpectEqual(Value(), MinValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(x_, 1, -4), MinValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(x_, 4, -4), MinValue(Value(x_, 4, -4), Value(x_, 4, -1)));
+  ExpectEqual(Value(), MinValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(), MinValue(Value(x_, 1, 20), Value(3)));
+  ExpectEqual(Value(), MinValue(Value(55), Value(y_, 1, -50)));
 }
 
 TEST_F(InductionVarRangeTest, MaxValue) {
   ExpectEqual(Value(100), MaxValue(Value(10), Value(100)));
-  ExpectEqual(Value(&x_, 1, -1), MaxValue(Value(&x_, 1, -4), Value(&x_, 1, -1)));
-  ExpectEqual(Value(&x_, 4, -1), MaxValue(Value(&x_, 4, -4), Value(&x_, 4, -1)));
-  ExpectEqual(Value(), MaxValue(Value(&x_, 1, 5), Value(&y_, 1, -7)));
-  ExpectEqual(Value(), MaxValue(Value(&x_, 1, 20), Value(3)));
-  ExpectEqual(Value(), MaxValue(Value(55), Value(&y_, 1, -50)));
+  ExpectEqual(Value(x_, 1, -1), MaxValue(Value(x_, 1, -4), Value(x_, 1, -1)));
+  ExpectEqual(Value(x_, 4, -1), MaxValue(Value(x_, 4, -4), Value(x_, 4, -1)));
+  ExpectEqual(Value(), MaxValue(Value(x_, 1, 5), Value(y_, 1, -7)));
+  ExpectEqual(Value(), MaxValue(Value(x_, 1, 20), Value(3)));
+  ExpectEqual(Value(), MaxValue(Value(55), Value(y_, 1, -50)));
 }
 
 //
-// Tests on instance methods.
+// Tests on public methods.
 //
 
 TEST_F(InductionVarRangeTest, ConstantTripCountUp) {
   BuildLoop(0, graph_->GetIntConstant(1000), 1);
   PerformInductionVarAnalysis();
-  InductionVarRange range(iva_);
 
   Value v1, v2;
   bool needs_finite_test = true;
 
   // In context of header: known.
-  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(999), v2);
-  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
-  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
+  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 }
 
 TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
   BuildLoop(1000, graph_->GetIntConstant(0), -1);
   PerformInductionVarAnalysis();
-  InductionVarRange range(iva_);
 
   Value v1, v2;
   bool needs_finite_test = true;
 
   // In context of header: known.
-  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
-  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
+  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(999), v2);
-  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 }
 
 TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
-  HInstruction* parameter = new (&allocator_) HParameterValue(
-      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
-  entry_block_->AddInstruction(parameter);
-  BuildLoop(0, parameter, 1);
+  BuildLoop(0, x_, 1);
   PerformInductionVarAnalysis();
-  InductionVarRange range(iva_);
 
   Value v1, v2;
   bool needs_finite_test = true;
   bool needs_taken_test = true;
 
   // In context of header: upper unknown.
-  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(), v2);
-  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
-  ExpectEqual(Value(parameter, 1, -1), v2);
-  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
-  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  ExpectEqual(Value(x_, 1, -1), v2);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
+  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
-  ExpectEqual(Value(parameter, 1, 0), v2);
-  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
+  ExpectEqual(Value(x_, 1, 0), v2);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
   HInstruction* taken = nullptr;
 
   // Can generate code in context of loop-body only.
-  EXPECT_FALSE(range.CanGenerateCode(
+  EXPECT_FALSE(range_.CanGenerateCode(
       condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
-  ASSERT_TRUE(range.CanGenerateCode(
+  ASSERT_TRUE(range_.CanGenerateCode(
       increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
   EXPECT_FALSE(needs_finite_test);
   EXPECT_TRUE(needs_taken_test);
 
   // Generates code.
-  range.GenerateRangeCode(increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
+  range_.GenerateRangeCode(
+      increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
 
   // Verify lower is 0+0.
   ASSERT_TRUE(lower != nullptr);
@@ -581,7 +612,7 @@
   EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
 
   // Verify taken-test is 0<V.
-  range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
+  range_.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
   ASSERT_TRUE(taken != nullptr);
   ASSERT_TRUE(taken->IsLessThan());
   ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
@@ -590,52 +621,49 @@
 }
 
 TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
-  HInstruction* parameter = new (&allocator_) HParameterValue(
-      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
-  entry_block_->AddInstruction(parameter);
-  BuildLoop(1000, parameter, -1);
+  BuildLoop(1000, x_, -1);
   PerformInductionVarAnalysis();
-  InductionVarRange range(iva_);
 
   Value v1, v2;
   bool needs_finite_test = true;
   bool needs_taken_test = true;
 
   // In context of header: lower unknown.
-  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
-  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
-  ExpectEqual(Value(parameter, 1, 1), v1);
+  ExpectEqual(Value(x_, 1, 1), v1);
   ExpectEqual(Value(1000), v2);
-  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
-  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
+  range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
-  ExpectEqual(Value(parameter, 1, 0), v1);
+  ExpectEqual(Value(x_, 1, 0), v1);
   ExpectEqual(Value(999), v2);
-  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
+  EXPECT_FALSE(range_.RefineOuter(&v1, &v2));
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
   HInstruction* taken = nullptr;
 
   // Can generate code in context of loop-body only.
-  EXPECT_FALSE(range.CanGenerateCode(
+  EXPECT_FALSE(range_.CanGenerateCode(
       condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
-  ASSERT_TRUE(range.CanGenerateCode(
+  ASSERT_TRUE(range_.CanGenerateCode(
       increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
   EXPECT_FALSE(needs_finite_test);
   EXPECT_TRUE(needs_taken_test);
 
   // Generates code.
-  range.GenerateRangeCode(increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
+  range_.GenerateRangeCode(
+      increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
 
-  // Verify lower is 1000-(-(V-1000)-1).
+  // Verify lower is 1000-((1000-V)-1).
   ASSERT_TRUE(lower != nullptr);
   ASSERT_TRUE(lower->IsSub());
   ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
@@ -645,12 +673,10 @@
   ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
   EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue());
   lower = lower->InputAt(0);
-  ASSERT_TRUE(lower->IsNeg());
-  lower = lower->InputAt(0);
   ASSERT_TRUE(lower->IsSub());
-  EXPECT_TRUE(lower->InputAt(0)->IsParameterValue());
-  ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(1000, lower->InputAt(1)->AsIntConstant()->GetValue());
+  ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+  EXPECT_TRUE(lower->InputAt(1)->IsParameterValue());
 
   // Verify upper is 1000-0.
   ASSERT_TRUE(upper != nullptr);
@@ -661,7 +687,7 @@
   EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
 
   // Verify taken-test is 1000>V.
-  range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
+  range_.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
   ASSERT_TRUE(taken != nullptr);
   ASSERT_TRUE(taken->IsGreaterThan());
   ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 6946265..67097de 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -211,7 +211,7 @@
 
 // Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation.
 bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) {
-  // This simplification is currently supported on ARM and ARM64.
+  // This simplification is currently supported on x86, x86_64, ARM and ARM64.
   // TODO: Implement it for MIPS/64.
   const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
   switch (instruction_set) {
@@ -778,9 +778,9 @@
   // Try to fold an HCompare into this HCondition.
 
   // This simplification is currently supported on x86, x86_64, ARM and ARM64.
-  // TODO: Implement it for MIPS and MIPS64.
+  // TODO: Implement it for MIPS64.
   InstructionSet instruction_set = GetGraph()->GetInstructionSet();
-  if (instruction_set == kMips || instruction_set == kMips64) {
+  if (instruction_set == kMips64) {
     return;
   }
 
@@ -1268,7 +1268,7 @@
 void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left) {
   DCHECK(invoke->IsInvokeStaticOrDirect());
   DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic);
-  // This simplification is currently supported on ARM and ARM64.
+  // This simplification is currently supported on x86, x86_64, ARM and ARM64.
   // TODO: Implement it for MIPS/64.
   const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
   switch (instruction_set) {
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 422bb89..fd454d8 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1790,12 +1790,27 @@
   Location output_loc = locations->Out();
 
   switch (type) {
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
+    case Primitive::kPrimInt: {
       Register output = output_loc.AsRegister<Register>();
       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
-      if (type == Primitive::kPrimNot) {
-        codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      Register output = output_loc.AsRegister<Register>();
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateArrayLoadWithBakerReadBarrier(
+              invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+        } else {
+          __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+          codegen->GenerateReadBarrierSlow(
+              invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+        }
+      } else {
+        __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+        __ MaybeUnpoisonHeapReference(output);
       }
       break;
     }
@@ -1823,8 +1838,10 @@
   }
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke,
-                                          bool is_long, bool is_volatile) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type,
+                                          bool is_volatile) {
   bool can_call = kEmitCompilerReadBarrier &&
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -1836,7 +1853,7 @@
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  if (is_long) {
+  if (type == Primitive::kPrimLong) {
     if (is_volatile) {
       // Need to use XMM to read volatile.
       locations->AddTemp(Location::RequiresFpuRegister());
@@ -1847,25 +1864,30 @@
   } else {
     locations->SetOut(Location::RequiresRegister());
   }
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ false);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ true);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ true, /* is_volatile */ false);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ true, /* is_volatile */ true);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ false);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ true);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true);
 }
 
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ac9b245..ce737e3 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1917,16 +1917,30 @@
   Location offset_loc = locations->InAt(2);
   CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
   Location output_loc = locations->Out();
-  CpuRegister output = locations->Out().AsRegister<CpuRegister>();
+  CpuRegister output = output_loc.AsRegister<CpuRegister>();
 
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
       __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
-      if (type == Primitive::kPrimNot) {
-        codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+      break;
+
+    case Primitive::kPrimNot: {
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateArrayLoadWithBakerReadBarrier(
+              invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+        } else {
+          __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+          codegen->GenerateReadBarrierSlow(
+              invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+        }
+      } else {
+        __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+        __ MaybeUnpoisonHeapReference(output);
       }
       break;
+    }
 
     case Primitive::kPrimLong:
       __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
@@ -1938,7 +1952,9 @@
   }
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
   bool can_call = kEmitCompilerReadBarrier &&
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -1951,25 +1967,30 @@
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index a385448..b383f1e 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -15,7 +15,6 @@
  */
 
 #include "pc_relative_fixups_x86.h"
-#include "code_generator_x86.h"
 
 namespace art {
 namespace x86 {
@@ -80,10 +79,6 @@
   }
 
   void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
-    if (switch_insn->GetNumEntries() <=
-        InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) {
-      return;
-    }
     // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
     // address the constant area.
     InitializePCRelativeBasePointer();
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 9457da1..2579ddb 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -147,7 +147,7 @@
                                               std::string (AssemblerTest::*GetName2)(const Reg2&),
                                               std::string fmt) {
     std::string str;
-    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), imm_bits > 0);
+    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
     for (auto reg1 : reg1_registers) {
       for (auto reg2 : reg2_registers) {
@@ -188,14 +188,66 @@
     return str;
   }
 
+  template <typename ImmType, typename Reg1, typename Reg2>
+  std::string RepeatTemplatedImmBitsRegisters(void (Ass::*f)(ImmType, Reg1, Reg2),
+                                              const std::vector<Reg1*> reg1_registers,
+                                              const std::vector<Reg2*> reg2_registers,
+                                              std::string (AssemblerTest::*GetName1)(const Reg1&),
+                                              std::string (AssemblerTest::*GetName2)(const Reg2&),
+                                              int imm_bits,
+                                              std::string fmt) {
+    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
+
+    WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
+
+    std::string str;
+    for (auto reg1 : reg1_registers) {
+      for (auto reg2 : reg2_registers) {
+        for (int64_t imm : imms) {
+          ImmType new_imm = CreateImmediate(imm);
+          (assembler_.get()->*f)(new_imm, *reg1, *reg2);
+          std::string base = fmt;
+
+          std::string reg1_string = (this->*GetName1)(*reg1);
+          size_t reg1_index;
+          while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
+            base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
+          }
+
+          std::string reg2_string = (this->*GetName2)(*reg2);
+          size_t reg2_index;
+          while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
+            base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
+          }
+
+          size_t imm_index = base.find(IMM_TOKEN);
+          if (imm_index != std::string::npos) {
+            std::ostringstream sreg;
+            sreg << imm;
+            std::string imm_string = sreg.str();
+            base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
+          }
+
+          if (str.size() > 0) {
+            str += "\n";
+          }
+          str += base;
+        }
+      }
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
   template <typename RegType, typename ImmType>
   std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType),
-                                              int imm_bits,
-                                              const std::vector<Reg*> registers,
-                                              std::string (AssemblerTest::*GetName)(const RegType&),
-                                              std::string fmt) {
+                                             int imm_bits,
+                                             const std::vector<Reg*> registers,
+                                             std::string (AssemblerTest::*GetName)(const RegType&),
+                                             std::string fmt) {
     std::string str;
-    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), imm_bits > 0);
+    std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
     for (auto reg : registers) {
       for (int64_t imm : imms) {
@@ -291,6 +343,17 @@
                                                      fmt);
   }
 
+  template <typename ImmType>
+  std::string RepeatIbFF(void (Ass::*f)(ImmType, FPReg, FPReg), int imm_bits, std::string fmt) {
+    return RepeatTemplatedImmBitsRegisters<ImmType, FPReg, FPReg>(f,
+                                                                  GetFPRegisters(),
+                                                                  GetFPRegisters(),
+                                                                  &AssemblerTest::GetFPRegName,
+                                                                  &AssemblerTest::GetFPRegName,
+                                                                  imm_bits,
+                                                                  fmt);
+  }
+
   std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) {
     return RepeatTemplatedRegisters<FPReg, Reg>(f,
         GetFPRegisters(),
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 733ad2c..afca8ad 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -503,6 +503,18 @@
   EmitI(0x7, rt, static_cast<Register>(0), imm16);
 }
 
+void MipsAssembler::Bc1f(int cc, uint16_t imm16) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>(cc << 2), imm16);
+}
+
+void MipsAssembler::Bc1t(int cc, uint16_t imm16) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>((cc << 2) | 1), imm16);
+}
+
 void MipsAssembler::J(uint32_t addr26) {
   EmitI26(0x2, addr26);
 }
@@ -637,7 +649,17 @@
   EmitI21(0x3E, rs, imm21);
 }
 
-void MipsAssembler::EmitBcond(BranchCondition cond, Register rs, Register rt, uint16_t imm16) {
+void MipsAssembler::Bc1eqz(FRegister ft, uint16_t imm16) {
+  CHECK(IsR6());
+  EmitFI(0x11, 0x9, ft, imm16);
+}
+
+void MipsAssembler::Bc1nez(FRegister ft, uint16_t imm16) {
+  CHECK(IsR6());
+  EmitFI(0x11, 0xD, ft, imm16);
+}
+
+void MipsAssembler::EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16) {
   switch (cond) {
     case kCondLTZ:
       CHECK_EQ(rt, ZERO);
@@ -669,6 +691,14 @@
       CHECK_EQ(rt, ZERO);
       Bnez(rs, imm16);
       break;
+    case kCondF:
+      CHECK_EQ(rt, ZERO);
+      Bc1f(static_cast<int>(rs), imm16);
+      break;
+    case kCondT:
+      CHECK_EQ(rt, ZERO);
+      Bc1t(static_cast<int>(rs), imm16);
+      break;
     case kCondLT:
     case kCondGE:
     case kCondLE:
@@ -683,7 +713,7 @@
   }
 }
 
-void MipsAssembler::EmitBcondc(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21) {
+void MipsAssembler::EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21) {
   switch (cond) {
     case kCondLT:
       Bltc(rs, rt, imm16_21);
@@ -733,6 +763,14 @@
     case kCondGEU:
       Bgeuc(rs, rt, imm16_21);
       break;
+    case kCondF:
+      CHECK_EQ(rt, ZERO);
+      Bc1eqz(static_cast<FRegister>(rs), imm16_21);
+      break;
+    case kCondT:
+      CHECK_EQ(rt, ZERO);
+      Bc1nez(static_cast<FRegister>(rs), imm16_21);
+      break;
     case kUncond:
       LOG(FATAL) << "Unexpected branch condition " << cond;
       UNREACHABLE();
@@ -787,6 +825,202 @@
   EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7);
 }
 
+void MipsAssembler::CunS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x31);
+}
+
+void MipsAssembler::CeqS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x32);
+}
+
+void MipsAssembler::CueqS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x33);
+}
+
+void MipsAssembler::ColtS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x34);
+}
+
+void MipsAssembler::CultS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x35);
+}
+
+void MipsAssembler::ColeS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x36);
+}
+
+void MipsAssembler::CuleS(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x37);
+}
+
+void MipsAssembler::CunD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x31);
+}
+
+void MipsAssembler::CeqD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x32);
+}
+
+void MipsAssembler::CueqD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x33);
+}
+
+void MipsAssembler::ColtD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x34);
+}
+
+void MipsAssembler::CultD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x35);
+}
+
+void MipsAssembler::ColeD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x36);
+}
+
+void MipsAssembler::CuleD(int cc, FRegister fs, FRegister ft) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x37);
+}
+
+void MipsAssembler::CmpUnS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x01);
+}
+
+void MipsAssembler::CmpEqS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x02);
+}
+
+void MipsAssembler::CmpUeqS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x03);
+}
+
+void MipsAssembler::CmpLtS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x04);
+}
+
+void MipsAssembler::CmpUltS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x05);
+}
+
+void MipsAssembler::CmpLeS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x06);
+}
+
+void MipsAssembler::CmpUleS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x07);
+}
+
+void MipsAssembler::CmpOrS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x11);
+}
+
+void MipsAssembler::CmpUneS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x12);
+}
+
+void MipsAssembler::CmpNeS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x13);
+}
+
+void MipsAssembler::CmpUnD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x01);
+}
+
+void MipsAssembler::CmpEqD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x02);
+}
+
+void MipsAssembler::CmpUeqD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x03);
+}
+
+void MipsAssembler::CmpLtD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x04);
+}
+
+void MipsAssembler::CmpUltD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x05);
+}
+
+void MipsAssembler::CmpLeD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x06);
+}
+
+void MipsAssembler::CmpUleD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x07);
+}
+
+void MipsAssembler::CmpOrD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x11);
+}
+
+void MipsAssembler::CmpUneD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x12);
+}
+
+void MipsAssembler::CmpNeD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x13);
+}
+
+void MipsAssembler::Movf(Register rd, Register rs, int cc) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitR(0, rs, static_cast<Register>(cc << 2), rd, 0, 0x01);
+}
+
+void MipsAssembler::Movt(Register rd, Register rs, int cc) {
+  CHECK(!IsR6());
+  CHECK(IsUint<3>(cc)) << cc;
+  EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01);
+}
+
 void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) {
   EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20);
 }
@@ -1058,6 +1292,10 @@
       CHECK_NE(lhs_reg, ZERO);
       CHECK_EQ(rhs_reg, ZERO);
       break;
+    case kCondF:
+    case kCondT:
+      CHECK_EQ(rhs_reg, ZERO);
+      break;
     case kUncond:
       UNREACHABLE();
   }
@@ -1112,6 +1350,10 @@
       return kCondGEU;
     case kCondGEU:
       return kCondLTU;
+    case kCondF:
+      return kCondT;
+    case kCondT:
+      return kCondF;
     case kUncond:
       LOG(FATAL) << "Unexpected branch condition " << cond;
   }
@@ -1514,7 +1756,7 @@
       break;
     case Branch::kCondBranch:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      EmitBcond(condition, lhs, rhs, offset);
+      EmitBcondR2(condition, lhs, rhs, offset);
       Nop();  // TODO: improve by filling the delay slot.
       break;
     case Branch::kCall:
@@ -1561,7 +1803,7 @@
       // Note: the opposite condition branch encodes 8 as the distance, which is equal to the
       // number of instructions skipped:
       // (PUSH(IncreaseFrameSize(ADDIU) + SW) + NAL + LUI + ORI + ADDU + LW + JR).
-      EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, 8);
+      EmitBcondR2(Branch::OppositeCondition(condition), lhs, rhs, 8);
       Push(RA);
       Nal();
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
@@ -1589,8 +1831,8 @@
       break;
     case Branch::kR6CondBranch:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      EmitBcondc(condition, lhs, rhs, offset);
-      Nop();  // TODO: improve by filling the forbidden slot.
+      EmitBcondR6(condition, lhs, rhs, offset);
+      Nop();  // TODO: improve by filling the forbidden/delay slot.
       break;
     case Branch::kR6Call:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
@@ -1606,7 +1848,7 @@
       Jic(AT, Low16Bits(offset));
       break;
     case Branch::kR6LongCondBranch:
-      EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2);
+      EmitBcondR6(Branch::OppositeCondition(condition), lhs, rhs, 2);
       offset += (offset & 0x8000) << 1;  // Account for sign extension in jic.
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
       Auipc(AT, High16Bits(offset));
@@ -1708,6 +1950,24 @@
   }
 }
 
+void MipsAssembler::Bc1f(int cc, MipsLabel* label) {
+  CHECK(IsUint<3>(cc)) << cc;
+  Bcond(label, kCondF, static_cast<Register>(cc), ZERO);
+}
+
+void MipsAssembler::Bc1t(int cc, MipsLabel* label) {
+  CHECK(IsUint<3>(cc)) << cc;
+  Bcond(label, kCondT, static_cast<Register>(cc), ZERO);
+}
+
+void MipsAssembler::Bc1eqz(FRegister ft, MipsLabel* label) {
+  Bcond(label, kCondF, static_cast<Register>(ft), ZERO);
+}
+
+void MipsAssembler::Bc1nez(FRegister ft, MipsLabel* label) {
+  Bcond(label, kCondT, static_cast<Register>(ft), ZERO);
+}
+
 void MipsAssembler::LoadFromOffset(LoadOperandType type, Register reg, Register base,
                                    int32_t offset) {
   // IsInt<16> must be passed a signed value.
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 62366f6..f569aa8 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -72,8 +72,8 @@
       : scratch_(scratch), stack_adjust_(stack_adjust) {}
 
   MipsExceptionSlowPath(MipsExceptionSlowPath&& src)
-      : scratch_(std::move(src.scratch_)),
-        stack_adjust_(std::move(src.stack_adjust_)),
+      : scratch_(src.scratch_),
+        stack_adjust_(src.stack_adjust_),
         exception_entry_(std::move(src.exception_entry_)) {}
 
  private:
@@ -185,6 +185,8 @@
   void Bgez(Register rt, uint16_t imm16);
   void Blez(Register rt, uint16_t imm16);
   void Bgtz(Register rt, uint16_t imm16);
+  void Bc1f(int cc, uint16_t imm16);  // R2
+  void Bc1t(int cc, uint16_t imm16);  // R2
   void J(uint32_t addr26);
   void Jal(uint32_t addr26);
   void Jalr(Register rd, Register rs);
@@ -208,6 +210,8 @@
   void Bnec(Register rs, Register rt, uint16_t imm16);  // R6
   void Beqzc(Register rs, uint32_t imm21);  // R6
   void Bnezc(Register rs, uint32_t imm21);  // R6
+  void Bc1eqz(FRegister ft, uint16_t imm16);  // R6
+  void Bc1nez(FRegister ft, uint16_t imm16);  // R6
 
   void AddS(FRegister fd, FRegister fs, FRegister ft);
   void SubS(FRegister fd, FRegister fs, FRegister ft);
@@ -222,6 +226,43 @@
   void NegS(FRegister fd, FRegister fs);
   void NegD(FRegister fd, FRegister fs);
 
+  void CunS(int cc, FRegister fs, FRegister ft);  // R2
+  void CeqS(int cc, FRegister fs, FRegister ft);  // R2
+  void CueqS(int cc, FRegister fs, FRegister ft);  // R2
+  void ColtS(int cc, FRegister fs, FRegister ft);  // R2
+  void CultS(int cc, FRegister fs, FRegister ft);  // R2
+  void ColeS(int cc, FRegister fs, FRegister ft);  // R2
+  void CuleS(int cc, FRegister fs, FRegister ft);  // R2
+  void CunD(int cc, FRegister fs, FRegister ft);  // R2
+  void CeqD(int cc, FRegister fs, FRegister ft);  // R2
+  void CueqD(int cc, FRegister fs, FRegister ft);  // R2
+  void ColtD(int cc, FRegister fs, FRegister ft);  // R2
+  void CultD(int cc, FRegister fs, FRegister ft);  // R2
+  void ColeD(int cc, FRegister fs, FRegister ft);  // R2
+  void CuleD(int cc, FRegister fs, FRegister ft);  // R2
+  void CmpUnS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpEqS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUeqS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpLtS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUltS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpLeS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUleS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpOrS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUneS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpNeS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUnD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpEqD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUeqD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpLtD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUltD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpLeD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUleD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpOrD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpUneD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void CmpNeD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void Movf(Register rd, Register rs, int cc);  // R2
+  void Movt(Register rd, Register rs, int cc);  // R2
+
   void Cvtsw(FRegister fd, FRegister fs);
   void Cvtdw(FRegister fd, FRegister fs);
   void Cvtsd(FRegister fd, FRegister fs);
@@ -267,6 +308,10 @@
   void Bge(Register rs, Register rt, MipsLabel* label);
   void Bltu(Register rs, Register rt, MipsLabel* label);
   void Bgeu(Register rs, Register rt, MipsLabel* label);
+  void Bc1f(int cc, MipsLabel* label);  // R2
+  void Bc1t(int cc, MipsLabel* label);  // R2
+  void Bc1eqz(FRegister ft, MipsLabel* label);  // R6
+  void Bc1nez(FRegister ft, MipsLabel* label);  // R6
 
   void EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset, size_t size);
   void LoadFromOffset(LoadOperandType type, Register reg, Register base, int32_t offset);
@@ -296,7 +341,8 @@
   //
 
   // Emit code that will create an activation on the stack.
-  void BuildFrame(size_t frame_size, ManagedRegister method_reg,
+  void BuildFrame(size_t frame_size,
+                  ManagedRegister method_reg,
                   const std::vector<ManagedRegister>& callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
@@ -314,58 +360,85 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm, ManagedRegister mscratch)
-      OVERRIDE;
+  void StoreImmediateToThread32(ThreadOffset<kMipsWordSize> dest,
+                                uint32_t imm,
+                                ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackOffsetToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs,
+  void StoreStackOffsetToThread32(ThreadOffset<kMipsWordSize> thr_offs,
+                                  FrameOffset fr_offs,
                                   ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackPointerToThread32(ThreadOffset<4> thr_offs) OVERRIDE;
+  void StoreStackPointerToThread32(ThreadOffset<kMipsWordSize> thr_offs) OVERRIDE;
 
-  void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
+  void StoreSpanning(FrameOffset dest,
+                     ManagedRegister msrc,
+                     FrameOffset in_off,
                      ManagedRegister mscratch) OVERRIDE;
 
   // Load routines.
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread32(ManagedRegister mdest, ThreadOffset<4> src, size_t size) OVERRIDE;
+  void LoadFromThread32(ManagedRegister mdest,
+                        ThreadOffset<kMipsWordSize> src,
+                        size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
-  void LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
+  void LoadRef(ManagedRegister mdest,
+               ManagedRegister base,
+               MemberOffset offs,
                bool unpoison_reference) OVERRIDE;
 
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset<4> offs) OVERRIDE;
+  void LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset<kMipsWordSize> offs) OVERRIDE;
 
   // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs,
+  void CopyRawPtrFromThread32(FrameOffset fr_offs,
+                              ThreadOffset<kMipsWordSize> thr_offs,
                               ManagedRegister mscratch) OVERRIDE;
 
-  void CopyRawPtrToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs,
+  void CopyRawPtrToThread32(ThreadOffset<kMipsWordSize> thr_offs,
+                            FrameOffset fr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
 
   void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE;
 
-  void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister mscratch,
+  void Copy(FrameOffset dest,
+            ManagedRegister src_base,
+            Offset src_offset,
+            ManagedRegister mscratch,
             size_t size) OVERRIDE;
 
-  void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-            ManagedRegister mscratch, size_t size) OVERRIDE;
-
-  void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister mscratch,
+  void Copy(ManagedRegister dest_base,
+            Offset dest_offset,
+            FrameOffset src,
+            ManagedRegister mscratch,
             size_t size) OVERRIDE;
 
-  void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
-            ManagedRegister mscratch, size_t size) OVERRIDE;
+  void Copy(FrameOffset dest,
+            FrameOffset src_base,
+            Offset src_offset,
+            ManagedRegister mscratch,
+            size_t size) OVERRIDE;
 
-  void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-            ManagedRegister mscratch, size_t size) OVERRIDE;
+  void Copy(ManagedRegister dest,
+            Offset dest_offset,
+            ManagedRegister src,
+            Offset src_offset,
+            ManagedRegister mscratch,
+            size_t size) OVERRIDE;
+
+  void Copy(FrameOffset dest,
+            Offset dest_offset,
+            FrameOffset src,
+            Offset src_offset,
+            ManagedRegister mscratch,
+            size_t size) OVERRIDE;
 
   void MemoryBarrier(ManagedRegister) OVERRIDE;
 
@@ -383,13 +456,17 @@
   // value is null and null_allowed. in_reg holds a possibly stale reference
   // that can be used to avoid loading the handle scope entry to see if the value is
   // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) OVERRIDE;
+  void CreateHandleScopeEntry(ManagedRegister out_reg,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister in_reg,
+                              bool null_allowed) OVERRIDE;
 
   // Set up out_off to hold a Object** into the handle scope, or to be null if the
   // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister mscratch, bool null_allowed) OVERRIDE;
+  void CreateHandleScopeEntry(FrameOffset out_off,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister mscratch,
+                              bool null_allowed) OVERRIDE;
 
   // src holds a handle scope entry (Object**) load this into dst.
   void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
@@ -402,7 +479,7 @@
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset<4> offset, ManagedRegister mscratch) OVERRIDE;
+  void CallFromThread32(ThreadOffset<kMipsWordSize> offset, ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
@@ -437,6 +514,8 @@
     kCondNEZ,
     kCondLTU,
     kCondGEU,
+    kCondF,    // Floating-point predicate false.
+    kCondT,    // Floating-point predicate true.
     kUncond,
   };
   friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
@@ -543,7 +622,22 @@
     //
     // Composite branches (made of several instructions) with longer reach have 32-bit
     // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first).
-    // The composite branches cover the range of PC + +/-2GB.
+    // The composite branches cover the range of PC + +/-2GB on MIPS32 CPUs. However,
+    // the range is not end-to-end on MIPS64 (unless addresses are forced to zero- or
+    // sign-extend from 32 to 64 bits by the appropriate CPU configuration).
+    // Consider the following implementation of a long unconditional branch, for
+    // example:
+    //
+    //   auipc at, offset_31_16  // at = pc + sign_extend(offset_31_16) << 16
+    //   jic   at, offset_15_0   // pc = at + sign_extend(offset_15_0)
+    //
+    // Both of the above instructions take 16-bit signed offsets as immediate operands.
+    // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000
+    // due to sign extension. This must be compensated for by incrementing offset_31_16
+    // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is
+    // 0x7FFF, adding 1 will overflow the positive offset into the negative range.
+    // Therefore, the long branch range is something like from PC - 0x80000000 to
+    // PC + 0x7FFF7FFF, IOW, shorter by 32KB on one side.
     //
     // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special
     // case with the addiu instruction and a 16 bit offset.
@@ -580,17 +674,17 @@
     // Helper for the above.
     void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
 
-    uint32_t old_location_;          // Offset into assembler buffer in bytes.
-    uint32_t location_;              // Offset into assembler buffer in bytes.
-    uint32_t target_;                // Offset into assembler buffer in bytes.
+    uint32_t old_location_;      // Offset into assembler buffer in bytes.
+    uint32_t location_;          // Offset into assembler buffer in bytes.
+    uint32_t target_;            // Offset into assembler buffer in bytes.
 
-    uint32_t lhs_reg_ : 5;           // Left-hand side register in conditional branches or
-                                     // indirect call register.
-    uint32_t rhs_reg_ : 5;           // Right-hand side register in conditional branches.
-    BranchCondition condition_ : 5;  // Condition for conditional branches.
+    uint32_t lhs_reg_;           // Left-hand side register in conditional branches or
+                                 // indirect call register.
+    uint32_t rhs_reg_;           // Right-hand side register in conditional branches.
+    BranchCondition condition_;  // Condition for conditional branches.
 
-    Type type_ : 5;                  // Current type of the branch.
-    Type old_type_ : 5;              // Initial type of the branch.
+    Type type_;                  // Current type of the branch.
+    Type old_type_;              // Initial type of the branch.
   };
   friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
   friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
@@ -601,8 +695,8 @@
   void EmitI26(int opcode, uint32_t imm26);
   void EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, int funct);
   void EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm);
-  void EmitBcond(BranchCondition cond, Register rs, Register rt, uint16_t imm16);
-  void EmitBcondc(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21);  // R6
+  void EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16);
+  void EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21);
 
   void Buncond(MipsLabel* label);
   void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO);
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 063d8bd..6f8b3e8 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -21,6 +21,8 @@
 #include "base/stl_util.h"
 #include "utils/assembler_test.h"
 
+#define __ GetAssembler()->
+
 namespace art {
 
 struct MIPSCpuRegisterCompare {
@@ -184,6 +186,63 @@
     return result;
   }
 
+  void BranchCondOneRegHelper(void (mips::MipsAssembler::*f)(mips::Register,
+                                                             mips::MipsLabel*),
+                              std::string instr_name) {
+    mips::MipsLabel label;
+    (Base::GetAssembler()->*f)(mips::A0, &label);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips::A1, &label);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, 1f\n"
+        "nop\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a1, 1b\n"
+        "nop\n";
+    DriverStr(expected, instr_name);
+  }
+
+  void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
+                                                              mips::Register,
+                                                              mips::MipsLabel*),
+                               std::string instr_name) {
+    mips::MipsLabel label;
+    (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, $a1, 1f\n"
+        "nop\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a2, $a3, 1b\n"
+        "nop\n";
+    DriverStr(expected, instr_name);
+  }
+
  private:
   std::vector<mips::Register*> registers_;
   std::map<mips::Register, std::string, MIPSCpuRegisterCompare> secondary_register_names_;
@@ -196,8 +255,6 @@
   EXPECT_TRUE(CheckTools());
 }
 
-#define __ GetAssembler()->
-
 TEST_F(AssemblerMIPSTest, Addu) {
   DriverStr(RepeatRRR(&mips::MipsAssembler::Addu, "addu ${reg1}, ${reg2}, ${reg3}"), "Addu");
 }
@@ -418,6 +475,84 @@
   DriverStr(RepeatFF(&mips::MipsAssembler::NegD, "neg.d ${reg1}, ${reg2}"), "NegD");
 }
 
+TEST_F(AssemblerMIPSTest, CunS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CunS, 3, "c.un.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CunS");
+}
+
+TEST_F(AssemblerMIPSTest, CeqS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CeqS, 3, "c.eq.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CeqS");
+}
+
+TEST_F(AssemblerMIPSTest, CueqS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CueqS, 3, "c.ueq.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CueqS");
+}
+
+TEST_F(AssemblerMIPSTest, ColtS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::ColtS, 3, "c.olt.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "ColtS");
+}
+
+TEST_F(AssemblerMIPSTest, CultS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CultS, 3, "c.ult.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CultS");
+}
+
+TEST_F(AssemblerMIPSTest, ColeS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::ColeS, 3, "c.ole.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "ColeS");
+}
+
+TEST_F(AssemblerMIPSTest, CuleS) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CuleS, 3, "c.ule.s $fcc{imm}, ${reg1}, ${reg2}"),
+            "CuleS");
+}
+
+TEST_F(AssemblerMIPSTest, CunD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CunD, 3, "c.un.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CunD");
+}
+
+TEST_F(AssemblerMIPSTest, CeqD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CeqD, 3, "c.eq.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CeqD");
+}
+
+TEST_F(AssemblerMIPSTest, CueqD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CueqD, 3, "c.ueq.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CueqD");
+}
+
+TEST_F(AssemblerMIPSTest, ColtD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::ColtD, 3, "c.olt.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "ColtD");
+}
+
+TEST_F(AssemblerMIPSTest, CultD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CultD, 3, "c.ult.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CultD");
+}
+
+TEST_F(AssemblerMIPSTest, ColeD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::ColeD, 3, "c.ole.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "ColeD");
+}
+
+TEST_F(AssemblerMIPSTest, CuleD) {
+  DriverStr(RepeatIbFF(&mips::MipsAssembler::CuleD, 3, "c.ule.d $fcc{imm}, ${reg1}, ${reg2}"),
+            "CuleD");
+}
+
+TEST_F(AssemblerMIPSTest, Movf) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Movf, 3, "movf ${reg1}, ${reg2}, $fcc{imm}"), "Movf");
+}
+
+TEST_F(AssemblerMIPSTest, Movt) {
+  DriverStr(RepeatRRIb(&mips::MipsAssembler::Movt, 3, "movt ${reg1}, ${reg2}, $fcc{imm}"), "Movt");
+}
+
 TEST_F(AssemblerMIPSTest, CvtSW) {
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "CvtSW");
 }
@@ -1000,55 +1135,11 @@
 }
 
 TEST_F(AssemblerMIPSTest, Beq) {
-  mips::MipsLabel label;
-  __ Beq(mips::A0, mips::A1, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Beq(mips::A2, mips::A3, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "beq $a0, $a1, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "beq $a2, $a3, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Beq");
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq");
 }
 
 TEST_F(AssemblerMIPSTest, Bne) {
-  mips::MipsLabel label;
-  __ Bne(mips::A0, mips::A1, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bne(mips::A2, mips::A3, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bne $a0, $a1, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bne $a2, $a3, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bne");
+  BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne");
 }
 
 TEST_F(AssemblerMIPSTest, Beqz) {
@@ -1104,107 +1195,19 @@
 }
 
 TEST_F(AssemblerMIPSTest, Bltz) {
-  mips::MipsLabel label;
-  __ Bltz(mips::A0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bltz(mips::A1, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bltz $a0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bltz $a1, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bltz");
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz");
 }
 
 TEST_F(AssemblerMIPSTest, Bgez) {
-  mips::MipsLabel label;
-  __ Bgez(mips::A0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bgez(mips::A1, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bgez $a0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bgez $a1, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bgez");
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez");
 }
 
 TEST_F(AssemblerMIPSTest, Blez) {
-  mips::MipsLabel label;
-  __ Blez(mips::A0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Blez(mips::A1, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "blez $a0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "blez $a1, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Blez");
+  BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez");
 }
 
 TEST_F(AssemblerMIPSTest, Bgtz) {
-  mips::MipsLabel label;
-  __ Bgtz(mips::A0, &label);
-  constexpr size_t kAdduCount1 = 63;
-  for (size_t i = 0; i != kAdduCount1; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bind(&label);
-  constexpr size_t kAdduCount2 = 64;
-  for (size_t i = 0; i != kAdduCount2; ++i) {
-    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
-  }
-  __ Bgtz(mips::A1, &label);
-
-  std::string expected =
-      ".set noreorder\n"
-      "bgtz $a0, 1f\n"
-      "nop\n" +
-      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
-      "1:\n" +
-      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
-      "bgtz $a1, 1b\n"
-      "nop\n";
-  DriverStr(expected, "Bgtz");
+  BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz");
 }
 
 TEST_F(AssemblerMIPSTest, Blt) {
@@ -1319,6 +1322,58 @@
   DriverStr(expected, "Bgeu");
 }
 
+TEST_F(AssemblerMIPSTest, Bc1f) {
+  mips::MipsLabel label;
+  __ Bc1f(0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bc1f(7, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc1f $fcc0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bc1f $fcc7, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bc1f");
+}
+
+TEST_F(AssemblerMIPSTest, Bc1t) {
+  mips::MipsLabel label;
+  __ Bc1t(0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bc1t(7, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc1t $fcc0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bc1t $fcc7, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bc1t");
+}
+
 #undef __
 
 }  // namespace art
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 9a18635..1a2f2c2 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -471,18 +471,19 @@
 /*
  * Callback for dumping each positions table entry.
  */
-static bool dumpPositionsCb(void* /*context*/, u4 address, u4 lineNum) {
-  fprintf(gOutFile, "        0x%04x line=%d\n", address, lineNum);
+static bool dumpPositionsCb(void* /*context*/, const DexFile::PositionInfo& entry) {
+  fprintf(gOutFile, "        0x%04x line=%d\n", entry.address_, entry.line_);
   return false;
 }
 
 /*
  * Callback for dumping locals table entry.
  */
-static void dumpLocalsCb(void* /*context*/, u2 slot, u4 startAddress, u4 endAddress,
-                         const char* name, const char* descriptor, const char* signature) {
+static void dumpLocalsCb(void* /*context*/, const DexFile::LocalInfo& entry) {
+  const char* signature = entry.signature_ != nullptr ? entry.signature_ : "";
   fprintf(gOutFile, "        0x%04x - 0x%04x reg=%d %s %s %s\n",
-          startAddress, endAddress, slot, name, descriptor, signature);
+          entry.start_address_, entry.end_address_, entry.reg_,
+          entry.name_, entry.descriptor_, signature);
 }
 
 /*
@@ -900,11 +901,9 @@
   // Positions and locals table in the debug info.
   bool is_static = (flags & kAccStatic) != 0;
   fprintf(gOutFile, "      positions     : \n");
-  pDexFile->DecodeDebugInfo(
-      pCode, is_static, idx, dumpPositionsCb, nullptr, nullptr);
+  pDexFile->DecodeDebugPositionInfo(pCode, dumpPositionsCb, nullptr);
   fprintf(gOutFile, "      locals        : \n");
-  pDexFile->DecodeDebugInfo(
-      pCode, is_static, idx, nullptr, dumpLocalsCb, nullptr);
+  pDexFile->DecodeDebugLocalInfo(pCode, is_static, idx, dumpLocalsCb, nullptr);
 }
 
 /*
diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc
index 1d0f75e..d20c169 100644
--- a/dexlist/dexlist.cc
+++ b/dexlist/dexlist.cc
@@ -80,10 +80,10 @@
  * first line in the method, which *should* correspond to the first
  * entry from the table.  (Could also use "min" here.)
  */
-static bool positionsCb(void* context, u4 /*address*/, u4 lineNum) {
+static bool positionsCb(void* context, const DexFile::PositionInfo& entry) {
   int* pFirstLine = reinterpret_cast<int *>(context);
   if (*pFirstLine == -1) {
-    *pFirstLine = lineNum;
+    *pFirstLine = entry.line_;
   }
   return 0;
 }
@@ -92,7 +92,7 @@
  * Dumps a method.
  */
 static void dumpMethod(const DexFile* pDexFile,
-                       const char* fileName, u4 idx, u4 flags,
+                       const char* fileName, u4 idx, u4 flags ATTRIBUTE_UNUSED,
                        const DexFile::CodeItem* pCode, u4 codeOffset) {
   // Abstract and native methods don't get listed.
   if (pCode == nullptr || codeOffset == 0) {
@@ -121,9 +121,7 @@
 
   // Find the first line.
   int firstLine = -1;
-  bool is_static = (flags & kAccStatic) != 0;
-  pDexFile->DecodeDebugInfo(
-     pCode, is_static, idx, positionsCb, nullptr, &firstLine);
+  pDexFile->DecodeDebugPositionInfo(pCode, positionsCb, &firstLine);
 
   // Method signature.
   const Signature signature = pDexFile->GetMethodSignature(pMethodId);
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index 2d15f6f..cd64a4f 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -111,6 +111,8 @@
   { kRTypeMask | (0x1f << 21), 63, "dsra32", "DTA", },
 
   // SPECIAL0
+  { kSpecial0Mask | 0x307ff, 1, "movf", "DSc" },
+  { kSpecial0Mask | 0x307ff, 0x10001, "movt", "DSc" },
   { kSpecial0Mask | 0x7ff, (2 << 6) | 24, "mul", "DST" },
   { kSpecial0Mask | 0x7ff, (3 << 6) | 24, "muh", "DST" },
   { kSpecial0Mask | 0x7ff, (2 << 6) | 25, "mulu", "DST" },
@@ -216,6 +218,11 @@
   { kITypeMask | (0x1f << 21), 15 << kOpcodeShift, "lui", "TI", },
   { kITypeMask, 15 << kOpcodeShift, "aui", "TSI", },
 
+  { kITypeMask | (0x3e3 << 16), (17 << kOpcodeShift) | (8 << 21), "bc1f", "cB" },
+  { kITypeMask | (0x3e3 << 16), (17 << kOpcodeShift) | (8 << 21) | (1 << 16), "bc1t", "cB" },
+  { kITypeMask | (0x1f << 21), (17 << kOpcodeShift) | (9 << 21), "bc1eqz", "tB" },
+  { kITypeMask | (0x1f << 21), (17 << kOpcodeShift) | (13 << 21), "bc1nez", "tB" },
+
   { kITypeMask | (0x1f << 21), 22 << kOpcodeShift, "blezc", "TB" },
 
   // TODO: de-dup
@@ -333,6 +340,26 @@
   { kFpMask | (0x1f << 21), kCop1 | (0x04 << 21), "mtc1", "Td" },
   { kFpMask | (0x1f << 21), kCop1 | (0x05 << 21), "dmtc1", "Td" },
   { kFpMask | (0x1f << 21), kCop1 | (0x07 << 21), "mthc1", "Td" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 1, "cmp.un.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 2, "cmp.eq.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 3, "cmp.ueq.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 4, "cmp.lt.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 5, "cmp.ult.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 6, "cmp.le.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 7, "cmp.ule.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 17, "cmp.or.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 18, "cmp.une.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x14 << 21) | 19, "cmp.ne.s", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 1, "cmp.un.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 2, "cmp.eq.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 3, "cmp.ueq.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 4, "cmp.lt.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 5, "cmp.ult.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 6, "cmp.le.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 7, "cmp.ule.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 17, "cmp.or.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 18, "cmp.une.d", "adt" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x15 << 21) | 19, "cmp.ne.d", "adt" },
   { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 0, "add", "fadt" },
   { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 1, "sub", "fadt" },
   { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 2, "mul", "fadt" },
@@ -356,6 +383,13 @@
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 36, "cvt.w", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 37, "cvt.l", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 38, "cvt.ps", "fad" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 49, "c.un", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 50, "c.eq", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 51, "c.ueq", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 52, "c.olt", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 53, "c.ult", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 54, "c.ole", "fCdt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 55, "c.ule", "fCdt" },
   { kFpMask, kCop1 | 0x10, "sel", "fadt" },
   { kFpMask, kCop1 | 0x1e, "max", "fadt" },
   { kFpMask, kCop1 | 0x1c, "min", "fadt" },
@@ -408,6 +442,12 @@
                    << StringPrintf("  ; %+d", offset);
             }
             break;
+          case 'C':  // Floating-point condition code flag in c.<cond>.fmt.
+            args << "cc" << (sa >> 2);
+            break;
+          case 'c':  // Floating-point condition code flag in bc1f/bc1t and movf/movt.
+            args << "cc" << (rt >> 2);
+            break;
           case 'D': args << 'r' << rd; break;
           case 'd': args << 'f' << rd; break;
           case 'a': args << 'f' << sa; break;
diff --git a/runtime/Android.mk b/runtime/Android.mk
index f2d7f17..4096117 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -42,6 +42,7 @@
   check_jni.cc \
   class_linker.cc \
   class_table.cc \
+  code_simulator_container.cc \
   common_throws.cc \
   debugger.cc \
   dex_file.cc \
@@ -501,9 +502,19 @@
   LOCAL_C_INCLUDES += art
 
   ifeq ($$(art_static_or_shared),static)
-    LOCAL_STATIC_LIBRARIES := libnativehelper libnativebridge libsigchain_dummy libbacktrace liblz4
+    LOCAL_STATIC_LIBRARIES := libnativehelper
+    LOCAL_STATIC_LIBRARIES += libnativebridge
+    LOCAL_STATIC_LIBRARIES += libnativeloader
+    LOCAL_STATIC_LIBRARIES += libsigchain_dummy
+    LOCAL_STATIC_LIBRARIES += libbacktrace
+    LOCAL_STATIC_LIBRARIES += liblz4
   else
-    LOCAL_SHARED_LIBRARIES := libnativehelper libnativebridge libsigchain libbacktrace liblz4
+    LOCAL_SHARED_LIBRARIES := libnativehelper
+    LOCAL_SHARED_LIBRARIES += libnativebridge
+    LOCAL_SHARED_LIBRARIES += libnativeloader
+    LOCAL_SHARED_LIBRARIES += libsigchain
+    LOCAL_SHARED_LIBRARIES += libbacktrace
+    LOCAL_SHARED_LIBRARIES += liblz4
   endif
 
   ifeq ($$(art_target_or_host),target)
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index be33b0e..7141181 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -166,6 +166,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = artReadBarrierMark;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 }
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 63285a4..5c8ff8f 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -149,6 +149,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = artReadBarrierMark;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/mips/entrypoints_direct_mips.h b/runtime/arch/mips/entrypoints_direct_mips.h
index 74e7638..0d01ad5 100644
--- a/runtime/arch/mips/entrypoints_direct_mips.h
+++ b/runtime/arch/mips/entrypoints_direct_mips.h
@@ -45,6 +45,7 @@
       entrypoint == kQuickCmpgFloat ||
       entrypoint == kQuickCmplDouble ||
       entrypoint == kQuickCmplFloat ||
+      entrypoint == kQuickReadBarrierMark ||
       entrypoint == kQuickReadBarrierSlow ||
       entrypoint == kQuickReadBarrierForRootSlow;
 }
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index cba427d..51eb77f 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -274,6 +274,8 @@
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierJni), "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMark = artReadBarrierMark;
+  static_assert(IsDirectEntrypoint(kQuickReadBarrierMark), "Direct C stub not marked direct.");
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   static_assert(IsDirectEntrypoint(kQuickReadBarrierSlow), "Direct C stub not marked direct.");
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 89f54dd..4bdb38e 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -180,6 +180,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = artReadBarrierMark;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index aca49cc..e593f39 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -28,6 +28,7 @@
                                             const mirror::Class* ref_class);
 
 // Read barrier entrypoints.
+extern "C" mirror::Object* art_quick_read_barrier_mark(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
@@ -158,6 +159,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = art_quick_read_barrier_mark;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
   qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 };
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 463c9cf..da30331 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1686,6 +1686,14 @@
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
+DEFINE_FUNCTION art_quick_read_barrier_mark
+    PUSH eax                         // pass arg1 - obj
+    call SYMBOL(artReadBarrierMark)  // artReadBarrierMark(obj)
+    addl LITERAL(4), %esp            // pop argument
+    CFI_ADJUST_CFA_OFFSET(-4)
+    ret
+END_FUNCTION art_quick_read_barrier_mark
+
 DEFINE_FUNCTION art_quick_read_barrier_slow
     PUSH edx                         // pass arg3 - offset
     PUSH ecx                         // pass arg2 - obj
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index ebe6d40..0a5d14a 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -29,6 +29,7 @@
                                                    const mirror::Class* ref_class);
 
 // Read barrier entrypoints.
+extern "C" mirror::Object* art_quick_read_barrier_mark(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
@@ -162,6 +163,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierMark = art_quick_read_barrier_mark;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
   qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 #endif  // __APPLE__
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 17d277e..883da96 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1712,6 +1712,17 @@
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
+DEFINE_FUNCTION art_quick_read_barrier_mark
+    SETUP_FP_CALLEE_SAVE_FRAME
+    subq LITERAL(8), %rsp           // Alignment padding.
+    CFI_ADJUST_CFA_OFFSET(8)
+    call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
+    addq LITERAL(8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-8)
+    RESTORE_FP_CALLEE_SAVE_FRAME
+    ret
+END_FUNCTION art_quick_read_barrier_slow
+
 DEFINE_FUNCTION art_quick_read_barrier_slow
     SETUP_FP_CALLEE_SAVE_FRAME
     subq LITERAL(8), %rsp           // Alignment padding.
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 29c8232..c86614c 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -122,7 +122,7 @@
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.thread_local_pos.
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 168 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 169 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_end.
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 2cd1a4d..115c260 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -48,6 +48,7 @@
   bool oat;
   bool profiler;
   bool signals;
+  bool simulator;
   bool startup;
   bool third_party_jni;  // Enabled with "-verbose:third-party-jni".
   bool threads;
diff --git a/runtime/code_simulator_container.cc b/runtime/code_simulator_container.cc
new file mode 100644
index 0000000..d884c58
--- /dev/null
+++ b/runtime/code_simulator_container.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <dlfcn.h>
+
+#include "code_simulator_container.h"
+#include "globals.h"
+
+namespace art {
+
+CodeSimulatorContainer::CodeSimulatorContainer(InstructionSet target_isa)
+    : libart_simulator_handle_(nullptr),
+      simulator_(nullptr) {
+  const char* libart_simulator_so_name =
+      kIsDebugBuild ? "libartd-simulator.so" : "libart-simulator.so";
+  libart_simulator_handle_ = dlopen(libart_simulator_so_name, RTLD_NOW);
+  // It is not a real error when libart-simulator does not exist, e.g., on target.
+  if (libart_simulator_handle_ == nullptr) {
+    VLOG(simulator) << "Could not load " << libart_simulator_so_name << ": " << dlerror();
+  } else {
+    typedef CodeSimulator* (*create_code_simulator_ptr_)(InstructionSet target_isa);
+    create_code_simulator_ptr_ create_code_simulator_ =
+        reinterpret_cast<create_code_simulator_ptr_>(
+            dlsym(libart_simulator_handle_, "CreateCodeSimulator"));
+    DCHECK(create_code_simulator_ != nullptr) << "Fail to find symbol of CreateCodeSimulator: "
+        << dlerror();
+    simulator_ = create_code_simulator_(target_isa);
+  }
+}
+
+CodeSimulatorContainer::~CodeSimulatorContainer() {
+  // Free simulator object before closing libart-simulator because destructor of
+  // CodeSimulator lives in it.
+  if (simulator_ != nullptr) {
+    delete simulator_;
+  }
+  if (libart_simulator_handle_ != nullptr) {
+    dlclose(libart_simulator_handle_);
+  }
+}
+
+}  // namespace art
diff --git a/runtime/code_simulator_container.h b/runtime/code_simulator_container.h
new file mode 100644
index 0000000..655a247
--- /dev/null
+++ b/runtime/code_simulator_container.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_CODE_SIMULATOR_CONTAINER_H_
+#define ART_RUNTIME_CODE_SIMULATOR_CONTAINER_H_
+
+#include "arch/instruction_set.h"
+#include "simulator/code_simulator.h"
+
+namespace art {
+
+// This container dynamically opens and closes libart-simulator.
+class CodeSimulatorContainer {
+ public:
+  explicit CodeSimulatorContainer(InstructionSet target_isa);
+  ~CodeSimulatorContainer();
+
+  bool CanSimulate() const {
+    return simulator_ != nullptr;
+  }
+
+  CodeSimulator* Get() {
+    DCHECK(CanSimulate());
+    return simulator_;
+  }
+
+  const CodeSimulator* Get() const {
+    DCHECK(CanSimulate());
+    return simulator_;
+  }
+
+ private:
+  void* libart_simulator_handle_;
+  CodeSimulator* simulator_;
+
+  DISALLOW_COPY_AND_ASSIGN(CodeSimulatorContainer);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_CODE_SIMULATOR_CONTAINER_H_
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 51f57c3..e5d648b 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -1536,10 +1536,10 @@
     int numItems;
     JDWP::ExpandBuf* pReply;
 
-    static bool Callback(void* context, uint32_t address, uint32_t line_number) {
+    static bool Callback(void* context, const DexFile::PositionInfo& entry) {
       DebugCallbackContext* pContext = reinterpret_cast<DebugCallbackContext*>(context);
-      expandBufAdd8BE(pContext->pReply, address);
-      expandBufAdd4BE(pContext->pReply, line_number);
+      expandBufAdd8BE(pContext->pReply, entry.address_);
+      expandBufAdd4BE(pContext->pReply, entry.line_);
       pContext->numItems++;
       return false;
     }
@@ -1569,8 +1569,7 @@
   context.pReply = pReply;
 
   if (code_item != nullptr) {
-    m->GetDexFile()->DecodeDebugInfo(code_item, m->IsStatic(), m->GetDexMethodIndex(),
-                                     DebugCallbackContext::Callback, nullptr, &context);
+    m->GetDexFile()->DecodeDebugPositionInfo(code_item, DebugCallbackContext::Callback, &context);
   }
 
   JDWP::Set4BE(expandBufGetBuffer(pReply) + numLinesOffset, context.numItems);
@@ -1584,25 +1583,26 @@
     size_t variable_count;
     bool with_generic;
 
-    static void Callback(void* context, uint16_t slot, uint32_t startAddress, uint32_t endAddress,
-                         const char* name, const char* descriptor, const char* signature)
+    static void Callback(void* context, const DexFile::LocalInfo& entry)
         SHARED_REQUIRES(Locks::mutator_lock_) {
       DebugCallbackContext* pContext = reinterpret_cast<DebugCallbackContext*>(context);
 
+      uint16_t slot = entry.reg_;
       VLOG(jdwp) << StringPrintf("    %2zd: %d(%d) '%s' '%s' '%s' actual slot=%d mangled slot=%d",
-                                 pContext->variable_count, startAddress, endAddress - startAddress,
-                                 name, descriptor, signature, slot,
+                                 pContext->variable_count, entry.start_address_,
+                                 entry.end_address_ - entry.start_address_,
+                                 entry.name_, entry.descriptor_, entry.signature_, slot,
                                  MangleSlot(slot, pContext->method));
 
       slot = MangleSlot(slot, pContext->method);
 
-      expandBufAdd8BE(pContext->pReply, startAddress);
-      expandBufAddUtf8String(pContext->pReply, name);
-      expandBufAddUtf8String(pContext->pReply, descriptor);
+      expandBufAdd8BE(pContext->pReply, entry.start_address_);
+      expandBufAddUtf8String(pContext->pReply, entry.name_);
+      expandBufAddUtf8String(pContext->pReply, entry.descriptor_);
       if (pContext->with_generic) {
-        expandBufAddUtf8String(pContext->pReply, signature);
+        expandBufAddUtf8String(pContext->pReply, entry.signature_);
       }
-      expandBufAdd4BE(pContext->pReply, endAddress - startAddress);
+      expandBufAdd4BE(pContext->pReply, entry.end_address_- entry.start_address_);
       expandBufAdd4BE(pContext->pReply, slot);
 
       ++pContext->variable_count;
@@ -1627,8 +1627,8 @@
 
   const DexFile::CodeItem* code_item = m->GetCodeItem();
   if (code_item != nullptr) {
-    m->GetDexFile()->DecodeDebugInfo(
-        code_item, m->IsStatic(), m->GetDexMethodIndex(), nullptr, DebugCallbackContext::Callback,
+    m->GetDexFile()->DecodeDebugLocalInfo(
+        code_item, m->IsStatic(), m->GetDexMethodIndex(), DebugCallbackContext::Callback,
         &context);
   }
 
@@ -3716,19 +3716,19 @@
           code_item_(code_item), last_pc_valid(false), last_pc(0) {
     }
 
-    static bool Callback(void* raw_context, uint32_t address, uint32_t line_number_cb) {
+    static bool Callback(void* raw_context, const DexFile::PositionInfo& entry) {
       DebugCallbackContext* context = reinterpret_cast<DebugCallbackContext*>(raw_context);
-      if (static_cast<int32_t>(line_number_cb) == context->line_number_) {
+      if (static_cast<int32_t>(entry.line_) == context->line_number_) {
         if (!context->last_pc_valid) {
           // Everything from this address until the next line change is ours.
-          context->last_pc = address;
+          context->last_pc = entry.address_;
           context->last_pc_valid = true;
         }
         // Otherwise, if we're already in a valid range for this line,
         // just keep going (shouldn't really happen)...
       } else if (context->last_pc_valid) {  // and the line number is new
         // Add everything from the last entry up until here to the set
-        for (uint32_t dex_pc = context->last_pc; dex_pc < address; ++dex_pc) {
+        for (uint32_t dex_pc = context->last_pc; dex_pc < entry.address_; ++dex_pc) {
           context->single_step_control_->AddDexPc(dex_pc);
         }
         context->last_pc_valid = false;
@@ -3769,8 +3769,7 @@
   if (m != nullptr && !m->IsNative()) {
     const DexFile::CodeItem* const code_item = m->GetCodeItem();
     DebugCallbackContext context(single_step_control, line_number, code_item);
-    m->GetDexFile()->DecodeDebugInfo(code_item, m->IsStatic(), m->GetDexMethodIndex(),
-                                     DebugCallbackContext::Callback, nullptr, &context);
+    m->GetDexFile()->DecodeDebugPositionInfo(code_item, DebugCallbackContext::Callback, &context);
   }
 
   // Activate single-step in the thread.
@@ -4756,12 +4755,7 @@
   // Send a series of heap segment chunks.
   HeapChunkContext context(what == HPSG_WHAT_MERGED_OBJECTS, native);
   if (native) {
-#if defined(__ANDROID__) && defined(USE_DLMALLOC)
-    dlmalloc_inspect_all(HeapChunkContext::HeapChunkNativeCallback, &context);
-    HeapChunkContext::HeapChunkNativeCallback(nullptr, nullptr, 0, &context);  // Indicate end of a space.
-#else
-    UNIMPLEMENTED(WARNING) << "Native heap inspection is only supported with dlmalloc";
-#endif
+    UNIMPLEMENTED(WARNING) << "Native heap inspection is not supported";
   } else {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     for (const auto& space : heap->GetContinuousSpaces()) {
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 4e15e80..880d3e0 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -767,8 +767,7 @@
 
   // A method with no line number info should return -1
   LineNumFromPcContext context(rel_pc, -1);
-  DecodeDebugInfo(code_item, method->IsStatic(), method->GetDexMethodIndex(), LineNumForPcCb,
-                  nullptr, &context);
+  DecodeDebugPositionInfo(code_item, LineNumForPcCb, &context);
   return context.line_num_;
 }
 
@@ -805,45 +804,48 @@
   }
 }
 
-void DexFile::DecodeDebugInfo0(const CodeItem* code_item, bool is_static, uint32_t method_idx,
-                               DexDebugNewPositionCb position_cb, DexDebugNewLocalCb local_cb,
-                               void* context, const uint8_t* stream, LocalInfo* local_in_reg)
-    const {
-  uint32_t line = DecodeUnsignedLeb128(&stream);
-  uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
-  uint16_t arg_reg = code_item->registers_size_ - code_item->ins_size_;
-  uint32_t address = 0;
-  bool need_locals = (local_cb != nullptr);
+bool DexFile::DecodeDebugLocalInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
+                                   DexDebugNewLocalCb local_cb, void* context) const {
+  DCHECK(local_cb != nullptr);
+  if (code_item == nullptr) {
+    return false;
+  }
+  const uint8_t* stream = GetDebugInfoStream(code_item);
+  if (stream == nullptr) {
+    return false;
+  }
+  std::vector<LocalInfo> local_in_reg(code_item->registers_size_);
 
+  uint16_t arg_reg = code_item->registers_size_ - code_item->ins_size_;
   if (!is_static) {
-    if (need_locals) {
-      const char* descriptor = GetMethodDeclaringClassDescriptor(GetMethodId(method_idx));
-      local_in_reg[arg_reg].name_ = "this";
-      local_in_reg[arg_reg].descriptor_ = descriptor;
-      local_in_reg[arg_reg].signature_ = nullptr;
-      local_in_reg[arg_reg].start_address_ = 0;
-      local_in_reg[arg_reg].is_live_ = true;
-    }
+    const char* descriptor = GetMethodDeclaringClassDescriptor(GetMethodId(method_idx));
+    local_in_reg[arg_reg].name_ = "this";
+    local_in_reg[arg_reg].descriptor_ = descriptor;
+    local_in_reg[arg_reg].signature_ = nullptr;
+    local_in_reg[arg_reg].start_address_ = 0;
+    local_in_reg[arg_reg].reg_ = arg_reg;
+    local_in_reg[arg_reg].is_live_ = true;
     arg_reg++;
   }
 
   DexFileParameterIterator it(*this, GetMethodPrototype(GetMethodId(method_idx)));
-  for (uint32_t i = 0; i < parameters_size && it.HasNext(); ++i, it.Next()) {
+  DecodeUnsignedLeb128(&stream);  // Line.
+  uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
+  uint32_t i;
+  for (i = 0; i < parameters_size && it.HasNext(); ++i, it.Next()) {
     if (arg_reg >= code_item->registers_size_) {
       LOG(ERROR) << "invalid stream - arg reg >= reg size (" << arg_reg
                  << " >= " << code_item->registers_size_ << ") in " << GetLocation();
-      return;
+      return false;
     }
-    uint32_t id = DecodeUnsignedLeb128P1(&stream);
+    uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
     const char* descriptor = it.GetDescriptor();
-    if (need_locals && id != kDexNoIndex) {
-      const char* name = StringDataByIdx(id);
-      local_in_reg[arg_reg].name_ = name;
-      local_in_reg[arg_reg].descriptor_ = descriptor;
-      local_in_reg[arg_reg].signature_ = nullptr;
-      local_in_reg[arg_reg].start_address_ = address;
-      local_in_reg[arg_reg].is_live_ = true;
-    }
+    local_in_reg[arg_reg].name_ = StringDataByIdx(name_idx);
+    local_in_reg[arg_reg].descriptor_ = descriptor;
+    local_in_reg[arg_reg].signature_ = nullptr;
+    local_in_reg[arg_reg].start_address_ = 0;
+    local_in_reg[arg_reg].reg_ = arg_reg;
+    local_in_reg[arg_reg].is_live_ = true;
     switch (*descriptor) {
       case 'D':
       case 'J':
@@ -854,152 +856,188 @@
         break;
     }
   }
-
-  if (it.HasNext()) {
+  if (i != parameters_size || it.HasNext()) {
     LOG(ERROR) << "invalid stream - problem with parameter iterator in " << GetLocation()
                << " for method " << PrettyMethod(method_idx, *this);
-    return;
+    return false;
   }
 
+  uint32_t address = 0;
   for (;;)  {
     uint8_t opcode = *stream++;
-    uint16_t reg;
-    uint32_t name_idx;
-    uint32_t descriptor_idx;
-    uint32_t signature_idx = 0;
-
     switch (opcode) {
       case DBG_END_SEQUENCE:
-        return;
-
+        // Emit all variables which are still alive at the end of the method.
+        for (uint16_t reg = 0; reg < code_item->registers_size_; reg++) {
+          if (local_in_reg[reg].is_live_) {
+            local_in_reg[reg].end_address_ = code_item->insns_size_in_code_units_;
+            local_cb(context, local_in_reg[reg]);
+          }
+        }
+        return true;
       case DBG_ADVANCE_PC:
         address += DecodeUnsignedLeb128(&stream);
         break;
-
       case DBG_ADVANCE_LINE:
-        line += DecodeSignedLeb128(&stream);
+        DecodeSignedLeb128(&stream);  // Line.
         break;
-
       case DBG_START_LOCAL:
-      case DBG_START_LOCAL_EXTENDED:
-        reg = DecodeUnsignedLeb128(&stream);
-        if (reg > code_item->registers_size_) {
-          LOG(ERROR) << "invalid stream - reg > reg size (" << reg << " > "
+      case DBG_START_LOCAL_EXTENDED: {
+        uint16_t reg = DecodeUnsignedLeb128(&stream);
+        if (reg >= code_item->registers_size_) {
+          LOG(ERROR) << "invalid stream - reg >= reg size (" << reg << " >= "
                      << code_item->registers_size_ << ") in " << GetLocation();
-          return;
+          return false;
         }
 
-        name_idx = DecodeUnsignedLeb128P1(&stream);
-        descriptor_idx = DecodeUnsignedLeb128P1(&stream);
+        uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
+        uint32_t descriptor_idx = DecodeUnsignedLeb128P1(&stream);
+        uint32_t signature_idx = kDexNoIndex;
         if (opcode == DBG_START_LOCAL_EXTENDED) {
           signature_idx = DecodeUnsignedLeb128P1(&stream);
         }
 
         // Emit what was previously there, if anything
-        if (need_locals) {
-          InvokeLocalCbIfLive(context, reg, address, local_in_reg, local_cb);
+        if (local_in_reg[reg].is_live_) {
+          local_in_reg[reg].end_address_ = address;
+          local_cb(context, local_in_reg[reg]);
+        }
 
-          local_in_reg[reg].name_ = StringDataByIdx(name_idx);
-          local_in_reg[reg].descriptor_ = StringByTypeIdx(descriptor_idx);
-          local_in_reg[reg].signature_ =
-              (opcode == DBG_START_LOCAL_EXTENDED) ? StringDataByIdx(signature_idx)
-                                                   : nullptr;
+        local_in_reg[reg].name_ = StringDataByIdx(name_idx);
+        local_in_reg[reg].descriptor_ = StringByTypeIdx(descriptor_idx);
+        local_in_reg[reg].signature_ = StringDataByIdx(signature_idx);
+        local_in_reg[reg].start_address_ = address;
+        local_in_reg[reg].reg_ = reg;
+        local_in_reg[reg].is_live_ = true;
+        break;
+      }
+      case DBG_END_LOCAL: {
+        uint16_t reg = DecodeUnsignedLeb128(&stream);
+        if (reg >= code_item->registers_size_) {
+          LOG(ERROR) << "invalid stream - reg >= reg size (" << reg << " >= "
+                     << code_item->registers_size_ << ") in " << GetLocation();
+          return false;
+        }
+        if (!local_in_reg[reg].is_live_) {
+          LOG(ERROR) << "invalid stream - end without start in " << GetLocation();
+          return false;
+        }
+        local_in_reg[reg].end_address_ = address;
+        local_cb(context, local_in_reg[reg]);
+        local_in_reg[reg].is_live_ = false;
+        break;
+      }
+      case DBG_RESTART_LOCAL: {
+        uint16_t reg = DecodeUnsignedLeb128(&stream);
+        if (reg >= code_item->registers_size_) {
+          LOG(ERROR) << "invalid stream - reg >= reg size (" << reg << " >= "
+                     << code_item->registers_size_ << ") in " << GetLocation();
+          return false;
+        }
+        // If the register is live, the "restart" is superfluous,
+        // and we don't want to mess with the existing start address.
+        if (!local_in_reg[reg].is_live_) {
           local_in_reg[reg].start_address_ = address;
           local_in_reg[reg].is_live_ = true;
         }
         break;
-
-      case DBG_END_LOCAL:
-        reg = DecodeUnsignedLeb128(&stream);
-        if (reg > code_item->registers_size_) {
-          LOG(ERROR) << "invalid stream - reg > reg size (" << reg << " > "
-                     << code_item->registers_size_ << ") in " << GetLocation();
-          return;
-        }
-
-        if (need_locals) {
-          InvokeLocalCbIfLive(context, reg, address, local_in_reg, local_cb);
-          local_in_reg[reg].is_live_ = false;
-        }
-        break;
-
-      case DBG_RESTART_LOCAL:
-        reg = DecodeUnsignedLeb128(&stream);
-        if (reg > code_item->registers_size_) {
-          LOG(ERROR) << "invalid stream - reg > reg size (" << reg << " > "
-                     << code_item->registers_size_ << ") in " << GetLocation();
-          return;
-        }
-
-        if (need_locals) {
-          if (local_in_reg[reg].name_ == nullptr || local_in_reg[reg].descriptor_ == nullptr) {
-            LOG(ERROR) << "invalid stream - no name or descriptor in " << GetLocation();
-            return;
-          }
-
-          // If the register is live, the "restart" is superfluous,
-          // and we don't want to mess with the existing start address.
-          if (!local_in_reg[reg].is_live_) {
-            local_in_reg[reg].start_address_ = address;
-            local_in_reg[reg].is_live_ = true;
-          }
-        }
-        break;
-
+      }
       case DBG_SET_PROLOGUE_END:
       case DBG_SET_EPILOGUE_BEGIN:
-      case DBG_SET_FILE:
         break;
+      case DBG_SET_FILE:
+        DecodeUnsignedLeb128P1(&stream);  // name.
+        break;
+      default:
+        address += (opcode - DBG_FIRST_SPECIAL) / DBG_LINE_RANGE;
+        break;
+    }
+  }
+}
 
+bool DexFile::DecodeDebugPositionInfo(const CodeItem* code_item, DexDebugNewPositionCb position_cb,
+                                      void* context) const {
+  DCHECK(position_cb != nullptr);
+  if (code_item == nullptr) {
+    return false;
+  }
+  const uint8_t* stream = GetDebugInfoStream(code_item);
+  if (stream == nullptr) {
+    return false;
+  }
+
+  PositionInfo entry = PositionInfo();
+  entry.line_ = DecodeUnsignedLeb128(&stream);
+  uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
+  for (uint32_t i = 0; i < parameters_size; ++i) {
+    DecodeUnsignedLeb128P1(&stream);  // Parameter name.
+  }
+
+  for (;;)  {
+    uint8_t opcode = *stream++;
+    switch (opcode) {
+      case DBG_END_SEQUENCE:
+        return true;  // end of stream.
+      case DBG_ADVANCE_PC:
+        entry.address_ += DecodeUnsignedLeb128(&stream);
+        break;
+      case DBG_ADVANCE_LINE:
+        entry.line_ += DecodeSignedLeb128(&stream);
+        break;
+      case DBG_START_LOCAL:
+        DecodeUnsignedLeb128(&stream);  // reg.
+        DecodeUnsignedLeb128P1(&stream);  // name.
+        DecodeUnsignedLeb128P1(&stream);  // descriptor.
+        break;
+      case DBG_START_LOCAL_EXTENDED:
+        DecodeUnsignedLeb128(&stream);  // reg.
+        DecodeUnsignedLeb128P1(&stream);  // name.
+        DecodeUnsignedLeb128P1(&stream);  // descriptor.
+        DecodeUnsignedLeb128P1(&stream);  // signature.
+        break;
+      case DBG_END_LOCAL:
+      case DBG_RESTART_LOCAL:
+        DecodeUnsignedLeb128(&stream);  // reg.
+        break;
+      case DBG_SET_PROLOGUE_END:
+        entry.prologue_end_ = true;
+        break;
+      case DBG_SET_EPILOGUE_BEGIN:
+        entry.epilogue_begin_ = true;
+        break;
+      case DBG_SET_FILE: {
+        uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
+        entry.source_file_ = StringDataByIdx(name_idx);
+        break;
+      }
       default: {
         int adjopcode = opcode - DBG_FIRST_SPECIAL;
-
-        address += adjopcode / DBG_LINE_RANGE;
-        line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
-
-        if (position_cb != nullptr) {
-          if (position_cb(context, address, line)) {
-            // early exit
-            return;
-          }
+        entry.address_ += adjopcode / DBG_LINE_RANGE;
+        entry.line_ += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
+        if (position_cb(context, entry)) {
+          return true;  // early exit.
         }
+        entry.prologue_end_ = false;
+        entry.epilogue_begin_ = false;
         break;
       }
     }
   }
 }
 
-void DexFile::DecodeDebugInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
-                              DexDebugNewPositionCb position_cb, DexDebugNewLocalCb local_cb,
-                              void* context) const {
-  DCHECK(code_item != nullptr);
-  const uint8_t* stream = GetDebugInfoStream(code_item);
-  std::unique_ptr<LocalInfo[]> local_in_reg(local_cb != nullptr ?
-                                      new LocalInfo[code_item->registers_size_] :
-                                      nullptr);
-  if (stream != nullptr) {
-    DecodeDebugInfo0(code_item, is_static, method_idx, position_cb, local_cb, context, stream,
-                     &local_in_reg[0]);
-  }
-  for (int reg = 0; reg < code_item->registers_size_; reg++) {
-    InvokeLocalCbIfLive(context, reg, code_item->insns_size_in_code_units_, &local_in_reg[0],
-                        local_cb);
-  }
-}
-
-bool DexFile::LineNumForPcCb(void* raw_context, uint32_t address, uint32_t line_num) {
+bool DexFile::LineNumForPcCb(void* raw_context, const PositionInfo& entry) {
   LineNumFromPcContext* context = reinterpret_cast<LineNumFromPcContext*>(raw_context);
 
   // We know that this callback will be called in
   // ascending address order, so keep going until we find
   // a match or we've just gone past it.
-  if (address > context->address_) {
+  if (entry.address_ > context->address_) {
     // The line number from the previous positions callback
     // wil be the final result.
     return true;
   } else {
-    context->line_num_ = line_num;
-    return address == context->address_;
+    context->line_num_ = entry.line_;
+    return entry.address_ == context->address_;
   }
 }
 
@@ -2210,22 +2248,47 @@
 EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(
     const DexFile& dex_file,
     const DexFile::ClassDef& class_def)
-    : EncodedStaticFieldValueIterator(dex_file, nullptr, nullptr,
-                                      nullptr, class_def) {
+    : EncodedStaticFieldValueIterator(dex_file,
+                                      nullptr,
+                                      nullptr,
+                                      nullptr,
+                                      class_def,
+                                      -1,
+                                      kByte) {
 }
 
 EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(
-    const DexFile& dex_file, Handle<mirror::DexCache>* dex_cache,
-    Handle<mirror::ClassLoader>* class_loader, ClassLinker* linker,
+    const DexFile& dex_file,
+    Handle<mirror::DexCache>* dex_cache,
+    Handle<mirror::ClassLoader>* class_loader,
+    ClassLinker* linker,
     const DexFile::ClassDef& class_def)
+    : EncodedStaticFieldValueIterator(dex_file,
+                                      dex_cache, class_loader,
+                                      linker,
+                                      class_def,
+                                      -1,
+                                      kByte) {
+  DCHECK(dex_cache_ != nullptr);
+  DCHECK(class_loader_ != nullptr);
+}
+
+EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(
+    const DexFile& dex_file,
+    Handle<mirror::DexCache>* dex_cache,
+    Handle<mirror::ClassLoader>* class_loader,
+    ClassLinker* linker,
+    const DexFile::ClassDef& class_def,
+    size_t pos,
+    ValueType type)
     : dex_file_(dex_file),
       dex_cache_(dex_cache),
       class_loader_(class_loader),
       linker_(linker),
       array_size_(),
-      pos_(-1),
-      type_(kByte) {
-  ptr_ = dex_file_.GetEncodedStaticFieldValuesArray(class_def);
+      pos_(pos),
+      type_(type) {
+  ptr_ = dex_file.GetEncodedStaticFieldValuesArray(class_def);
   if (ptr_ == nullptr) {
     array_size_ = 0;
   } else {
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 6b019f1..8a3db6c 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -819,20 +819,50 @@
     }
   }
 
+  struct PositionInfo {
+    PositionInfo()
+        : address_(0),
+          line_(0),
+          source_file_(nullptr),
+          prologue_end_(false),
+          epilogue_begin_(false) {
+    }
+
+    uint32_t address_;  // In 16-bit code units.
+    uint32_t line_;  // Source code line number starting at 1.
+    const char* source_file_;  // nullptr if the file from ClassDef still applies.
+    bool prologue_end_;
+    bool epilogue_begin_;
+  };
+
   // Callback for "new position table entry".
   // Returning true causes the decoder to stop early.
-  typedef bool (*DexDebugNewPositionCb)(void* context, uint32_t address, uint32_t line_num);
+  typedef bool (*DexDebugNewPositionCb)(void* context, const PositionInfo& entry);
 
-  // Callback for "new locals table entry". "signature" is an empty string
-  // if no signature is available for an entry.
-  typedef void (*DexDebugNewLocalCb)(void* context, uint16_t reg,
-                                     uint32_t start_address,
-                                     uint32_t end_address,
-                                     const char* name,
-                                     const char* descriptor,
-                                     const char* signature);
+  struct LocalInfo {
+    LocalInfo()
+        : name_(nullptr),
+          descriptor_(nullptr),
+          signature_(nullptr),
+          start_address_(0),
+          end_address_(0),
+          reg_(0),
+          is_live_(false) {
+    }
 
-  static bool LineNumForPcCb(void* context, uint32_t address, uint32_t line_num);
+    const char* name_;  // E.g., list.  It can be nullptr if unknown.
+    const char* descriptor_;  // E.g., Ljava/util/LinkedList;
+    const char* signature_;  // E.g., java.util.LinkedList<java.lang.Integer>
+    uint32_t start_address_;  // PC location where the local is first defined.
+    uint32_t end_address_;  // PC location where the local is no longer defined.
+    uint16_t reg_;  // Dex register which stores the values.
+    bool is_live_;  // Is the local defined and live.
+  };
+
+  // Callback for "new locals table entry".
+  typedef void (*DexDebugNewLocalCb)(void* context, const LocalInfo& entry);
+
+  static bool LineNumForPcCb(void* context, const PositionInfo& entry);
 
   const AnnotationsDirectoryItem* GetAnnotationsDirectory(const ClassDef& class_def) const {
     if (class_def.annotations_off_ == 0) {
@@ -1044,21 +1074,6 @@
     DBG_LINE_RANGE           = 15,
   };
 
-  struct LocalInfo {
-    LocalInfo()
-        : name_(nullptr), descriptor_(nullptr), signature_(nullptr), start_address_(0),
-          is_live_(false) {}
-
-    const char* name_;  // E.g., list
-    const char* descriptor_;  // E.g., Ljava/util/LinkedList;
-    const char* signature_;  // E.g., java.util.LinkedList<java.lang.Integer>
-    uint16_t start_address_;  // PC location where the local is first defined.
-    bool is_live_;  // Is the local defined and live.
-
-   private:
-    DISALLOW_COPY_AND_ASSIGN(LocalInfo);
-  };
-
   struct LineNumFromPcContext {
     LineNumFromPcContext(uint32_t address, uint32_t line_num)
         : address_(address), line_num_(line_num) {}
@@ -1068,15 +1083,6 @@
     DISALLOW_COPY_AND_ASSIGN(LineNumFromPcContext);
   };
 
-  void InvokeLocalCbIfLive(void* context, int reg, uint32_t end_address,
-                           LocalInfo* local_in_reg, DexDebugNewLocalCb local_cb) const {
-    if (local_cb != nullptr && local_in_reg[reg].is_live_) {
-      local_cb(context, reg, local_in_reg[reg].start_address_, end_address,
-          local_in_reg[reg].name_, local_in_reg[reg].descriptor_,
-          local_in_reg[reg].signature_ != nullptr ? local_in_reg[reg].signature_ : "");
-    }
-  }
-
   // Determine the source file line number based on the program counter.
   // "pc" is an offset, in 16-bit units, from the start of the method's code.
   //
@@ -1088,9 +1094,13 @@
   int32_t GetLineNumFromPC(ArtMethod* method, uint32_t rel_pc) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void DecodeDebugInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
-                       DexDebugNewPositionCb position_cb, DexDebugNewLocalCb local_cb,
-                       void* context) const;
+  // Returns false if there is no debugging information or if it can not be decoded.
+  bool DecodeDebugLocalInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
+                            DexDebugNewLocalCb local_cb, void* context) const;
+
+  // Returns false if there is no debugging information or if it can not be decoded.
+  bool DecodeDebugPositionInfo(const CodeItem* code_item, DexDebugNewPositionCb position_cb,
+                               void* context) const;
 
   const char* GetSourceFile(const ClassDef& class_def) const {
     if (class_def.source_file_idx_ == 0xffffffff) {
@@ -1200,10 +1210,6 @@
   // Returns true if the header magic and version numbers are of the expected values.
   bool CheckMagicAndVersion(std::string* error_msg) const;
 
-  void DecodeDebugInfo0(const CodeItem* code_item, bool is_static, uint32_t method_idx,
-      DexDebugNewPositionCb position_cb, DexDebugNewLocalCb local_cb,
-      void* context, const uint8_t* stream, LocalInfo* local_in_reg) const;
-
   // Check whether a location denotes a multidex dex file. This is a very simple check: returns
   // whether the string contains the separator character.
   static bool IsMultiDexLocation(const char* location);
@@ -1275,6 +1281,7 @@
     }
   }
   bool HasNext() const { return pos_ < size_; }
+  size_t Size() const { return size_; }
   void Next() { ++pos_; }
   uint16_t GetTypeIdx() {
     return type_list_->GetTypeItem(pos_).type_idx_;
@@ -1516,9 +1523,11 @@
                                   const DexFile::ClassDef& class_def);
 
   // A constructor meant to be called from runtime code.
-  EncodedStaticFieldValueIterator(const DexFile& dex_file, Handle<mirror::DexCache>* dex_cache,
+  EncodedStaticFieldValueIterator(const DexFile& dex_file,
+                                  Handle<mirror::DexCache>* dex_cache,
                                   Handle<mirror::ClassLoader>* class_loader,
-                                  ClassLinker* linker, const DexFile::ClassDef& class_def)
+                                  ClassLinker* linker,
+                                  const DexFile::ClassDef& class_def)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<bool kTransactionActive>
@@ -1551,6 +1560,14 @@
   const jvalue& GetJavaValue() const { return jval_; }
 
  private:
+  EncodedStaticFieldValueIterator(const DexFile& dex_file,
+                                  Handle<mirror::DexCache>* dex_cache,
+                                  Handle<mirror::ClassLoader>* class_loader,
+                                  ClassLinker* linker,
+                                  const DexFile::ClassDef& class_def,
+                                  size_t pos,
+                                  ValueType type);
+
   static constexpr uint8_t kEncodedValueTypeMask = 0x1f;  // 0b11111
   static constexpr uint8_t kEncodedValueArgShift = 5;
 
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 27865e3..f5b68fa 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -79,11 +79,17 @@
 // functions directly.  For x86 and x86-64, compilers need a wrapper
 // assembly function, to handle mismatch in ABI.
 
+// Mark the heap reference `obj`. This entry point is used by read
+// barrier fast path implementations generated by the compiler to mark
+// an object that is referenced by a field of a gray object.
+extern "C" mirror::Object* artReadBarrierMark(mirror::Object* obj)
+    SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR;
+
 // Read barrier entrypoint for heap references.
-// This is the read barrier slow path for instance and static fields and reference-type arrays.
-// TODO: Currently the read barrier does not have a fast path for compilers to directly generate.
-// Ideally the slow path should only take one parameter "ref".
-extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref, mirror::Object* obj,
+// This is the read barrier slow path for instance and static fields
+// and reference type arrays.
+extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+                                              mirror::Object* obj,
                                               uint32_t offset)
     SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR;
 
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 3eea723..faa4747 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -163,6 +163,7 @@
   V(NewStringFromStringBuilder, void) \
 \
   V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \
+  V(ReadBarrierMark, mirror::Object*, mirror::Object*) \
   V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \
   V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*)
 
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index a6b84b6..1850254 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -559,8 +559,11 @@
   return -1;  // failure
 }
 
-// TODO: Currently the read barrier does not have a fast path. Ideally the slow path should only
-// take one parameter "ref", which is given by the fast path.
+extern "C" mirror::Object* artReadBarrierMark(mirror::Object* obj) {
+  DCHECK(kEmitCompilerReadBarrier);
+  return ReadBarrier::Mark(obj);
+}
+
 extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUSED,
                                               mirror::Object* obj,
                                               uint32_t offset) {
@@ -579,7 +582,6 @@
 
 extern "C" mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root) {
   DCHECK(kEmitCompilerReadBarrier);
-  // TODO: Pass a GcRootSource object as second argument to GcRoot::Read?
   return root->Read();
 }
 
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 391eb72..dc9f14c 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -318,7 +318,8 @@
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuilder, pReadBarrierJni,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierSlow, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierMark, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMark, pReadBarrierSlow, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierSlow, pReadBarrierForRootSlow,
                          sizeof(void*));
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index da9a79e..6d72f31 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1352,28 +1352,10 @@
   uint64_t gc_heap_end_ns = NanoTime();
   // We never move things in the native heap, so we can finish the GC at this point.
   FinishGC(self, collector::kGcTypeNone);
-  size_t native_reclaimed = 0;
 
-#ifdef __ANDROID__
-  // Only trim the native heap if we don't care about pauses.
-  if (!CareAboutPauseTimes()) {
-#if defined(USE_DLMALLOC)
-    // Trim the native heap.
-    dlmalloc_trim(0);
-    dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
-#elif defined(USE_JEMALLOC)
-    // Jemalloc does it's own internal trimming.
-#else
-    UNIMPLEMENTED(WARNING) << "Add trimming support";
-#endif
-  }
-#endif  // __ANDROID__
-  uint64_t end_ns = NanoTime();
   VLOG(heap) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
-      << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
-      << PrettyDuration(end_ns - gc_heap_end_ns) << ", advised=" << PrettySize(native_reclaimed)
-      << ") heaps. Managed heap utilization of " << static_cast<int>(100 * managed_utilization)
-      << "%.";
+      << ", advised=" << PrettySize(managed_reclaimed) << ") heap. Managed heap utilization of "
+      << static_cast<int>(100 * managed_utilization) << "%.";
   ATRACE_END();
 }
 
diff --git a/runtime/image.h b/runtime/image.h
index 3032beb..7418f66 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -262,7 +262,7 @@
   // Boolean (0 or 1) to denote if the image was compiled with --compile-pic option
   const uint32_t compile_pic_;
 
-  // Image sections
+  // Image section sizes/offsets correspond to the uncompressed form.
   ImageSection sections_[kSectionCount];
 
   // Image methods.
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 7cc05f7..15f5122 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -17,6 +17,7 @@
 #include "jni_internal.h"
 
 #define ATRACE_TAG ATRACE_TAG_DALVIK
+
 #include <cutils/trace.h>
 #include <dlfcn.h>
 
@@ -31,6 +32,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "nativebridge/native_bridge.h"
+#include "nativeloader/native_loader.h"
 #include "java_vm_ext.h"
 #include "parsed_options.h"
 #include "runtime-inl.h"
@@ -715,6 +717,7 @@
 }
 
 bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject class_loader,
+                                  jstring library_path, jstring permitted_path,
                                   std::string* error_msg) {
   error_msg->clear();
 
@@ -774,7 +777,8 @@
 
   Locks::mutator_lock_->AssertNotHeld(self);
   const char* path_str = path.empty() ? nullptr : path.c_str();
-  void* handle = dlopen(path_str, RTLD_NOW);
+  void* handle = android::OpenNativeLibrary(env, runtime_->GetTargetSdkVersion(),
+                                            path_str, class_loader, library_path, permitted_path);
   bool needs_native_bridge = false;
   if (handle == nullptr) {
     if (android::NativeBridgeIsSupported(path_str)) {
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 618f6fa..8559769 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -82,11 +82,11 @@
   /**
    * Loads the given shared library. 'path' is an absolute pathname.
    *
-   * Returns 'true' on success. On failure, sets 'detail' to a
+   * Returns 'true' on success. On failure, sets 'error_msg' to a
    * human-readable description of the error.
    */
   bool LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject javaLoader,
-                         std::string* error_msg);
+                         jstring library_path, jstring permitted_path, std::string* error_msg);
 
   // Unload native libraries with cleared class loaders.
   void UnloadNativeLibraries()
diff --git a/runtime/jdwp/jdwp_expand_buf.cc b/runtime/jdwp/jdwp_expand_buf.cc
index e492d7e..961dd36 100644
--- a/runtime/jdwp/jdwp_expand_buf.cc
+++ b/runtime/jdwp/jdwp_expand_buf.cc
@@ -164,7 +164,7 @@
  * have stored null bytes in a multi-byte encoding).
  */
 void expandBufAddUtf8String(ExpandBuf* pBuf, const char* s) {
-  int strLen = strlen(s);
+  int strLen = (s != nullptr ? strlen(s) : 0);
   ensureSpace(pBuf, sizeof(uint32_t) + strLen);
   SetUtf8String(pBuf->storage + pBuf->curLen, s, strLen);
   pBuf->curLen += sizeof(uint32_t) + strLen;
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 92aa86e..a653440 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -188,7 +188,7 @@
 
   uint64_t last_update_ns = code_cache_->GetLastUpdateTimeNs();
   if (offline_profile_info_->NeedsSaving(last_update_ns)) {
-    VLOG(profiler) << "Iniate save profiling information to: " << filename;
+    VLOG(profiler) << "Initiate save profiling information to: " << filename;
     std::set<ArtMethod*> methods;
     {
       ScopedObjectAccess soa(Thread::Current());
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 4032c7b..4e82916 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -23,7 +23,6 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc/accounting/bitmap.h"
-#include "gc/allocator/dlmalloc.h"
 #include "gc_root.h"
 #include "jni.h"
 #include "oat_file.h"
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index 4450653..7615870 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -68,7 +68,6 @@
   }
 }
 
-
 void OfflineProfilingInfo::AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info) {
   DCHECK(method != nullptr);
   const DexFile* dex_file = method->GetDexFile();
@@ -80,11 +79,25 @@
   info_it->second.insert(method->GetDexMethodIndex());
 }
 
-static int OpenOrCreateFile(const std::string& filename) {
-  // TODO(calin) allow the shared uid of the app to access the file.
-  int fd = open(filename.c_str(),
-                O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC,
-                S_IRUSR | S_IWUSR);
+enum OpenMode {
+  READ,
+  READ_WRITE
+};
+
+static int OpenFile(const std::string& filename, OpenMode open_mode) {
+  int fd = -1;
+  switch (open_mode) {
+    case READ:
+      fd = open(filename.c_str(), O_RDONLY);
+      break;
+    case READ_WRITE:
+      // TODO(calin) allow the shared uid of the app to access the file.
+      fd = open(filename.c_str(),
+                    O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC,
+                    S_IRUSR | S_IWUSR);
+      break;
+  }
+
   if (fd < 0) {
     PLOG(WARNING) << "Failed to open profile file " << filename;
     return -1;
@@ -96,7 +109,6 @@
     PLOG(WARNING) << "Failed to lock profile file " << filename;
     return -1;
   }
-
   return fd;
 }
 
@@ -129,8 +141,8 @@
   } while (length > 0);
 }
 
-static constexpr char kFieldSeparator = ',';
-static constexpr char kLineSeparator = '\n';
+static constexpr const char kFieldSeparator = ',';
+static constexpr const char kLineSeparator = '\n';
 
 /**
  * Serialization format:
@@ -142,7 +154,7 @@
  **/
 bool OfflineProfilingInfo::Serialize(const std::string& filename,
                                      const DexFileToMethodsMap& info) const {
-  int fd = OpenOrCreateFile(filename);
+  int fd = OpenFile(filename, READ_WRITE);
   if (fd == -1) {
     return false;
   }
@@ -168,4 +180,212 @@
 
   return CloseDescriptorForFile(fd, filename);
 }
+
+// TODO(calin): This a duplicate of Utils::Split fixing the case where the first character
+// is the separator. Merge the fix into Utils::Split once verified that it doesn't break its users.
+static void SplitString(const std::string& s, char separator, std::vector<std::string>* result) {
+  const char* p = s.data();
+  const char* end = p + s.size();
+  // Check if the first character is the separator.
+  if (p != end && *p ==separator) {
+    result->push_back("");
+    ++p;
+  }
+  // Process the rest of the characters.
+  while (p != end) {
+    if (*p == separator) {
+      ++p;
+    } else {
+      const char* start = p;
+      while (++p != end && *p != separator) {
+        // Skip to the next occurrence of the separator.
+      }
+      result->push_back(std::string(start, p - start));
+    }
+  }
+}
+
+bool ProfileCompilationInfo::ProcessLine(const std::string& line,
+                                         const std::vector<const DexFile*>& dex_files) {
+  std::vector<std::string> parts;
+  SplitString(line, kFieldSeparator, &parts);
+  if (parts.size() < 3) {
+    LOG(WARNING) << "Invalid line: " << line;
+    return false;
+  }
+
+  const std::string& multidex_suffix = parts[0];
+  uint32_t checksum;
+  if (!ParseInt(parts[1].c_str(), &checksum)) {
+    return false;
+  }
+
+  const DexFile* current_dex_file = nullptr;
+  for (auto dex_file : dex_files) {
+    if (DexFile::GetMultiDexSuffix(dex_file->GetLocation()) == multidex_suffix) {
+      if (checksum != dex_file->GetLocationChecksum()) {
+        LOG(WARNING) << "Checksum mismatch for "
+            << dex_file->GetLocation() << " when parsing " << filename_;
+        return false;
+      }
+      current_dex_file = dex_file;
+      break;
+    }
+  }
+  if (current_dex_file == nullptr) {
+    return true;
+  }
+
+  for (size_t i = 2; i < parts.size(); i++) {
+    uint32_t method_idx;
+    if (!ParseInt(parts[i].c_str(), &method_idx)) {
+      LOG(WARNING) << "Cannot parse method_idx " << parts[i];
+      return false;
+    }
+    uint16_t class_idx = current_dex_file->GetMethodId(method_idx).class_idx_;
+    auto info_it = info_.find(current_dex_file);
+    if (info_it == info_.end()) {
+      info_it = info_.Put(current_dex_file, ClassToMethodsMap());
+    }
+    ClassToMethodsMap& class_map = info_it->second;
+    auto class_it = class_map.find(class_idx);
+    if (class_it == class_map.end()) {
+      class_it = class_map.Put(class_idx, std::set<uint32_t>());
+    }
+    class_it->second.insert(method_idx);
+  }
+  return true;
+}
+
+// Parses the buffer (of length n) starting from start_from and identify new lines
+// based on kLineSeparator marker.
+// Returns the first position after kLineSeparator in the buffer (starting from start_from),
+// or -1 if the marker doesn't appear.
+// The processed characters are appended to the given line.
+static int GetLineFromBuffer(char* buffer, int n, int start_from, std::string& line) {
+  if (start_from >= n) {
+    return -1;
+  }
+  int new_line_pos = -1;
+  for (int i = start_from; i < n; i++) {
+    if (buffer[i] == kLineSeparator) {
+      new_line_pos = i;
+      break;
+    }
+  }
+  int append_limit = new_line_pos == -1 ? n : new_line_pos;
+  line.append(buffer + start_from, append_limit - start_from);
+  // Jump over kLineSeparator and return the position of the next character.
+  return new_line_pos == -1 ? new_line_pos : new_line_pos + 1;
+}
+
+bool ProfileCompilationInfo::Load(const std::vector<const DexFile*>& dex_files) {
+  if (dex_files.empty()) {
+    return true;
+  }
+  if (kIsDebugBuild) {
+    // In debug builds verify that the multidex suffixes are unique.
+    std::set<std::string> suffixes;
+    for (auto dex_file : dex_files) {
+      std::string multidex_suffix = DexFile::GetMultiDexSuffix(dex_file->GetLocation());
+      DCHECK(suffixes.find(multidex_suffix) == suffixes.end())
+          << "DexFiles appear to belong to different apks."
+          << " There are multiple dex files with the same multidex suffix: "
+          << multidex_suffix;
+      suffixes.insert(multidex_suffix);
+    }
+  }
+  info_.clear();
+
+  int fd = OpenFile(filename_, READ);
+  if (fd == -1) {
+    return false;
+  }
+
+  std::string current_line;
+  const int kBufferSize = 1024;
+  char buffer[kBufferSize];
+  bool success = true;
+
+  while (success) {
+    int n = read(fd, buffer, kBufferSize);
+    if (n < 0) {
+      PLOG(WARNING) << "Error when reading profile file " << filename_;
+      success = false;
+      break;
+    } else if (n == 0) {
+      break;
+    }
+    // Detect the new lines from the buffer. If we manage to complete a line,
+    // process it. Otherwise append to the current line.
+    int current_start_pos = 0;
+    while (current_start_pos < n) {
+      current_start_pos = GetLineFromBuffer(buffer, n, current_start_pos, current_line);
+      if (current_start_pos == -1) {
+        break;
+      }
+      if (!ProcessLine(current_line, dex_files)) {
+        success = false;
+        break;
+      }
+      // Reset the current line (we just processed it).
+      current_line.clear();
+    }
+  }
+  if (!success) {
+    info_.clear();
+  }
+  return CloseDescriptorForFile(fd, filename_) && success;
+}
+
+bool ProfileCompilationInfo::ContainsMethod(const MethodReference& method_ref) const {
+  auto info_it = info_.find(method_ref.dex_file);
+  if (info_it != info_.end()) {
+    uint16_t class_idx = method_ref.dex_file->GetMethodId(method_ref.dex_method_index).class_idx_;
+    const ClassToMethodsMap& class_map = info_it->second;
+    auto class_it = class_map.find(class_idx);
+    if (class_it != class_map.end()) {
+      const std::set<uint32_t>& methods = class_it->second;
+      return methods.find(method_ref.dex_method_index) != methods.end();
+    }
+    return false;
+  }
+  return false;
+}
+
+std::string ProfileCompilationInfo::DumpInfo(bool print_full_dex_location) const {
+  std::ostringstream os;
+  if (info_.empty()) {
+    return "ProfileInfo: empty";
+  }
+
+  os << "ProfileInfo:";
+
+  // Use an additional map to achieve a predefined order based on the dex locations.
+  SafeMap<const std::string, const DexFile*> dex_locations_map;
+  for (auto info_it : info_) {
+    dex_locations_map.Put(info_it.first->GetLocation(), info_it.first);
+  }
+
+  const std::string kFirstDexFileKeySubstitute = ":classes.dex";
+  for (auto dex_file_it : dex_locations_map) {
+    os << "\n";
+    const std::string& location = dex_file_it.first;
+    const DexFile* dex_file = dex_file_it.second;
+    if (print_full_dex_location) {
+      os << location;
+    } else {
+      // Replace the (empty) multidex suffix of the first key with a substitute for easier reading.
+      std::string multidex_suffix = DexFile::GetMultiDexSuffix(location);
+      os << (multidex_suffix.empty() ? kFirstDexFileKeySubstitute : multidex_suffix);
+    }
+    for (auto class_it : info_.find(dex_file)->second) {
+      for (auto method_it : class_it.second) {
+        os << "\n  " << PrettyMethod(method_it, *dex_file, true);
+      }
+    }
+  }
+  return os.str();
+}
+
 }  // namespace art
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index e3117eb..90bda60 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -21,6 +21,7 @@
 
 #include "atomic.h"
 #include "dex_file.h"
+#include "method_reference.h"
 #include "safe_map.h"
 
 namespace art {
@@ -50,10 +51,47 @@
   bool Serialize(const std::string& filename, const DexFileToMethodsMap& info) const;
 
   // TODO(calin): Verify if Atomic is really needed (are we sure to be called from a
-  // singe thread?)
+  // single thread?)
   Atomic<uint64_t> last_update_time_ns_;
 };
 
+/**
+ * Profile information in a format suitable to be queried by the compiler and performing
+ * profile guided compilation.
+ */
+class ProfileCompilationInfo {
+ public:
+  // Constructs a ProfileCompilationInfo backed by the provided file.
+  explicit ProfileCompilationInfo(const std::string& filename) : filename_(filename) {}
+
+  // Loads profile information corresponding to the provided dex files.
+  // The dex files' multidex suffixes must be unique.
+  // This resets the state of the profiling information
+  // (i.e. all previously loaded info are cleared).
+  bool Load(const std::vector<const DexFile*>& dex_files);
+
+  // Returns true if the method reference is present in the profiling info.
+  bool ContainsMethod(const MethodReference& method_ref) const;
+
+  const std::string& GetFilename() const { return filename_; }
+
+  // Dumps all the loaded profile info into a string and returns it.
+  // This is intended for testing and debugging.
+  std::string DumpInfo(bool print_full_dex_location = true) const;
+
+ private:
+  bool ProcessLine(const std::string& line,
+                   const std::vector<const DexFile*>& dex_files);
+
+  using ClassToMethodsMap = SafeMap<uint32_t, std::set<uint32_t>>;
+  // Map identifying the location of the profiled methods.
+  // dex_file -> class_index -> [dex_method_index]+
+  using DexFileToProfileInfoMap = SafeMap<const DexFile*, ClassToMethodsMap>;
+
+  const std::string filename_;
+  DexFileToProfileInfoMap info_;
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc
index 856a3e7..4a1e6c2 100644
--- a/runtime/native/java_lang_Runtime.cc
+++ b/runtime/native/java_lang_Runtime.cc
@@ -52,10 +52,10 @@
   exit(status);
 }
 
-static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPathJstr) {
+static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPath) {
 #ifdef __ANDROID__
-  if (javaLdLibraryPathJstr != nullptr) {
-    ScopedUtfChars ldLibraryPath(env, javaLdLibraryPathJstr);
+  if (javaLdLibraryPath != nullptr) {
+    ScopedUtfChars ldLibraryPath(env, javaLdLibraryPath);
     if (ldLibraryPath.c_str() != nullptr) {
       android_update_LD_LIBRARY_PATH(ldLibraryPath.c_str());
     }
@@ -63,23 +63,31 @@
 
 #else
   LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!";
-  UNUSED(javaLdLibraryPathJstr, env);
+  UNUSED(javaLdLibraryPath, env);
 #endif
 }
 
 static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader,
-                                  jstring javaLdLibraryPathJstr) {
+                                  jstring javaLdLibraryPath, jstring javaIsolationPath) {
   ScopedUtfChars filename(env, javaFilename);
   if (filename.c_str() == nullptr) {
     return nullptr;
   }
 
-  SetLdLibraryPath(env, javaLdLibraryPathJstr);
+  int32_t target_sdk_version = Runtime::Current()->GetTargetSdkVersion();
+
+  // Starting with N nativeLoad uses classloader local
+  // linker namespace instead of global LD_LIBRARY_PATH
+  // (23 is Marshmallow)
+  if (target_sdk_version <= INT_MAX) {
+    SetLdLibraryPath(env, javaLdLibraryPath);
+  }
 
   std::string error_msg;
   {
     JavaVMExt* vm = Runtime::Current()->GetJavaVM();
-    bool success = vm->LoadNativeLibrary(env, filename.c_str(), javaLoader, &error_msg);
+    bool success = vm->LoadNativeLibrary(env, filename.c_str(), javaLoader,
+                                         javaLdLibraryPath, javaIsolationPath, &error_msg);
     if (success) {
       return nullptr;
     }
@@ -107,7 +115,7 @@
   NATIVE_METHOD(Runtime, gc, "()V"),
   NATIVE_METHOD(Runtime, maxMemory, "!()J"),
   NATIVE_METHOD(Runtime, nativeExit, "(I)V"),
-  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;"),
+  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"),
   NATIVE_METHOD(Runtime, totalMemory, "!()J"),
 };
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 5b10610..2b92303 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -399,6 +399,7 @@
   //  gLogVerbosity.oat = true;  // TODO: don't check this in!
   //  gLogVerbosity.profiler = true;  // TODO: don't check this in!
   //  gLogVerbosity.signals = true;  // TODO: don't check this in!
+  //  gLogVerbosity.simulator = true; // TODO: don't check this in!
   //  gLogVerbosity.startup = true;  // TODO: don't check this in!
   //  gLogVerbosity.third_party_jni = true;  // TODO: don't check this in!
   //  gLogVerbosity.threads = true;  // TODO: don't check this in!
diff --git a/runtime/parsed_options_test.cc b/runtime/parsed_options_test.cc
index fad00c7..06b40fd 100644
--- a/runtime/parsed_options_test.cc
+++ b/runtime/parsed_options_test.cc
@@ -87,6 +87,8 @@
   EXPECT_FALSE(VLOG_IS_ON(jdwp));
   EXPECT_TRUE(VLOG_IS_ON(jni));
   EXPECT_FALSE(VLOG_IS_ON(monitor));
+  EXPECT_FALSE(VLOG_IS_ON(signals));
+  EXPECT_FALSE(VLOG_IS_ON(simulator));
   EXPECT_FALSE(VLOG_IS_ON(startup));
   EXPECT_FALSE(VLOG_IS_ON(third_party_jni));
   EXPECT_FALSE(VLOG_IS_ON(threads));
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 2e9107d..19cf759 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -33,7 +33,7 @@
     mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) {
   constexpr bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
   if (with_read_barrier && kUseBakerReadBarrier) {
-    // The higher bits of the rb ptr, rb_ptr_high_bits (must be zero)
+    // The higher bits of the rb_ptr, rb_ptr_high_bits (must be zero)
     // is used to create artificial data dependency from the is_gray
     // load to the ref field (ptr) load to avoid needing a load-load
     // barrier between the two.
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index 77be6cf..3169a8b 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -80,7 +80,8 @@
   static void AssertToSpaceInvariant(GcRootSource* gc_root_source, mirror::Object* ref)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static mirror::Object* Mark(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE static mirror::Object* Mark(mirror::Object* obj)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   static mirror::Object* WhitePtr() {
     return reinterpret_cast<mirror::Object*>(white_ptr_);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 291b9a0..93ca347 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -219,8 +219,6 @@
     UnloadNativeBridge();
   }
 
-  MaybeSaveJitProfilingInfo();
-
   if (dump_gc_performance_on_shutdown_) {
     // This can't be called from the Heap destructor below because it
     // could call RosAlloc::InspectAll() which needs the thread_list
@@ -1220,9 +1218,9 @@
   // Most JNI libraries can just use System.loadLibrary, but libcore can't because it's
   // the library that implements System.loadLibrary!
   {
-    std::string reason;
-    if (!java_vm_->LoadNativeLibrary(env, "libjavacore.so", nullptr, &reason)) {
-      LOG(FATAL) << "LoadNativeLibrary failed for \"libjavacore.so\": " << reason;
+    std::string error_msg;
+    if (!java_vm_->LoadNativeLibrary(env, "libjavacore.so", nullptr, nullptr, nullptr, &error_msg)) {
+      LOG(FATAL) << "LoadNativeLibrary failed for \"libjavacore.so\": " << error_msg;
     }
   }
 
diff --git a/runtime/simulator/Android.mk b/runtime/simulator/Android.mk
new file mode 100644
index 0000000..c154eb6
--- /dev/null
+++ b/runtime/simulator/Android.mk
@@ -0,0 +1,105 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+
+include art/build/Android.common_build.mk
+
+LIBART_SIMULATOR_SRC_FILES := \
+  code_simulator.cc \
+  code_simulator_arm64.cc
+
+# $(1): target or host
+# $(2): ndebug or debug
+define build-libart-simulator
+  ifneq ($(1),target)
+    ifneq ($(1),host)
+      $$(error expected target or host for argument 1, received $(1))
+    endif
+  endif
+  ifneq ($(2),ndebug)
+    ifneq ($(2),debug)
+      $$(error expected ndebug or debug for argument 2, received $(2))
+    endif
+  endif
+
+  art_target_or_host := $(1)
+  art_ndebug_or_debug := $(2)
+
+  include $(CLEAR_VARS)
+  ifeq ($$(art_target_or_host),host)
+     LOCAL_IS_HOST_MODULE := true
+  endif
+  LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+  ifeq ($$(art_ndebug_or_debug),ndebug)
+    LOCAL_MODULE := libart-simulator
+  else # debug
+    LOCAL_MODULE := libartd-simulator
+  endif
+
+  LOCAL_MODULE_TAGS := optional
+  LOCAL_MODULE_CLASS := SHARED_LIBRARIES
+
+  LOCAL_SRC_FILES := $$(LIBART_SIMULATOR_SRC_FILES)
+
+  ifeq ($$(art_target_or_host),target)
+    $(call set-target-local-clang-vars)
+    $(call set-target-local-cflags-vars,$(2))
+  else # host
+    LOCAL_CLANG := $(ART_HOST_CLANG)
+    LOCAL_LDLIBS := $(ART_HOST_LDLIBS)
+    LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
+    LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
+    ifeq ($$(art_ndebug_or_debug),debug)
+      LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
+    else
+      LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
+    endif
+  endif
+
+  LOCAL_SHARED_LIBRARIES += liblog
+  ifeq ($$(art_ndebug_or_debug),debug)
+    LOCAL_SHARED_LIBRARIES += libartd
+  else
+    LOCAL_SHARED_LIBRARIES += libart
+  endif
+
+  LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
+  LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)
+  LOCAL_MULTILIB := both
+
+  LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
+  LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
+  LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE)
+  # For simulator_arm64.
+  ifeq ($$(art_ndebug_or_debug),debug)
+     LOCAL_SHARED_LIBRARIES += libvixld
+  else
+     LOCAL_SHARED_LIBRARIES += libvixl
+  endif
+  ifeq ($$(art_target_or_host),target)
+    include $(BUILD_SHARED_LIBRARY)
+  else # host
+    include $(BUILD_HOST_SHARED_LIBRARY)
+  endif
+endef
+
+ifeq ($(ART_BUILD_HOST_NDEBUG),true)
+  $(eval $(call build-libart-simulator,host,ndebug))
+endif
+ifeq ($(ART_BUILD_HOST_DEBUG),true)
+  $(eval $(call build-libart-simulator,host,debug))
+endif
diff --git a/runtime/simulator/code_simulator.cc b/runtime/simulator/code_simulator.cc
new file mode 100644
index 0000000..1a11160
--- /dev/null
+++ b/runtime/simulator/code_simulator.cc
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "simulator/code_simulator.h"
+#include "simulator/code_simulator_arm64.h"
+
+namespace art {
+
+CodeSimulator* CodeSimulator::CreateCodeSimulator(InstructionSet target_isa) {
+  switch (target_isa) {
+    case kArm64:
+      return arm64::CodeSimulatorArm64::CreateCodeSimulatorArm64();
+    default:
+      return nullptr;
+  }
+}
+
+CodeSimulator* CreateCodeSimulator(InstructionSet target_isa) {
+  return CodeSimulator::CreateCodeSimulator(target_isa);
+}
+
+}  // namespace art
diff --git a/runtime/simulator/code_simulator.h b/runtime/simulator/code_simulator.h
new file mode 100644
index 0000000..bd48909
--- /dev/null
+++ b/runtime/simulator/code_simulator.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_H_
+#define ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_H_
+
+#include "arch/instruction_set.h"
+
+namespace art {
+
+class CodeSimulator {
+ public:
+  CodeSimulator() {}
+  virtual ~CodeSimulator() {}
+  // Returns a null pointer if a simulator cannot be found for target_isa.
+  static CodeSimulator* CreateCodeSimulator(InstructionSet target_isa);
+
+  virtual void RunFrom(intptr_t code_buffer) = 0;
+
+  // Get return value according to C ABI.
+  virtual bool GetCReturnBool() const = 0;
+  virtual int32_t GetCReturnInt32() const = 0;
+  virtual int64_t GetCReturnInt64() const = 0;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(CodeSimulator);
+};
+
+extern "C" CodeSimulator* CreateCodeSimulator(InstructionSet target_isa);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_H_
diff --git a/runtime/simulator/code_simulator_arm64.cc b/runtime/simulator/code_simulator_arm64.cc
new file mode 100644
index 0000000..39dfa6d
--- /dev/null
+++ b/runtime/simulator/code_simulator_arm64.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "simulator/code_simulator_arm64.h"
+
+namespace art {
+namespace arm64 {
+
+// VIXL has not been tested on 32bit architectures, so vixl::Simulator is not always
+// available. To avoid linker error on these architectures, we check if we can simulate
+// in the beginning of following methods, with compile time constant `kCanSimulate`.
+// TODO: when vixl::Simulator is always available, remove the these checks.
+
+CodeSimulatorArm64* CodeSimulatorArm64::CreateCodeSimulatorArm64() {
+  if (kCanSimulate) {
+    return new CodeSimulatorArm64();
+  } else {
+    return nullptr;
+  }
+}
+
+CodeSimulatorArm64::CodeSimulatorArm64()
+    : CodeSimulator(), decoder_(nullptr), simulator_(nullptr) {
+  DCHECK(kCanSimulate);
+  decoder_ = new vixl::Decoder();
+  simulator_ = new vixl::Simulator(decoder_);
+}
+
+CodeSimulatorArm64::~CodeSimulatorArm64() {
+  DCHECK(kCanSimulate);
+  delete simulator_;
+  delete decoder_;
+}
+
+void CodeSimulatorArm64::RunFrom(intptr_t code_buffer) {
+  DCHECK(kCanSimulate);
+  simulator_->RunFrom(reinterpret_cast<const vixl::Instruction*>(code_buffer));
+}
+
+bool CodeSimulatorArm64::GetCReturnBool() const {
+  DCHECK(kCanSimulate);
+  return simulator_->wreg(0);
+}
+
+int32_t CodeSimulatorArm64::GetCReturnInt32() const {
+  DCHECK(kCanSimulate);
+  return simulator_->wreg(0);
+}
+
+int64_t CodeSimulatorArm64::GetCReturnInt64() const {
+  DCHECK(kCanSimulate);
+  return simulator_->xreg(0);
+}
+
+}  // namespace arm64
+}  // namespace art
diff --git a/runtime/simulator/code_simulator_arm64.h b/runtime/simulator/code_simulator_arm64.h
new file mode 100644
index 0000000..10fceb9
--- /dev/null
+++ b/runtime/simulator/code_simulator_arm64.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_ARM64_H_
+#define ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_ARM64_H_
+
+#include "memory"
+#include "simulator/code_simulator.h"
+// TODO: make vixl clean wrt -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "vixl/a64/simulator-a64.h"
+#pragma GCC diagnostic pop
+
+namespace art {
+namespace arm64 {
+
+class CodeSimulatorArm64 : public CodeSimulator {
+ public:
+  static CodeSimulatorArm64* CreateCodeSimulatorArm64();
+  virtual ~CodeSimulatorArm64();
+
+  void RunFrom(intptr_t code_buffer) OVERRIDE;
+
+  bool GetCReturnBool() const OVERRIDE;
+  int32_t GetCReturnInt32() const OVERRIDE;
+  int64_t GetCReturnInt64() const OVERRIDE;
+
+ private:
+  CodeSimulatorArm64();
+
+  vixl::Decoder* decoder_;
+  vixl::Simulator* simulator_;
+
+  // TODO: Enable CodeSimulatorArm64 for more host ISAs once vixl::Simulator supports them.
+  static constexpr bool kCanSimulate = (kRuntimeISA == kX86_64);
+
+  DISALLOW_COPY_AND_ASSIGN(CodeSimulatorArm64);
+};
+
+}  // namespace arm64
+}  // namespace art
+
+#endif  // ART_RUNTIME_SIMULATOR_CODE_SIMULATOR_ARM64_H_
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 8a8c02f..13e3774 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2548,6 +2548,7 @@
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuffer)
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuilder)
   QUICK_ENTRY_POINT_INFO(pReadBarrierJni)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierMark)
   QUICK_ENTRY_POINT_INFO(pReadBarrierSlow)
   QUICK_ENTRY_POINT_INFO(pReadBarrierForRootSlow)
 #undef QUICK_ENTRY_POINT_INFO
diff --git a/runtime/thread.h b/runtime/thread.h
index c556c36..6cb895c 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -545,6 +545,13 @@
         OFFSETOF_MEMBER(tls_32bit_sized_values, state_and_flags));
   }
 
+  template<size_t pointer_size>
+  static ThreadOffset<pointer_size> IsGcMarkingOffset() {
+    return ThreadOffset<pointer_size>(
+        OFFSETOF_MEMBER(Thread, tls32_) +
+        OFFSETOF_MEMBER(tls_32bit_sized_values, is_gc_marking));
+  }
+
  private:
   template<size_t pointer_size>
   static ThreadOffset<pointer_size> ThreadOffsetFromTlsPtr(size_t tls_ptr_offset) {
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index e2c3afb..2b778d9 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -379,7 +379,10 @@
 
 void WellKnownClasses::LateInit(JNIEnv* env) {
   ScopedLocalRef<jclass> java_lang_Runtime(env, env->FindClass("java/lang/Runtime"));
-  java_lang_Runtime_nativeLoad = CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad", "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;");
+  java_lang_Runtime_nativeLoad =
+      CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad",
+                  "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)"
+                      "Ljava/lang/String;");
 }
 
 mirror::Class* WellKnownClasses::ToClass(jclass global_jclass) {
diff --git a/test/008-exceptions/src/Main.java b/test/008-exceptions/src/Main.java
index 9e3477a..b8231f1 100644
--- a/test/008-exceptions/src/Main.java
+++ b/test/008-exceptions/src/Main.java
@@ -60,7 +60,7 @@
         } catch (NullPointerException npe) {
             System.out.print("Got an NPE: ");
             System.out.println(npe.getMessage());
-            npe.printStackTrace();
+            npe.printStackTrace(System.out);
         }
     }
     public static void main (String args[]) {
@@ -103,7 +103,7 @@
                 System.out.println(e.getCause());
             }
         } catch (Exception error) {
-            error.printStackTrace();
+            error.printStackTrace(System.out);
         }
     }
 
@@ -126,7 +126,7 @@
                 System.out.println(e.getCause());
             }
         } catch (Exception error) {
-            error.printStackTrace();
+            error.printStackTrace(System.out);
         }
     }
 }
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index e9946c8..b70ca4f 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -267,11 +267,20 @@
                                          const char* app_code_cache_dir,
                                          const char* isa ATTRIBUTE_UNUSED) {
   struct stat st;
-  if ((app_code_cache_dir != nullptr)
-      && (stat(app_code_cache_dir, &st) == 0)
-      && S_ISDIR(st.st_mode)) {
-    printf("Code cache exists: '%s'.\n", app_code_cache_dir);
+  if (app_code_cache_dir != nullptr) {
+    if (stat(app_code_cache_dir, &st) == 0) {
+      if (S_ISDIR(st.st_mode)) {
+        printf("Code cache exists: '%s'.\n", app_code_cache_dir);
+      } else {
+        printf("Code cache is not a directory.\n");
+      }
+    } else {
+      perror("Error when stat-ing the code_cache:");
+    }
+  } else {
+    printf("app_code_cache_dir is null.\n");
   }
+
   if (art_cbs != nullptr) {
     gNativeBridgeArtCallbacks = art_cbs;
     printf("Native bridge initialized.\n");
diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java
index 3f6e48b..e827b1e 100644
--- a/test/530-checker-loops/src/Main.java
+++ b/test/530-checker-loops/src/Main.java
@@ -27,6 +27,7 @@
 
   /// CHECK-START: int Main.linear(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -40,6 +41,7 @@
 
   /// CHECK-START: int Main.linearDown(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearDown(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -53,6 +55,7 @@
 
   /// CHECK-START: int Main.linearObscure(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -67,6 +70,7 @@
 
   /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -79,8 +83,26 @@
     return result;
   }
 
+  /// CHECK-START: int Main.hiddenStride(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.hiddenStride(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int hiddenStride(int[] a) {
+    int result = 0;
+    for (int i = 1; i <= 1; i++) {
+      // Obscured unit stride.
+      for (int j = 0; j < a.length; j += i) {
+        result += a[j];
+      }
+    }
+    return result;
+  }
+
   /// CHECK-START: int Main.linearWhile(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearWhile(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -95,6 +117,7 @@
 
   /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -112,6 +135,7 @@
 
   /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -133,6 +157,7 @@
 
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -149,6 +174,7 @@
 
   /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -169,6 +195,7 @@
 
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -182,6 +209,7 @@
 
   /// CHECK-START: int[] Main.linearCopy(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int[] Main.linearCopy(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -197,6 +225,7 @@
   /// CHECK-START: int Main.linearByTwo(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearByTwo(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -213,6 +242,7 @@
 
   /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -226,6 +256,7 @@
 
   /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (after)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -240,6 +271,7 @@
 
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -256,6 +288,7 @@
 
   /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -273,6 +306,7 @@
 
   /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -290,6 +324,7 @@
 
   /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -307,6 +342,7 @@
 
   /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -324,6 +360,7 @@
 
   /// CHECK-START: int Main.linearForNEUp() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearForNEUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -338,6 +375,7 @@
 
   /// CHECK-START: int Main.linearForNEDown() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearForNEDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -352,6 +390,7 @@
 
   /// CHECK-START: int Main.linearDoWhileUp() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearDoWhileUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -367,6 +406,7 @@
 
   /// CHECK-START: int Main.linearDoWhileDown() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearDoWhileDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -382,6 +422,7 @@
 
   /// CHECK-START: int Main.linearShort() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.linearShort() BCE (after)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -397,6 +438,7 @@
 
   /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -422,6 +464,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: ArrayGet
   /// CHECK-DAG: ArraySet
+  //
   /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-DAG: ArrayGet
@@ -451,6 +494,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: ArrayGet
   /// CHECK-DAG: ArraySet
+  //
   /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-DAG: ArrayGet
@@ -480,6 +524,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: ArrayGet
   /// CHECK-DAG: ArraySet
+  //
   /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-DAG: ArrayGet
@@ -502,7 +547,54 @@
     }
   }
 
-  // Verifier for triangular methods.
+  /// CHECK-START: void Main.linearTriangularVariations(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  //
+  /// CHECK-START: void Main.linearTriangularVariations(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularVariations(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < n; i++) {
+      for (int j = 0; j < i; j++) {
+        a[j] += 1;
+      }
+      for (int j = i - 1; j >= 0; j--) {
+        a[j] += 1;
+      }
+      for (int j = i; j < n; j++) {
+        a[j] += 1;
+      }
+      for (int j = n - 1; j > i - 1; j--) {
+        a[j] += 1;
+      }
+    }
+    verifyTriangular(a);
+  }
+
+  // Verifier for triangular loops.
   private static void verifyTriangular(int[] a, int[] b, int m, int n) {
     expectEquals(n, a.length);
     for (int i = 0, k = m; i < n; i++) {
@@ -515,6 +607,14 @@
     }
   }
 
+  // Verifier for triangular loops.
+  private static void verifyTriangular(int[] a) {
+    int n = a.length;
+    for (int i = 0; i < n; i++) {
+      expectEquals(a[i], n + n);
+    }
+  }
+
   /// CHECK-START: void Main.bubble(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: ArrayGet
@@ -523,6 +623,7 @@
   /// CHECK-DAG: If
   /// CHECK-DAG: ArraySet
   /// CHECK-DAG: ArraySet
+  //
   /// CHECK-START: void Main.bubble(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-DAG: ArrayGet
@@ -546,6 +647,7 @@
 
   /// CHECK-START: int Main.periodicIdiom(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.periodicIdiom(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -563,6 +665,7 @@
 
   /// CHECK-START: int Main.periodicSequence2(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.periodicSequence2(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -586,6 +689,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.periodicSequence4(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -610,6 +714,7 @@
 
   /// CHECK-START: int Main.justRightUp1() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justRightUp1() BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -624,6 +729,7 @@
 
   /// CHECK-START: int Main.justRightUp2() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justRightUp2() BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -638,6 +744,7 @@
 
   /// CHECK-START: int Main.justRightUp3() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justRightUp3() BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -652,6 +759,7 @@
 
   /// CHECK-START: int Main.justOOBUp() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justOOBUp() BCE (after)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -667,6 +775,7 @@
 
   /// CHECK-START: int Main.justRightDown1() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justRightDown1() BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -681,6 +790,7 @@
 
   /// CHECK-START: int Main.justRightDown2() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justRightDown2() BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -695,6 +805,7 @@
 
   /// CHECK-START: int Main.justRightDown3() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justRightDown3() BCE (after)
   /// CHECK-NOT: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -709,6 +820,7 @@
 
   /// CHECK-START: int Main.justOOBDown() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.justOOBDown() BCE (after)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -724,6 +836,7 @@
 
   /// CHECK-START: void Main.lowerOOB(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -735,6 +848,7 @@
 
   /// CHECK-START: void Main.upperOOB(int[]) BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -746,6 +860,7 @@
 
   /// CHECK-START: void Main.doWhileUpOOB() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -759,6 +874,7 @@
 
   /// CHECK-START: void Main.doWhileDownOOB() BCE (before)
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-NOT: Deoptimize
@@ -770,6 +886,55 @@
     } while (-1 <= i);
   }
 
+  /// CHECK-START: int[] Main.multiply1() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  //
+  /// CHECK-START: int[] Main.multiply1() BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static int[] multiply1() {
+    int[] a = new int[10];
+    try {
+      for (int i = 0; i <= 3; i++) {
+        for (int j = 0; j <= 3; j++) {
+          // Range [0,9]: safe.
+          a[i * j] += 1;
+        }
+      }
+    } catch (Exception e) {
+      a[0] += 1000;
+    }
+    return a;
+  }
+
+  /// CHECK-START: int[] Main.multiply2() BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  //
+  /// CHECK-START: int[] Main.multiply2() BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  static int[] multiply2() {
+    int[] a = new int[10];
+    try {
+      for (int i = -3; i <= 3; i++) {
+        for (int j = -3; j <= 3; j++) {
+          // Range [-9,9]: unsafe.
+         a[i * j] += 1;
+        }
+      }
+    } catch (Exception e) {
+      a[0] += 1000;
+    }
+    return a;
+  }
+
   /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (before)
   /// CHECK-DAG: StaticFieldGet
   /// CHECK-DAG: NullCheck
@@ -777,6 +942,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: ArrayGet
   /// CHECK-DAG: StaticFieldSet
+  //
   /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (after)
   /// CHECK-DAG: StaticFieldGet
   /// CHECK-NOT: NullCheck
@@ -803,6 +969,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: ArrayGet
   /// CHECK-DAG: StaticFieldSet
+  //
   /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (after)
   /// CHECK-DAG: StaticFieldGet
   /// CHECK-NOT: NullCheck
@@ -827,6 +994,7 @@
   /// CHECK-DAG: ArrayLength
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: ArrayGet
+  //
   /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (after)
   /// CHECK-DAG: Deoptimize
   /// CHECK-DAG: Deoptimize
@@ -850,6 +1018,7 @@
   /// CHECK-DAG: ArrayLength
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: ArrayGet
+  //
   /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (after)
   /// CHECK-DAG: Deoptimize
   /// CHECK-DAG: Deoptimize
@@ -873,6 +1042,7 @@
   /// CHECK-DAG: ArrayLength
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: ArrayGet
+  //
   /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: ArrayLength
@@ -897,6 +1067,7 @@
   /// CHECK-DAG: ArrayLength
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: ArrayGet
+  //
   /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
   /// CHECK-DAG: NullCheck
   /// CHECK-DAG: ArrayLength
@@ -918,6 +1089,7 @@
   /// CHECK-DAG: ArrayLength
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: ArrayGet
+  //
   /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (after)
   /// CHECK-DAG: NullCheck
   /// CHECK-DAG: ArrayLength
@@ -951,6 +1123,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: BoundsCheck
+  //
   /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
   /// CHECK-DAG: NullCheck
   /// CHECK-DAG: ArrayLength
@@ -1015,6 +1188,7 @@
   /// CHECK-DAG: ArrayGet
   /// CHECK-DAG: BoundsCheck
   /// CHECK-DAG: ArrayGet
+  //
   /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (after)
   /// CHECK-DAG: If
   /// CHECK-NOT: BoundsCheck
@@ -1087,6 +1261,8 @@
     expectEquals(55, linearObscure(x));
     expectEquals(0, linearVeryObscure(empty));
     expectEquals(55, linearVeryObscure(x));
+    expectEquals(0, hiddenStride(empty));
+    expectEquals(55, hiddenStride(x));
     expectEquals(0, linearWhile(empty));
     expectEquals(55, linearWhile(x));
     expectEquals(0, linearThreeWayPhi(empty));
@@ -1144,6 +1320,7 @@
     linearTriangularOnTwoArrayLengths(10);
     linearTriangularOnOneArrayLength(10);
     linearTriangularOnParameter(10);
+    linearTriangularVariations(10);
 
     // Sorting.
     int[] sort = { 5, 4, 1, 9, 10, 2, 7, 6, 3, 8 };
@@ -1234,6 +1411,20 @@
     }
     expectEquals(1055, sResult);
 
+    // Multiplication.
+    {
+      int[] e1 = { 7, 1, 2, 2, 1, 0, 2, 0, 0, 1 };
+      int[] a1 = multiply1();
+      for (int i = 0; i < 10; i++) {
+        expectEquals(a1[i], e1[i]);
+      }
+      int[] e2 = { 1001, 0, 0, 1, 0, 0, 1, 0, 0, 1 };
+      int[] a2 = multiply2();
+      for (int i = 0; i < 10; i++) {
+        expectEquals(a2[i], e2[i]);
+      }
+    }
+
     // Dynamic BCE.
     sResult = 0;
     try {
diff --git a/test/542-bitfield-rotates/src/Main.java b/test/542-bitfield-rotates/src/Main.java
index 9ef5f93..f2bc153 100644
--- a/test/542-bitfield-rotates/src/Main.java
+++ b/test/542-bitfield-rotates/src/Main.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/test/541-checker-instruction-simplifier-rotate/expected.txt b/test/557-checker-instruction-simplifier-ror/expected.txt
similarity index 100%
rename from test/541-checker-instruction-simplifier-rotate/expected.txt
rename to test/557-checker-instruction-simplifier-ror/expected.txt
diff --git a/test/541-checker-instruction-simplifier-rotate/info.txt b/test/557-checker-instruction-simplifier-ror/info.txt
similarity index 100%
rename from test/541-checker-instruction-simplifier-rotate/info.txt
rename to test/557-checker-instruction-simplifier-ror/info.txt
diff --git a/test/541-checker-instruction-simplifier-rotate/src/Main.java b/test/557-checker-instruction-simplifier-ror/src/Main.java
similarity index 100%
rename from test/541-checker-instruction-simplifier-rotate/src/Main.java
rename to test/557-checker-instruction-simplifier-ror/src/Main.java
diff --git a/test/541-checker-instruction-simplifier-rotate/expected.txt b/test/558-switch/expected.txt
similarity index 100%
copy from test/541-checker-instruction-simplifier-rotate/expected.txt
copy to test/558-switch/expected.txt
diff --git a/test/558-switch/info.txt b/test/558-switch/info.txt
new file mode 100644
index 0000000..07283ff
--- /dev/null
+++ b/test/558-switch/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing that used to generate invalid
+code for arm.
diff --git a/test/558-switch/src/Main.java b/test/558-switch/src/Main.java
new file mode 100644
index 0000000..f44231e
--- /dev/null
+++ b/test/558-switch/src/Main.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+   public static boolean testMethod(int statusCode) {
+        switch (statusCode) {
+        case 303:
+        case 301:
+        case 302:
+        case 307:
+            return true;
+        default:
+            return false;
+        } //end of switch
+    }
+
+  public static void main(String[] args) {
+    if (!testMethod(301)) {
+      throw new Error("Unexpected result");
+    }
+  }
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index beaafd0..7589f8f 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -276,7 +276,8 @@
 TEST_ART_BROKEN_NO_RELOCATE_TESTS := \
   117-nopatchoat \
   118-noimage-dex2oat \
-  119-noimage-patchoat
+  119-noimage-patchoat \
+  554-jit-profile-file
 
 ifneq (,$(filter no-relocate,$(RELOCATE_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -298,6 +299,7 @@
   412-new-array \
   471-uninitialized-locals \
   506-verify-aput \
+  554-jit-profile-file \
   800-smali
 
 ifneq (,$(filter interp-ac,$(COMPILER_TYPES)))
@@ -356,13 +358,15 @@
 # All these tests check that we have sane behavior if we don't have a patchoat or dex2oat.
 # Therefore we shouldn't run them in situations where we actually don't have these since they
 # explicitly test for them. These all also assume we have an image.
+# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
 TEST_ART_BROKEN_FALLBACK_RUN_TESTS := \
   116-nodex2oat \
   117-nopatchoat \
   118-noimage-dex2oat \
   119-noimage-patchoat \
   137-cfi \
-  138-duplicate-classes-check2
+  138-duplicate-classes-check2 \
+  554-jit-profile-file
 
 # This test fails without an image.
 TEST_ART_BROKEN_NO_IMAGE_RUN_TESTS := \
@@ -413,7 +417,8 @@
 # Known broken tests for the interpreter.
 # CFI unwinding expects managed frames.
 TEST_ART_BROKEN_INTERPRETER_RUN_TESTS := \
-  137-cfi
+  137-cfi \
+  554-jit-profile-file
 
 ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -454,7 +459,7 @@
     441-checker-inliner \
     510-checker-try-catch \
     536-checker-intrinsic-optimization \
-    541-checker-instruction-simplifier-rotate \
+    557-checker-instruction-simplifier-ror \
 
 ifeq (mips,$(TARGET_ARCH))
   ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
@@ -469,7 +474,7 @@
 
 # Known broken tests for the mips64 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS := \
-    541-checker-instruction-simplifier-rotate \
+    557-checker-instruction-simplifier-ror \
 
 ifeq (mips64,$(TARGET_ARCH))
   ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
@@ -506,37 +511,54 @@
 
 TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS :=
 
-# Tests that should fail in the read barrier configuration.
-# 055: Exceeds run time limits due to read barrier instrumentation.
-# 137: Read barrier forces interpreter. Cannot run this with the interpreter.
-# 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
-#      handled in the read barrier configuration.
-TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS := \
-  055-enum-performance                    \
-  137-cfi                                 \
+
+# Tests that should fail in the read barrier configuration with the default (Quick) compiler.
+# 137: Quick has no support for read barriers and punts to the
+#      interpreter, but CFI unwinding expects managed frames.
+# 554: Quick does not support JIT profiling.
+TEST_ART_BROKEN_DEFAULT_READ_BARRIER_RUN_TESTS := \
+  137-cfi \
+  554-jit-profile-file
+
+# Tests that should fail in the read barrier configuration with the Optimizing compiler.
+# 484: Baker's fast path based read barrier compiler instrumentation generates code containing
+#      more parallel moves on x86, thus some Checker assertions may fail.
+# 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are
+#      not yet handled in the read barrier configuration.
+TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \
+  484-checker-register-hints \
   537-checker-arraycopy
 
 ifeq ($(ART_USE_READ_BARRIER),true)
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-      $(PREBUILD_TYPES),$(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
-      $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-      $(TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  ifneq (,$(filter default,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+        $(PREBUILD_TYPES),default,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+        $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_DEFAULT_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  endif
+
+  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+        $(PREBUILD_TYPES),optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+        $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  endif
 endif
 
-TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS :=
+TEST_ART_BROKEN_DEFAULT_READ_BARRIER_RUN_TESTS :=
+TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS :=
 
 # Tests that should fail in the heap poisoning configuration with the default (Quick) compiler.
-# 137: Quick punts to the interpreter, and this test cannot run this with the interpreter.
+# 137: Quick has no support for read barriers and punts to the
+#      interpreter, but CFI unwinding expects managed frames.
+# 554: Quick does not support JIT profiling.
 TEST_ART_BROKEN_DEFAULT_HEAP_POISONING_RUN_TESTS := \
-  137-cfi
+  137-cfi \
+  554-jit-profile-file
 # Tests that should fail in the heap poisoning configuration with the Optimizing compiler.
-# 055-enum-performance: Exceeds run time limits due to heap poisoning instrumentation.
+# 055: Exceeds run time limits due to heap poisoning instrumentation (on ARM and ARM64 devices).
 TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS := \
   055-enum-performance
-# Tests that should fail in the heap poisoning configuration with the interpreter.
-# 137: Cannot run this with the interpreter.
-TEST_ART_BROKEN_INTERPRETER_HEAP_POISONING_RUN_TESTS := \
-  137-cfi
 
 ifeq ($(ART_HEAP_POISONING),true)
   ifneq (,$(filter default,$(COMPILER_TYPES)))
@@ -552,18 +574,10 @@
         $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
         $(TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
   endif
-
-  ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
-    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-        $(PREBUILD_TYPES),interpreter,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-        $(TEST_ART_BROKEN_INTERPRETER_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-  endif
 endif
 
-TEST_ART_BROKEN_INTERPRETER_HEAP_POISONING_RUN_TESTS :=
-TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS :=
 TEST_ART_BROKEN_DEFAULT_HEAP_POISONING_RUN_TESTS :=
+TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS :=
 
 # Clear variables ahead of appending to them when defining tests.
 $(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
diff --git a/test/run-test b/test/run-test
index 6e13b8a..60e008c 100755
--- a/test/run-test
+++ b/test/run-test
@@ -41,7 +41,7 @@
 fi
 checker="${progdir}/../tools/checker/checker.py"
 export JAVA="java"
-export JAVAC="javac -g"
+export JAVAC="javac -g -source 1.7 -target 1.7 -Xlint:-options"
 export RUN="${progdir}/etc/run-test-jar"
 export DEX_LOCATION=/data/run-test/${test_dir}
 export NEED_DEX="true"
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index a5476f7..a2d2f23 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -170,5 +170,12 @@
   result: EXEC_FAILED,
   names: ["org.apache.harmony.tests.java.util.WeakHashMapTest#test_keySet"],
   bug: 25437292
+},
+{
+  description: "JSR166TestCase.waitForThreadToEnterWaitState seems to time out; needs investigation.",
+  result: EXEC_FAILED,
+  names: ["jsr166.LinkedTransferQueueTest#testTransfer2",
+          "jsr166.LinkedTransferQueueTest#testWaitingConsumer"],
+  bug: 25883050
 }
 ]