Merge "Revert "ART: Use bionic TLS slot for thread-self""
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 63ad9cf..4850e6c 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -165,6 +165,7 @@
   runtime/base/hex_dump_test.cc \
   runtime/base/histogram_test.cc \
   runtime/base/mutex_test.cc \
+  runtime/base/out_test.cc \
   runtime/base/scoped_flock_test.cc \
   runtime/base/stringprintf_test.cc \
   runtime/base/time_utils_test.cc \
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 2568ee3..7fc6fa2 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -38,6 +38,7 @@
     true,   // kIntrinsicFloatCvt
     true,   // kIntrinsicReverseBits
     true,   // kIntrinsicReverseBytes
+    true,   // kIntrinsicNumberOfLeadingZeros
     true,   // kIntrinsicAbsInt
     true,   // kIntrinsicAbsLong
     true,   // kIntrinsicAbsFloat
@@ -75,6 +76,8 @@
 static_assert(kIntrinsicIsStatic[kIntrinsicFloatCvt], "FloatCvt must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBits], "ReverseBits must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBytes], "ReverseBytes must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfLeadingZeros],
+              "NumberOfLeadingZeros must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsInt], "AbsInt must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsLong], "AbsLong must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsFloat], "AbsFloat must be static");
@@ -225,6 +228,7 @@
     "putObjectVolatile",     // kNameCachePutObjectVolatile
     "putOrderedObject",      // kNameCachePutOrderedObject
     "arraycopy",             // kNameCacheArrayCopy
+    "numberOfLeadingZeros",  // kNameCacheNumberOfLeadingZeros
 };
 
 const DexFileMethodInliner::ProtoDef DexFileMethodInliner::kProtoCacheDefs[] = {
@@ -368,6 +372,9 @@
     INTRINSIC(JavaLangInteger, Reverse, I_I, kIntrinsicReverseBits, k32),
     INTRINSIC(JavaLangLong, Reverse, J_J, kIntrinsicReverseBits, k64),
 
+    INTRINSIC(JavaLangInteger, NumberOfLeadingZeros, I_I, kIntrinsicNumberOfLeadingZeros, k32),
+    INTRINSIC(JavaLangLong, NumberOfLeadingZeros, J_I, kIntrinsicNumberOfLeadingZeros, k64),
+
     INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
     INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
     INTRINSIC(JavaLangMath,       Abs, J_J, kIntrinsicAbsLong, 0),
@@ -614,6 +621,8 @@
                                           intrinsic.d.data & kIntrinsicFlagIsOrdered);
     case kIntrinsicSystemArrayCopyCharArray:
       return backend->GenInlinedArrayCopyCharArray(info);
+    case kIntrinsicNumberOfLeadingZeros:
+      return false;  // not implemented in quick
     default:
       LOG(FATAL) << "Unexpected intrinsic opcode: " << intrinsic.opcode;
       return false;  // avoid warning "control reaches end of non-void function"
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index a8cb9f0..bcb9ee5 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -206,6 +206,7 @@
       kNameCachePutObjectVolatile,
       kNameCachePutOrderedObject,
       kNameCacheArrayCopy,
+      kNameCacheNumberOfLeadingZeros,
       kNameCacheLast
     };
 
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index dd68dd4..16c161e 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -36,7 +36,7 @@
 namespace art {
 
 // Run the tests only on host.
-#ifndef HAVE_ANDROID_OS
+#ifndef __ANDROID__
 
 class QuickCFITest : public CFITest {
  public:
@@ -56,6 +56,8 @@
       CompilerOptions::kDefaultSmallMethodThreshold,
       CompilerOptions::kDefaultTinyMethodThreshold,
       CompilerOptions::kDefaultNumDexMethodsThreshold,
+      CompilerOptions::kDefaultInlineDepthLimit,
+      CompilerOptions::kDefaultInlineMaxCodeUnits,
       false,
       CompilerOptions::kDefaultTopKProfileThreshold,
       false,
@@ -134,6 +136,6 @@
 TEST_ISA(kMips)
 TEST_ISA(kMips64)
 
-#endif  // HAVE_ANDROID_OS
+#endif  // __ANDROID__
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index 798e23f..98e9f38 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -39,6 +39,8 @@
         CompilerOptions::kDefaultSmallMethodThreshold,
         CompilerOptions::kDefaultTinyMethodThreshold,
         CompilerOptions::kDefaultNumDexMethodsThreshold,
+        CompilerOptions::kDefaultInlineDepthLimit,
+        CompilerOptions::kDefaultInlineMaxCodeUnits,
         false,
         CompilerOptions::kDefaultTopKProfileThreshold,
         false,
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index a35f306..affa52a 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -690,66 +690,76 @@
   return methods_to_compile_->find(tmp.c_str()) != methods_to_compile_->end();
 }
 
-static void ResolveExceptionsForMethod(
-    ArtMethod* method_handle, std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  const DexFile::CodeItem* code_item = method_handle->GetCodeItem();
-  if (code_item == nullptr) {
-    return;  // native or abstract method
-  }
-  if (code_item->tries_size_ == 0) {
-    return;  // nothing to process
-  }
-  const uint8_t* encoded_catch_handler_list = DexFile::GetCatchHandlerData(*code_item, 0);
-  size_t num_encoded_catch_handlers = DecodeUnsignedLeb128(&encoded_catch_handler_list);
-  for (size_t i = 0; i < num_encoded_catch_handlers; i++) {
-    int32_t encoded_catch_handler_size = DecodeSignedLeb128(&encoded_catch_handler_list);
-    bool has_catch_all = false;
-    if (encoded_catch_handler_size <= 0) {
-      encoded_catch_handler_size = -encoded_catch_handler_size;
-      has_catch_all = true;
+class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor {
+ public:
+  explicit ResolveCatchBlockExceptionsClassVisitor(  // Stores a reference, not a copy.
+      std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
+      : exceptions_to_resolve_(exceptions_to_resolve) {}
+
+  void ResolveExceptionsForMethod(ArtMethod* method_handle) SHARED_REQUIRES(Locks::mutator_lock_) {
+    const DexFile::CodeItem* code_item = method_handle->GetCodeItem();
+    if (code_item == nullptr) {
+      return;  // native or abstract method
     }
-    for (int32_t j = 0; j < encoded_catch_handler_size; j++) {
-      uint16_t encoded_catch_handler_handlers_type_idx =
-          DecodeUnsignedLeb128(&encoded_catch_handler_list);
-      // Add to set of types to resolve if not already in the dex cache resolved types
-      if (!method_handle->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) {
-        exceptions_to_resolve.insert(
-            std::pair<uint16_t, const DexFile*>(encoded_catch_handler_handlers_type_idx,
-                                                method_handle->GetDexFile()));
+    if (code_item->tries_size_ == 0) {
+      return;  // nothing to process
+    }
+    const uint8_t* encoded_catch_handler_list = DexFile::GetCatchHandlerData(*code_item, 0);
+    size_t num_encoded_catch_handlers = DecodeUnsignedLeb128(&encoded_catch_handler_list);
+    for (size_t i = 0; i < num_encoded_catch_handlers; i++) {
+      int32_t encoded_catch_handler_size = DecodeSignedLeb128(&encoded_catch_handler_list);
+      bool has_catch_all = false;
+      if (encoded_catch_handler_size <= 0) {
+        encoded_catch_handler_size = -encoded_catch_handler_size;
+        has_catch_all = true;
       }
-      // ignore address associated with catch handler
-      DecodeUnsignedLeb128(&encoded_catch_handler_list);
-    }
-    if (has_catch_all) {
-      // ignore catch all address
-      DecodeUnsignedLeb128(&encoded_catch_handler_list);
+      for (int32_t j = 0; j < encoded_catch_handler_size; j++) {
+        uint16_t encoded_catch_handler_handlers_type_idx =
+            DecodeUnsignedLeb128(&encoded_catch_handler_list);
+        // Add to set of types to resolve if not already in the dex cache resolved types
+        if (!method_handle->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) {
+          exceptions_to_resolve_.emplace(encoded_catch_handler_handlers_type_idx,
+                                         method_handle->GetDexFile());
+        }
+        // ignore address associated with catch handler
+        DecodeUnsignedLeb128(&encoded_catch_handler_list);
+      }
+      if (has_catch_all) {
+        // ignore catch all address
+        DecodeUnsignedLeb128(&encoded_catch_handler_list);
+      }
     }
   }
-}
 
-static bool ResolveCatchBlockExceptionsClassVisitor(mirror::Class* c, void* arg)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  auto* exceptions_to_resolve =
-      reinterpret_cast<std::set<std::pair<uint16_t, const DexFile*>>*>(arg);
-  const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-  for (auto& m : c->GetVirtualMethods(pointer_size)) {
-    ResolveExceptionsForMethod(&m, *exceptions_to_resolve);
+  bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+    for (auto& m : c->GetVirtualMethods(pointer_size)) {  // Scan virtual methods' catch handlers.
+      ResolveExceptionsForMethod(&m);
+    }
+    for (auto& m : c->GetDirectMethods(pointer_size)) {  // Then the direct methods.
+      ResolveExceptionsForMethod(&m);
+    }
+    return true;  // NOTE(review): presumably "keep visiting" - confirm against ClassVisitor.
   }
-  for (auto& m : c->GetDirectMethods(pointer_size)) {
-    ResolveExceptionsForMethod(&m, *exceptions_to_resolve);
-  }
-  return true;
-}
 
-static bool RecordImageClassesVisitor(mirror::Class* klass, void* arg)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  std::unordered_set<std::string>* image_classes =
-      reinterpret_cast<std::unordered_set<std::string>*>(arg);
-  std::string temp;
-  image_classes->insert(klass->GetDescriptor(&temp));
-  return true;
-}
+ private:
+  std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve_;
+};
+
+class RecordImageClassesVisitor : public ClassVisitor {  // Collects visited classes' descriptors.
+ public:
+  explicit RecordImageClassesVisitor(std::unordered_set<std::string>* image_classes)
+      : image_classes_(image_classes) {}  // Stores the pointer; the set itself is not copied.
+
+  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    std::string temp;  // Scratch storage handed to GetDescriptor().
+    image_classes_->insert(klass->GetDescriptor(&temp));
+    return true;
+  }
+
+ private:
+  std::unordered_set<std::string>* const image_classes_;  // Output set, filled by Visit().
+};
 
 // Make a list of descriptors for classes to include in the image
 void CompilerDriver::LoadImageClasses(TimingLogger* timings) {
@@ -787,8 +797,8 @@
       hs.NewHandle(class_linker->FindSystemClass(self, "Ljava/lang/Throwable;")));
   do {
     unresolved_exception_types.clear();
-    class_linker->VisitClasses(ResolveCatchBlockExceptionsClassVisitor,
-                               &unresolved_exception_types);
+    ResolveCatchBlockExceptionsClassVisitor visitor(unresolved_exception_types);
+    class_linker->VisitClasses(&visitor);
     for (const std::pair<uint16_t, const DexFile*>& exception_type : unresolved_exception_types) {
       uint16_t exception_type_idx = exception_type.first;
       const DexFile* dex_file = exception_type.second;
@@ -811,7 +821,8 @@
   // We walk the roots looking for classes so that we'll pick up the
   // above classes plus any classes them depend on such super
   // classes, interfaces, and the required ClassLinker roots.
-  class_linker->VisitClasses(RecordImageClassesVisitor, image_classes_.get());
+  RecordImageClassesVisitor visitor(image_classes_.get());
+  class_linker->VisitClasses(&visitor);
 
   CHECK_NE(image_classes_->size(), 0U);
 }
@@ -899,6 +910,29 @@
   }
 
  private:
+  class FindImageClassesVisitor : public ClassVisitor {  // Fills data_->image_classes_.
+   public:
+    explicit FindImageClassesVisitor(ClinitImageUpdate* data) : data_(data) {}
+
+    bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+      std::string temp;  // Scratch storage handed to GetDescriptor().
+      const char* name = klass->GetDescriptor(&temp);
+      if (data_->image_class_descriptors_->find(name) != data_->image_class_descriptors_->end()) {
+        data_->image_classes_.push_back(klass);  // Listed by descriptor: keep it.
+      } else {
+        // Check whether it is initialized and has a clinit. They must be kept, too.
+        if (klass->IsInitialized() && klass->FindClassInitializer(
+            Runtime::Current()->GetClassLinker()->GetImagePointerSize()) != nullptr) {
+          data_->image_classes_.push_back(klass);
+        }
+      }
+      return true;
+    }
+
+   private:
+    ClinitImageUpdate* const data_;  // Enclosing updater whose members Visit() reads and appends to.
+  };
+
   ClinitImageUpdate(std::unordered_set<std::string>* image_class_descriptors, Thread* self,
                     ClassLinker* linker)
       SHARED_REQUIRES(Locks::mutator_lock_) :
@@ -915,25 +949,8 @@
 
     // Find all the already-marked classes.
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    linker->VisitClasses(FindImageClasses, this);
-  }
-
-  static bool FindImageClasses(mirror::Class* klass, void* arg)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    ClinitImageUpdate* data = reinterpret_cast<ClinitImageUpdate*>(arg);
-    std::string temp;
-    const char* name = klass->GetDescriptor(&temp);
-    if (data->image_class_descriptors_->find(name) != data->image_class_descriptors_->end()) {
-      data->image_classes_.push_back(klass);
-    } else {
-      // Check whether it is initialized and has a clinit. They must be kept, too.
-      if (klass->IsInitialized() && klass->FindClassInitializer(
-          Runtime::Current()->GetClassLinker()->GetImagePointerSize()) != nullptr) {
-        data->image_classes_.push_back(klass);
-      }
-    }
-
-    return true;
+    FindImageClassesVisitor visitor(this);
+    linker->VisitClasses(&visitor);
   }
 
   void VisitClinitClassesObject(mirror::Object* object) const
@@ -1731,7 +1748,7 @@
   explicit ResolveClassFieldsAndMethodsVisitor(const ParallelCompilationManager* manager)
       : manager_(manager) {}
 
-  virtual void Visit(size_t class_def_index) OVERRIDE REQUIRES(!Locks::mutator_lock_) {
+  void Visit(size_t class_def_index) OVERRIDE REQUIRES(!Locks::mutator_lock_) {
     ATRACE_CALL();
     Thread* const self = Thread::Current();
     jobject jclass_loader = manager_->GetClassLoader();
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 226e6b7..3f5a1ea 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -27,6 +27,8 @@
       small_method_threshold_(kDefaultSmallMethodThreshold),
       tiny_method_threshold_(kDefaultTinyMethodThreshold),
       num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
+      inline_depth_limit_(kDefaultInlineDepthLimit),
+      inline_max_code_units_(kDefaultInlineMaxCodeUnits),
       include_patch_information_(kDefaultIncludePatchInformation),
       top_k_profile_threshold_(kDefaultTopKProfileThreshold),
       debuggable_(false),
@@ -52,6 +54,8 @@
                                  size_t small_method_threshold,
                                  size_t tiny_method_threshold,
                                  size_t num_dex_methods_threshold,
+                                 size_t inline_depth_limit,
+                                 size_t inline_max_code_units,
                                  bool include_patch_information,
                                  double top_k_profile_threshold,
                                  bool debuggable,
@@ -71,6 +75,8 @@
     small_method_threshold_(small_method_threshold),
     tiny_method_threshold_(tiny_method_threshold),
     num_dex_methods_threshold_(num_dex_methods_threshold),
+    inline_depth_limit_(inline_depth_limit),
+    inline_max_code_units_(inline_max_code_units),
     include_patch_information_(include_patch_information),
     top_k_profile_threshold_(top_k_profile_threshold),
     debuggable_(debuggable),
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index fe681e2..17b19dd 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -51,6 +51,8 @@
   static constexpr double kDefaultTopKProfileThreshold = 90.0;
   static const bool kDefaultGenerateDebugInfo = kIsDebugBuild;
   static const bool kDefaultIncludePatchInformation = false;
+  static const size_t kDefaultInlineDepthLimit = 3;
+  static const size_t kDefaultInlineMaxCodeUnits = 18;
 
   CompilerOptions();
   ~CompilerOptions();
@@ -61,6 +63,8 @@
                   size_t small_method_threshold,
                   size_t tiny_method_threshold,
                   size_t num_dex_methods_threshold,
+                  size_t inline_depth_limit,
+                  size_t inline_max_code_units,
                   bool include_patch_information,
                   double top_k_profile_threshold,
                   bool debuggable,
@@ -137,6 +141,14 @@
     return num_dex_methods_threshold_;
   }
 
+  size_t GetInlineDepthLimit() const {  // kDefaultInlineDepthLimit unless set via constructor.
+    return inline_depth_limit_;
+  }
+
+  size_t GetInlineMaxCodeUnits() const {  // kDefaultInlineMaxCodeUnits unless set via constructor.
+    return inline_max_code_units_;
+  }
+
   double GetTopKProfileThreshold() const {
     return top_k_profile_threshold_;
   }
@@ -202,6 +214,8 @@
   const size_t small_method_threshold_;
   const size_t tiny_method_threshold_;
   const size_t num_dex_methods_threshold_;
+  const size_t inline_depth_limit_;
+  const size_t inline_max_code_units_;
   const bool include_patch_information_;
   // When using a profile file only the top K% of the profiled samples will be compiled.
   const double top_k_profile_threshold_;
diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc
index 4d423d0..a07d27c 100644
--- a/compiler/dwarf/dwarf_test.cc
+++ b/compiler/dwarf/dwarf_test.cc
@@ -27,7 +27,7 @@
 namespace dwarf {
 
 // Run the tests only on host since we need objdump.
-#ifndef HAVE_ANDROID_OS
+#ifndef __ANDROID__
 
 constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT;
 
@@ -336,7 +336,7 @@
   CheckObjdumpOutput(is64bit, "-W");
 }
 
-#endif  // HAVE_ANDROID_OS
+#endif  // __ANDROID__
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 293a488..17d75a3 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -141,7 +141,7 @@
     return false;
   }
   std::string error_msg;
-  oat_file_ = OatFile::OpenReadable(oat_file.get(), oat_location, nullptr, &error_msg);
+  oat_file_ = OatFile::OpenReadable(oat_file.get(), oat_location, nullptr, outof(error_msg));
   if (oat_file_ == nullptr) {
     PLOG(ERROR) << "Failed to open writable oat file " << oat_filename << " for " << oat_location
         << ": " << error_msg;
@@ -539,16 +539,19 @@
   return true;
 }
 
+class ComputeLazyFieldsForClassesVisitor : public ClassVisitor {  // Calls ComputeName per class.
+ public:
+  bool Visit(Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    StackHandleScope<1> hs(Thread::Current());  // One slot: the handle wrapping |c|.
+    mirror::Class::ComputeName(hs.NewHandle(c));
+    return true;
+  }
+};
+
 void ImageWriter::ComputeLazyFieldsForImageClasses() {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  class_linker->VisitClassesWithoutClassesLock(ComputeLazyFieldsForClassesVisitor, nullptr);
-}
-
-bool ImageWriter::ComputeLazyFieldsForClassesVisitor(Class* c, void* /*arg*/) {
-  Thread* self = Thread::Current();
-  StackHandleScope<1> hs(self);
-  mirror::Class::ComputeName(hs.NewHandle(c));
-  return true;
+  ComputeLazyFieldsForClassesVisitor visitor;
+  class_linker->VisitClassesWithoutClassesLock(&visitor);
 }
 
 void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) {
@@ -592,9 +595,20 @@
   return compiler_driver_.IsImageClass(klass->GetDescriptor(&temp));
 }
 
-struct NonImageClasses {
-  ImageWriter* image_writer;
-  std::set<std::string>* non_image_classes;
+class NonImageClassesVisitor : public ClassVisitor {  // Gathers non-image class descriptors.
+ public:
+  explicit NonImageClassesVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
+
+  bool Visit(Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!image_writer_->IsImageClass(klass)) {
+      std::string temp;  // Scratch storage handed to GetDescriptor().
+      non_image_classes_.insert(klass->GetDescriptor(&temp));
+    }
+    return true;
+  }
+
+  std::set<std::string> non_image_classes_;  // Result set; read directly by PruneNonImageClasses().
+  ImageWriter* const image_writer_;  // Queried through IsImageClass().
 };
 
 void ImageWriter::PruneNonImageClasses() {
@@ -606,14 +620,11 @@
   Thread* self = Thread::Current();
 
   // Make a list of classes we would like to prune.
-  std::set<std::string> non_image_classes;
-  NonImageClasses context;
-  context.image_writer = this;
-  context.non_image_classes = &non_image_classes;
-  class_linker->VisitClasses(NonImageClassesVisitor, &context);
+  NonImageClassesVisitor visitor(this);
+  class_linker->VisitClasses(&visitor);
 
   // Remove the undesired classes from the class roots.
-  for (const std::string& it : non_image_classes) {
+  for (const std::string& it : visitor.non_image_classes_) {
     bool result = class_linker->RemoveClass(it.c_str(), nullptr);
     DCHECK(result);
   }
@@ -669,15 +680,6 @@
   class_linker->DropFindArrayClassCache();
 }
 
-bool ImageWriter::NonImageClassesVisitor(Class* klass, void* arg) {
-  NonImageClasses* context = reinterpret_cast<NonImageClasses*>(arg);
-  if (!context->image_writer->IsImageClass(klass)) {
-    std::string temp;
-    context->non_image_classes->insert(klass->GetDescriptor(&temp));
-  }
-  return true;
-}
-
 void ImageWriter::CheckNonImageClassesRemoved() {
   if (compiler_driver_.GetImageClasses() != nullptr) {
     gc::Heap* heap = Runtime::Current()->GetHeap();
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 42b1cbf..cabd918 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -217,8 +217,6 @@
   // Preinitializes some otherwise lazy fields (such as Class name) to avoid runtime image dirtying.
   void ComputeLazyFieldsForImageClasses()
       SHARED_REQUIRES(Locks::mutator_lock_);
-  static bool ComputeLazyFieldsForClassesVisitor(mirror::Class* klass, void* arg)
-      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Wire dex cache resolved strings to strings in the image to avoid runtime resolution.
   void ComputeEagerResolvedStrings() SHARED_REQUIRES(Locks::mutator_lock_);
@@ -227,8 +225,6 @@
 
   // Remove unwanted classes from various roots.
   void PruneNonImageClasses() SHARED_REQUIRES(Locks::mutator_lock_);
-  static bool NonImageClassesVisitor(mirror::Class* c, void* arg)
-      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Verify unwanted classes removed.
   void CheckNonImageClassesRemoved() SHARED_REQUIRES(Locks::mutator_lock_);
@@ -376,6 +372,7 @@
   friend class FixupClassVisitor;
   friend class FixupRootVisitor;
   friend class FixupVisitor;
+  friend class NonImageClassesVisitor;
   DISALLOW_COPY_AND_ASSIGN(ImageWriter);
 };
 
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index d70211f..c95bac2 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -71,6 +71,8 @@
       CompilerOptions::kDefaultSmallMethodThreshold,
       CompilerOptions::kDefaultTinyMethodThreshold,
       CompilerOptions::kDefaultNumDexMethodsThreshold,
+      CompilerOptions::kDefaultInlineDepthLimit,
+      CompilerOptions::kDefaultInlineMaxCodeUnits,
       /* include_patch_information */ false,
       CompilerOptions::kDefaultTopKProfileThreshold,
       Runtime::Current()->IsDebuggable(),
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 016f28e..0bfe8a2 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -28,7 +28,7 @@
 namespace art {
 
 // Run the tests only on host.
-#ifndef HAVE_ANDROID_OS
+#ifndef __ANDROID__
 
 class JNICFITest : public CFITest {
  public:
@@ -88,6 +88,6 @@
 TEST_ISA(kMips)
 TEST_ISA(kMips64)
 
-#endif  // HAVE_ANDROID_OS
+#endif  // __ANDROID__
 
 }  // namespace art
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index c98a5f8..05a33d7 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -16,6 +16,7 @@
 
 #include "arch/instruction_set_features.h"
 #include "art_method-inl.h"
+#include "base/out.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
 #include "compiled_method.h"
@@ -83,7 +84,7 @@
 
   std::string error_msg;
   std::unique_ptr<const InstructionSetFeatures> insn_features(
-      InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
+      InstructionSetFeatures::FromVariant(insn_set, "default", outof(error_msg)));
   ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
   compiler_options_.reset(new CompilerOptions);
   verification_results_.reset(new VerificationResults(compiler_options_.get()));
@@ -123,7 +124,7 @@
     compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
   }
   std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(), tmp.GetFilename(), nullptr,
-                                                  nullptr, false, nullptr, &error_msg));
+                                                  nullptr, false, nullptr, outof(error_msg)));
   ASSERT_TRUE(oat_file.get() != nullptr) << error_msg;
   const OatHeader& oat_header = oat_file->GetOatHeader();
   ASSERT_TRUE(oat_header.IsValid());
@@ -183,14 +184,14 @@
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(28U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(112 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(113 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
     InstructionSet insn_set = kX86;
     std::string error_msg;
     std::unique_ptr<const InstructionSetFeatures> insn_features(
-        InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
+        InstructionSetFeatures::FromVariant(insn_set, "default", outof(error_msg)));
     ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
     std::vector<const DexFile*> dex_files;
     uint32_t image_file_location_oat_checksum = 0;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index e15eff9..0569565 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2348,7 +2348,12 @@
     case Primitive::kPrimInt:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::Any());
-      locations->SetOut(Location::SameAsFirstInput());
+      if (mul->InputAt(1)->IsIntConstant()) {
+        // Can use 3 operand multiply.
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      } else {
+        locations->SetOut(Location::SameAsFirstInput());
+      }
       break;
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
@@ -2376,21 +2381,24 @@
   LocationSummary* locations = mul->GetLocations();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
-  DCHECK(first.Equals(locations->Out()));
+  Location out = locations->Out();
 
   switch (mul->GetResultType()) {
-    case Primitive::kPrimInt: {
-      if (second.IsRegister()) {
+    case Primitive::kPrimInt:
+      // The constant may have ended up in a register, so test explicitly to avoid
+      // problems where the output may not be the same as the first operand.
+      if (mul->InputAt(1)->IsIntConstant()) {
+        Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
+        __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
+      } else if (second.IsRegister()) {
+        DCHECK(first.Equals(out));
         __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
-      } else if (second.IsConstant()) {
-        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
-        __ imull(first.AsRegister<Register>(), imm);
       } else {
         DCHECK(second.IsStackSlot());
+        DCHECK(first.Equals(out));
         __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
       }
       break;
-    }
 
     case Primitive::kPrimLong: {
       Register in1_hi = first.AsRegisterPairHigh<Register>();
@@ -4535,7 +4543,11 @@
   Location destination = move->GetDestination();
 
   if (source.IsRegister() && destination.IsRegister()) {
-    __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>());
+    // Use XOR swap algorithm to avoid serializing XCHG instruction or using a temporary.
+    DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
+    __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
+    __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
+    __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
   } else if (source.IsRegister() && destination.IsStackSlot()) {
     Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsRegister()) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index a95ce68..287737b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2535,13 +2535,19 @@
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::Any());
-      locations->SetOut(Location::SameAsFirstInput());
+      if (mul->InputAt(1)->IsIntConstant()) {
+        // Can use 3 operand multiply.
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      } else {
+        locations->SetOut(Location::SameAsFirstInput());
+      }
       break;
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(mul->InputAt(1)));
-      if (locations->InAt(1).IsConstant()) {
+      locations->SetInAt(1, Location::Any());
+      if (mul->InputAt(1)->IsLongConstant() &&
+          IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
         // Can use 3 operand multiply.
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       } else {
@@ -2566,37 +2572,51 @@
   LocationSummary* locations = mul->GetLocations();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
+  Location out = locations->Out();
   switch (mul->GetResultType()) {
-    case Primitive::kPrimInt: {
-      DCHECK(first.Equals(locations->Out()));
-      if (second.IsRegister()) {
+    case Primitive::kPrimInt:
+      // The constant may have ended up in a register, so test explicitly to avoid
+      // problems where the output may not be the same as the first operand.
+      if (mul->InputAt(1)->IsIntConstant()) {
+        Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
+        __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
+      } else if (second.IsRegister()) {
+        DCHECK(first.Equals(out));
         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
-      } else if (second.IsConstant()) {
-        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
-        __ imull(first.AsRegister<CpuRegister>(), imm);
       } else {
+        DCHECK(first.Equals(out));
         DCHECK(second.IsStackSlot());
         __ imull(first.AsRegister<CpuRegister>(),
                  Address(CpuRegister(RSP), second.GetStackIndex()));
       }
       break;
-    }
     case Primitive::kPrimLong: {
-      if (second.IsConstant()) {
-        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
-        DCHECK(IsInt<32>(value));
-        __ imulq(locations->Out().AsRegister<CpuRegister>(),
-                 first.AsRegister<CpuRegister>(),
-                 Immediate(static_cast<int32_t>(value)));
-      } else {
-        DCHECK(first.Equals(locations->Out()));
+      // The constant may have ended up in a register, so test explicitly to avoid
+      // problems where the output may not be the same as the first operand.
+      if (mul->InputAt(1)->IsLongConstant()) {
+        int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
+        if (IsInt<32>(value)) {
+          __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
+                   Immediate(static_cast<int32_t>(value)));
+        } else {
+          // Have to use the constant area.
+          DCHECK(first.Equals(out));
+          __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
+        }
+      } else if (second.IsRegister()) {
+        DCHECK(first.Equals(out));
         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        DCHECK(first.Equals(out));
+        __ imulq(first.AsRegister<CpuRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
       }
       break;
     }
 
     case Primitive::kPrimFloat: {
-      DCHECK(first.Equals(locations->Out()));
+      DCHECK(first.Equals(out));
       if (second.IsFpuRegister()) {
         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
@@ -2611,7 +2631,7 @@
     }
 
     case Primitive::kPrimDouble: {
-      DCHECK(first.Equals(locations->Out()));
+      DCHECK(first.Equals(out));
       if (second.IsFpuRegister()) {
         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else if (second.IsConstant()) {
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index cfebb77..e4bc9e6 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -89,6 +89,33 @@
                           block->GetBlockId()));
   }
 
+  // Ensure that only Return(Void) and Throw jump to Exit. An exiting
+  // TryBoundary may be between a Throw and the Exit if the Throw is in a try.
+  if (block->IsExitBlock()) {
+    for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
+      HBasicBlock* predecessor = block->GetPredecessors().Get(i);
+      if (predecessor->IsSingleTryBoundary()
+          && !predecessor->GetLastInstruction()->AsTryBoundary()->IsEntry()) {
+        HBasicBlock* real_predecessor = predecessor->GetSinglePredecessor();
+        HInstruction* last_instruction = real_predecessor->GetLastInstruction();
+        if (!last_instruction->IsThrow()) {
+          AddError(StringPrintf("Unexpected TryBoundary between %s:%d and Exit.",
+                                last_instruction->DebugName(),
+                                last_instruction->GetId()));
+        }
+      } else {
+        HInstruction* last_instruction = predecessor->GetLastInstruction();
+        if (!last_instruction->IsReturn()
+            && !last_instruction->IsReturnVoid()
+            && !last_instruction->IsThrow()) {
+          AddError(StringPrintf("Unexpected instruction %s:%d jumps into the exit block.",
+                                last_instruction->DebugName(),
+                                last_instruction->GetId()));
+        }
+      }
+    }
+  }
+
   // Visit this block's list of phis.
   for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
     HInstruction* current = it.Current();
@@ -328,6 +355,39 @@
 void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
   super_type::VisitBasicBlock(block);
 
+  // Ensure that only catch blocks have exceptional predecessors, and if they do
+  // these are instructions which throw into them.
+  if (block->IsCatchBlock()) {
+    for (size_t i = 0, e = block->GetExceptionalPredecessors().Size(); i < e; ++i) {
+      HInstruction* thrower = block->GetExceptionalPredecessors().Get(i);
+      HBasicBlock* try_block = thrower->GetBlock();
+      if (!thrower->CanThrow()) {
+        AddError(StringPrintf("Exceptional predecessor %s:%d of catch block %d does not throw.",
+                              thrower->DebugName(),
+                              thrower->GetId(),
+                              block->GetBlockId()));
+      } else if (!try_block->IsInTry()) {
+        AddError(StringPrintf("Exceptional predecessor %s:%d of catch block %d "
+                              "is not in a try block.",
+                              thrower->DebugName(),
+                              thrower->GetId(),
+                              block->GetBlockId()));
+      } else if (!try_block->GetTryEntry()->HasExceptionHandler(*block)) {
+        AddError(StringPrintf("Catch block %d is not an exception handler of "
+                              "its exceptional predecessor %s:%d.",
+                              block->GetBlockId(),
+                              thrower->DebugName(),
+                              thrower->GetId()));
+      }
+    }
+  } else {
+    if (!block->GetExceptionalPredecessors().IsEmpty()) {
+      AddError(StringPrintf("Normal block %d has %zu exceptional predecessors.",
+                            block->GetBlockId(),
+                            block->GetExceptionalPredecessors().Size()));
+    }
+  }
+
   // Ensure that catch blocks are not normal successors, and normal blocks are
   // never exceptional successors.
   const size_t num_normal_successors = block->NumberOfNormalSuccessors();
@@ -512,6 +572,7 @@
 
 void SSAChecker::VisitInstruction(HInstruction* instruction) {
   super_type::VisitInstruction(instruction);
+  HBasicBlock* block = instruction->GetBlock();
 
   // Ensure an instruction dominates all its uses.
   for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
@@ -543,6 +604,24 @@
       }
     }
   }
+
+  // Ensure that throwing instructions in try blocks are listed as exceptional
+  // predecessors in their exception handlers.
+  if (instruction->CanThrow() && block->IsInTry()) {
+    for (HExceptionHandlerIterator handler_it(*block->GetTryEntry());
+         !handler_it.Done();
+         handler_it.Advance()) {
+      if (!handler_it.Current()->GetExceptionalPredecessors().Contains(instruction)) {
+        AddError(StringPrintf("Instruction %s:%d is in try block %d and can throw "
+                              "but its exception handler %d does not list it in "
+                              "its exceptional predecessors.",
+                              instruction->DebugName(),
+                              instruction->GetId(),
+                              block->GetBlockId(),
+                              handler_it.Current()->GetBlockId()));
+      }
+    }
+  }
 }
 
 static Primitive::Type PrimitiveKind(Primitive::Type type) {
@@ -590,11 +669,32 @@
   if (phi->IsCatchPhi()) {
     // The number of inputs of a catch phi corresponds to the total number of
     // throwing instructions caught by this catch block.
+    const GrowableArray<HInstruction*>& predecessors =
+        phi->GetBlock()->GetExceptionalPredecessors();
+    if (phi->InputCount() != predecessors.Size()) {
+      AddError(StringPrintf(
+          "Phi %d in catch block %d has %zu inputs, "
+          "but catch block %d has %zu exceptional predecessors.",
+          phi->GetId(), phi->GetBlock()->GetBlockId(), phi->InputCount(),
+          phi->GetBlock()->GetBlockId(), predecessors.Size()));
+    } else {
+      for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+        HInstruction* input = phi->InputAt(i);
+        HInstruction* thrower = predecessors.Get(i);
+        if (!input->StrictlyDominates(thrower)) {
+          AddError(StringPrintf(
+              "Input %d at index %zu of phi %d from catch block %d does not "
+              "dominate the throwing instruction %s:%d.",
+              input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(),
+              thrower->DebugName(), thrower->GetId()));
+        }
+      }
+    }
   } else {
     // Ensure the number of inputs of a non-catch phi is the same as the number
     // of its predecessors.
     const GrowableArray<HBasicBlock*>& predecessors =
-      phi->GetBlock()->GetPredecessors();
+        phi->GetBlock()->GetPredecessors();
     if (phi->InputCount() != predecessors.Size()) {
       AddError(StringPrintf(
           "Phi %d in block %d has %zu inputs, "
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
index eca0d93..0f66775 100644
--- a/compiler/optimizing/graph_checker_test.cc
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -25,14 +25,14 @@
  * Create a simple control-flow graph composed of two blocks:
  *
  *   BasicBlock 0, succ: 1
- *     0: Goto 1
+ *     0: ReturnVoid 1
  *   BasicBlock 1, pred: 0
  *     1: Exit
  */
 HGraph* CreateSimpleCFG(ArenaAllocator* allocator) {
   HGraph* graph = CreateGraph(allocator);
   HBasicBlock* entry_block = new (allocator) HBasicBlock(graph);
-  entry_block->AddInstruction(new (allocator) HGoto());
+  entry_block->AddInstruction(new (allocator) HReturnVoid());
   graph->AddBlock(entry_block);
   graph->SetEntryBlock(entry_block);
   HBasicBlock* exit_block = new (allocator) HBasicBlock(graph);
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index afea403..069a7a4 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -386,6 +386,7 @@
     StartAttributeStream("recursive") << std::boolalpha
                                       << invoke->IsRecursive()
                                       << std::noboolalpha;
+    StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
   }
 
   void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE {
@@ -396,6 +397,11 @@
     return strcmp(pass_name_, name) == 0;
   }
 
+  bool IsReferenceTypePropagationPass() {  // True if pass_name_ contains the RTP pass name.
+    return strstr(pass_name_, ReferenceTypePropagation::kReferenceTypePropagationPassName)
+        != nullptr;  // Substring match (strstr), not an exact strcmp comparison.
+  }
+
   void PrintInstruction(HInstruction* instruction) {
     output_ << instruction->DebugName();
     if (instruction->InputCount() > 0) {
@@ -459,27 +465,19 @@
       } else {
         StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId();
       }
-    } else if (IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName)
-               && is_after_pass_) {
-      if (instruction->GetType() == Primitive::kPrimNot) {
-        if (instruction->IsLoadClass()) {
-          ReferenceTypeInfo info = instruction->AsLoadClass()->GetLoadedClassRTI();
-          ScopedObjectAccess soa(Thread::Current());
-          if (info.GetTypeHandle().GetReference() != nullptr) {
-            StartAttributeStream("klass") << PrettyClass(info.GetTypeHandle().Get());
-          } else {
-            StartAttributeStream("klass") << "unresolved";
-          }
-        } else {
-          ReferenceTypeInfo info = instruction->GetReferenceTypeInfo();
-          if (info.IsTop()) {
-            StartAttributeStream("klass") << "java.lang.Object";
-          } else {
-            ScopedObjectAccess soa(Thread::Current());
-            StartAttributeStream("klass") << PrettyClass(info.GetTypeHandle().Get());
-          }
-          StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
-        }
+    } else if (IsReferenceTypePropagationPass()
+        && (instruction->GetType() == Primitive::kPrimNot)) {
+      ReferenceTypeInfo info = instruction->IsLoadClass()
+        ? instruction->AsLoadClass()->GetLoadedClassRTI()
+        : instruction->GetReferenceTypeInfo();
+      ScopedObjectAccess soa(Thread::Current());
+      if (info.IsValid()) {
+        StartAttributeStream("klass") << PrettyDescriptor(info.GetTypeHandle().Get());
+        StartAttributeStream("can_be_null")
+            << std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
+        StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
+      } else {
+        DCHECK(!is_after_pass_) << "Type info should be valid after reference type propagation";
       }
     }
     if (disasm_info_ != nullptr) {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index c185b58..0106595 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -22,8 +22,10 @@
 #include "constant_folding.h"
 #include "dead_code_elimination.h"
 #include "driver/compiler_driver-inl.h"
+#include "driver/compiler_options.h"
 #include "driver/dex_compilation_unit.h"
 #include "instruction_simplifier.h"
+#include "intrinsics.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "nodes.h"
@@ -38,9 +40,6 @@
 
 namespace art {
 
-static constexpr int kMaxInlineCodeUnits = 18;
-static constexpr int kDepthLimit = 3;
-
 void HInliner::Run() {
   if (graph_->IsDebuggable()) {
     // For simplicity, we currently never inline when the graph is debuggable. This avoids
@@ -109,10 +108,8 @@
     receiver = receiver->InputAt(0);
   }
   ReferenceTypeInfo info = receiver->GetReferenceTypeInfo();
-  if (info.IsTop()) {
-    // We have no information on the receiver.
-    return nullptr;
-  } else if (!info.IsExact()) {
+  DCHECK(info.IsValid()) << "Invalid RTI for " << receiver->DebugName();
+  if (!info.IsExact()) {
     // We currently only support inlining with known receivers.
     // TODO: Remove this check, we should be able to inline final methods
     // on unknown receivers.
@@ -221,7 +218,8 @@
     return false;
   }
 
-  if (code_item->insns_size_in_code_units_ > kMaxInlineCodeUnits) {
+  size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
+  if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                    << " is too big to inline";
     return false;
@@ -273,11 +271,11 @@
   const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
   const DexFile& callee_dex_file = *resolved_method->GetDexFile();
   uint32_t method_index = resolved_method->GetDexMethodIndex();
-
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   DexCompilationUnit dex_compilation_unit(
     nullptr,
     caller_compilation_unit_.GetClassLoader(),
-    caller_compilation_unit_.GetClassLinker(),
+    class_linker,
     *resolved_method->GetDexFile(),
     code_item,
     resolved_method->GetDeclaringClass()->GetDexClassDefIndex(),
@@ -358,8 +356,10 @@
   HConstantFolding fold(callee_graph);
   ReferenceTypePropagation type_propagation(callee_graph, handles_);
   InstructionSimplifier simplify(callee_graph, stats_);
+  IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_);
 
   HOptimization* optimizations[] = {
+    &intrinsics,
     &dce,
     &fold,
     &type_propagation,
@@ -371,7 +371,7 @@
     optimization->Run();
   }
 
-  if (depth_ + 1 < kDepthLimit) {
+  if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
     HInliner inliner(callee_graph,
                      outer_compilation_unit_,
                      dex_compilation_unit,
@@ -450,7 +450,33 @@
     }
   }
 
-  callee_graph->InlineInto(graph_, invoke_instruction);
+  HInstruction* return_replacement = callee_graph->InlineInto(graph_, invoke_instruction);
+
+  // When merging the graph we might create a new NullConstant in the caller graph which does
+  // not have the chance to be typed. We assign the correct type here so that we can keep the
+  // assertion that every reference has a valid type. This also simplifies checks along the way.
+  HNullConstant* null_constant = graph_->GetNullConstant();
+  if (!null_constant->GetReferenceTypeInfo().IsValid()) {
+    ReferenceTypeInfo::TypeHandle obj_handle =
+            handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
+    null_constant->SetReferenceTypeInfo(
+            ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
+  }
+
+  if ((return_replacement != nullptr)
+      && (return_replacement->GetType() == Primitive::kPrimNot)) {
+    if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
+      // Make sure that we have a valid type for the return. We may get an invalid one when
+      // we inline invokes with multiple branches and create a Phi for the result.
+      // TODO: we could be more precise by merging the phi inputs but that requires
+      // some functionality from the reference type propagation.
+      DCHECK(return_replacement->IsPhi());
+      ReferenceTypeInfo::TypeHandle return_handle =
+        handles_->NewHandle(resolved_method->GetReturnType());
+      return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
+         return_handle, return_handle->IsFinal() /* is_exact */));
+    }
+  }
 
   return true;
 }
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index b30b6c7..d391145 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -195,16 +195,17 @@
 // Returns whether doing a type test between the class of `object` against `klass` has
 // a statically known outcome. The result of the test is stored in `outcome`.
 static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) {
-  if (!klass->IsResolved()) {
-    // If the class couldn't be resolve it's not safe to compare against it. It's
-    // default type would be Top which might be wider that the actual class type
-    // and thus producing wrong results.
+  DCHECK(!object->IsNullConstant()) << "Null constants should be special cased";
+  ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo();
+  ScopedObjectAccess soa(Thread::Current());
+  if (!obj_rti.IsValid()) {
+    // We run the simplifier before the reference type propagation so type info might not be
+    // available.
     return false;
   }
 
-  ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo();
   ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI();
-  ScopedObjectAccess soa(Thread::Current());
+  DCHECK(class_rti.IsValid() && class_rti.IsExact());
   if (class_rti.IsSupertypeOf(obj_rti)) {
     *outcome = true;
     return true;
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 8ef13e1..55e964e 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -103,6 +103,16 @@
           LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
           UNREACHABLE();
       }
+    case kIntrinsicNumberOfLeadingZeros:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::kPrimInt:
+          return Intrinsics::kIntegerNumberOfLeadingZeros;
+        case Primitive::kPrimLong:
+          return Intrinsics::kLongNumberOfLeadingZeros;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
 
     // Abs.
     case kIntrinsicAbsDouble:
@@ -359,7 +369,7 @@
 std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) {
   switch (intrinsic) {
     case Intrinsics::kNone:
-      os << "No intrinsic.";
+      os << "None";
       break;
 #define OPTIMIZING_INTRINSICS(Name, IsStatic) \
     case Intrinsics::k ## Name: \
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index b4dbf75..a797654 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -224,6 +224,48 @@
   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
 }
 
+static void GenNumberOfLeadingZeros(LocationSummary* locations,  // Emits CLZ for int/long.
+                                    Primitive::Type type,
+                                    ArmAssembler* assembler) {
+  Location in = locations->InAt(0);
+  Register out = locations->Out().AsRegister<Register>();
+
+  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+
+  if (type == Primitive::kPrimLong) {
+    Register in_reg_lo = in.AsRegisterPairLow<Register>();
+    Register in_reg_hi = in.AsRegisterPairHigh<Register>();
+    Label end;
+    __ clz(out, in_reg_hi);  // Start with the count for the high word.
+    __ CompareAndBranchIfNonZero(in_reg_hi, &end);  // High word non-zero: that count is final.
+    __ clz(out, in_reg_lo);  // High word is zero: count within the low word instead...
+    __ AddConstant(out, 32);  // ...and add 32 for the all-zero high word.
+    __ Bind(&end);
+  } else {
+    __ clz(out, in.AsRegister<Register>());  // Int input lives in a single register.
+  }
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());  // Read again after `out` is written,
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);  // hence overlap.
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
 static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
   Location in = locations->InAt(0);
   Location out = locations->Out();
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 78ac167..2c93fea 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -260,6 +260,33 @@
   GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetVIXLAssembler());
 }
 
+static void GenNumberOfLeadingZeros(LocationSummary* locations,  // Emits one Clz, int or long.
+                                    Primitive::Type type,
+                                    vixl::MacroAssembler* masm) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  Location in = locations->InAt(0);
+  Location out = locations->Out();
+
+  __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));  // Same `type` for source and result.
+}
+
+void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
+}
+
 static void GenReverse(LocationSummary* locations,
                        Primitive::Type type,
                        vixl::MacroAssembler* masm) {
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 2c9248f..d28c5a3 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -27,8 +27,10 @@
   V(FloatIntBitsToFloat, kStatic) \
   V(IntegerReverse, kStatic) \
   V(IntegerReverseBytes, kStatic) \
+  V(IntegerNumberOfLeadingZeros, kStatic) \
   V(LongReverse, kStatic) \
   V(LongReverseBytes, kStatic) \
+  V(LongNumberOfLeadingZeros, kStatic) \
   V(ShortReverseBytes, kStatic) \
   V(MathAbsDouble, kStatic) \
   V(MathAbsFloat, kStatic) \
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 0d6ca09..993c005 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1756,6 +1756,8 @@
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
+UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ea342e9..8ab0b77 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1615,6 +1615,8 @@
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
+UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 519fa00..61dadc2 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -564,6 +564,13 @@
   return false;
 }
 
+void HBasicBlock::AddExceptionalPredecessor(HInstruction* exceptional_predecessor) {
+  DCHECK(exceptional_predecessor->CanThrow());  // Must be able to throw,
+  DCHECK(exceptional_predecessor->GetBlock()->IsInTry());  // and sit in a try block
+  DCHECK(exceptional_predecessor->GetBlock()->GetTryEntry()->HasExceptionHandler(*this));
+  exceptional_predecessors_.Add(exceptional_predecessor);  // Record it on this block.
+}
+
 static void UpdateInputsUsers(HInstruction* instruction) {
   for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
     instruction->InputAt(i)->AddUseAt(instruction, i);
@@ -1225,10 +1232,12 @@
     return false;
   }
 
-  // Exception handler lists cannot contain duplicates, which makes it
-  // sufficient to test inclusion only in one direction.
-  for (HExceptionHandlerIterator it(other); !it.Done(); it.Advance()) {
-    if (!HasExceptionHandler(*it.Current())) {
+  // Exception handlers need to be stored in the same order.
+  for (HExceptionHandlerIterator it1(*this), it2(other);
+       !it1.Done();
+       it1.Advance(), it2.Advance()) {
+    DCHECK(!it2.Done());
+    if (it1.Current() != it2.Current()) {
       return false;
     }
   }
@@ -1485,7 +1494,7 @@
   blocks_.Put(block->GetBlockId(), nullptr);
 }
 
-void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
+HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
   DCHECK(HasExitBlock()) << "Unimplemented scenario";
   // Update the environments in this graph to have the invoke's environment
   // as parent.
@@ -1510,6 +1519,7 @@
     outer_graph->SetHasBoundsChecks(true);
   }
 
+  HInstruction* return_value = nullptr;
   if (GetBlocks().Size() == 3) {
     // Simple case of an entry block, a body block, and an exit block.
     // Put the body block's instruction into `invoke`'s block.
@@ -1524,7 +1534,8 @@
 
     // Replace the invoke with the return value of the inlined graph.
     if (last->IsReturn()) {
-      invoke->ReplaceWith(last->InputAt(0));
+      return_value = last->InputAt(0);
+      invoke->ReplaceWith(return_value);
     } else {
       DCHECK(last->IsReturnVoid());
     }
@@ -1546,7 +1557,6 @@
 
     // Update all predecessors of the exit block (now the `to` block)
     // to not `HReturn` but `HGoto` instead.
-    HInstruction* return_value = nullptr;
     bool returns_void = to->GetPredecessors().Get(0)->GetLastInstruction()->IsReturnVoid();
     if (to->GetPredecessors().Size() == 1) {
       HBasicBlock* predecessor = to->GetPredecessors().Get(0);
@@ -1680,6 +1690,8 @@
 
   // Finally remove the invoke from the caller.
   invoke->GetBlock()->RemoveInstruction(invoke);
+
+  return return_value;
 }
 
 /*
@@ -1757,11 +1769,39 @@
   }
 }
 
+void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) {  // Debug-checked setter.
+  if (kIsDebugBuild) {
+    DCHECK_EQ(GetType(), Primitive::kPrimNot);  // Setter is for reference-typed values only.
+    ScopedObjectAccess soa(Thread::Current());
+    DCHECK(rti.IsValid()) << "Invalid RTI for " << DebugName();
+    if (IsBoundType()) {
+      // Having the test here spares us from making the method virtual just for
+      // the sake of a DCHECK.
+      ReferenceTypeInfo upper_bound_rti = AsBoundType()->GetUpperBound();
+      DCHECK(upper_bound_rti.IsSupertypeOf(rti))  // New RTI must stay within the upper bound.
+          << " upper_bound_rti: " << upper_bound_rti
+          << " rti: " << rti;
+      DCHECK(!upper_bound_rti.GetTypeHandle()->IsFinal() || rti.IsExact());  // Final => exact.
+    }
+  }
+  reference_type_info_ = rti;  // Stored unconditionally; the checks above are debug-only.
+}
+
+ReferenceTypeInfo::ReferenceTypeInfo() : type_handle_(TypeHandle()), is_exact_(false) {}
+
+ReferenceTypeInfo::ReferenceTypeInfo(TypeHandle type_handle, bool is_exact)
+    : type_handle_(type_handle), is_exact_(is_exact) {
+  if (kIsDebugBuild) {  // The handle is validated in debug builds only.
+    ScopedObjectAccess soa(Thread::Current());
+    DCHECK(IsValidHandle(type_handle));
+  }
+}
+
 std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) {
   ScopedObjectAccess soa(Thread::Current());
   os << "["
-     << " is_top=" << rhs.IsTop()
-     << " type=" << (rhs.IsTop() ? "?" : PrettyClass(rhs.GetTypeHandle().Get()))
+     << " is_valid=" << rhs.IsValid()
+     << " type=" << (!rhs.IsValid() ? "?" : PrettyClass(rhs.GetTypeHandle().Get()))
      << " is_exact=" << rhs.IsExact()
      << " ]";
   return os;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7f446d4..9b8521d 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -58,6 +58,7 @@
 static const int kDefaultNumberOfBlocks = 8;
 static const int kDefaultNumberOfSuccessors = 2;
 static const int kDefaultNumberOfPredecessors = 2;
+static const int kDefaultNumberOfExceptionalPredecessors = 0;
 static const int kDefaultNumberOfDominatedBlocks = 1;
 static const int kDefaultNumberOfBackEdges = 1;
 
@@ -210,7 +211,9 @@
   void ComputeTryBlockInformation();
 
   // Inline this graph in `outer_graph`, replacing the given `invoke` instruction.
-  void InlineInto(HGraph* outer_graph, HInvoke* invoke);
+  // Returns the instruction used to replace the invoke expression or null if the
+  // invoke is for a void method.
+  HInstruction* InlineInto(HGraph* outer_graph, HInvoke* invoke);
 
   // Need to add a couple of blocks to test if the loop body is entered and
   // put deoptimization instructions, etc.
@@ -306,7 +309,12 @@
   // already, it is created and inserted into the graph. This method is only for
   // integral types.
   HConstant* GetConstant(Primitive::Type type, int64_t value);
+
+  // TODO: This is problematic for the consistency of reference type propagation
+  // because it can be created anytime after the pass and thus it will be left
+  // with an invalid type.
   HNullConstant* GetNullConstant();
+
   HIntConstant* GetIntConstant(int32_t value) {
     return CreateConstant(value, &cached_int_constants_);
   }
@@ -557,6 +565,7 @@
   explicit HBasicBlock(HGraph* graph, uint32_t dex_pc = kNoDexPc)
       : graph_(graph),
         predecessors_(graph->GetArena(), kDefaultNumberOfPredecessors),
+        exceptional_predecessors_(graph->GetArena(), kDefaultNumberOfExceptionalPredecessors),
         successors_(graph->GetArena(), kDefaultNumberOfSuccessors),
         loop_information_(nullptr),
         dominator_(nullptr),
@@ -571,6 +580,10 @@
     return predecessors_;
   }
 
+  const GrowableArray<HInstruction*>& GetExceptionalPredecessors() const {
+    return exceptional_predecessors_;
+  }
+
   const GrowableArray<HBasicBlock*>& GetSuccessors() const {
     return successors_;
   }
@@ -639,6 +652,8 @@
   HInstruction* GetLastPhi() const { return phis_.last_instruction_; }
   const HInstructionList& GetPhis() const { return phis_; }
 
+  void AddExceptionalPredecessor(HInstruction* exceptional_predecessor);
+
   void AddSuccessor(HBasicBlock* block) {
     successors_.Add(block);
     block->predecessors_.Add(this);
@@ -678,6 +693,10 @@
     predecessors_.Delete(block);
   }
 
+  void RemoveExceptionalPredecessor(HInstruction* instruction) {
+    exceptional_predecessors_.Delete(instruction);
+  }
+
   void RemoveSuccessor(HBasicBlock* block) {
     successors_.Delete(block);
   }
@@ -714,6 +733,15 @@
     return -1;
   }
 
+  size_t GetExceptionalPredecessorIndexOf(HInstruction* exceptional_predecessor) const {
+    for (size_t i = 0, e = exceptional_predecessors_.Size(); i < e; ++i) {
+      if (exceptional_predecessors_.Get(i) == exceptional_predecessor) {
+        return i;
+      }
+    }
+    return -1;
+  }
+
   size_t GetSuccessorIndexOf(HBasicBlock* successor) const {
     for (size_t i = 0, e = successors_.Size(); i < e; ++i) {
       if (successors_.Get(i) == successor) {
@@ -874,6 +902,7 @@
  private:
   HGraph* graph_;
   GrowableArray<HBasicBlock*> predecessors_;
+  GrowableArray<HInstruction*> exceptional_predecessors_;
   GrowableArray<HBasicBlock*> successors_;
   HInstructionList instructions_;
   HInstructionList phis_;
@@ -1460,79 +1489,64 @@
  public:
   typedef Handle<mirror::Class> TypeHandle;
 
-  static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (type_handle->IsObjectClass()) {
-      // Override the type handle to be consistent with the case when we get to
-      // Top but don't have the Object class available. It avoids having to guess
-      // what value the type_handle has when it's Top.
-      return ReferenceTypeInfo(TypeHandle(), is_exact, true);
-    } else {
-      return ReferenceTypeInfo(type_handle, is_exact, false);
-    }
+  static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) {
+    // The constructor will check that the type_handle is valid.
+    return ReferenceTypeInfo(type_handle, is_exact);
   }
 
-  static ReferenceTypeInfo CreateTop(bool is_exact) {
-    return ReferenceTypeInfo(TypeHandle(), is_exact, true);
+  static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); }
+
+  static bool IsValidHandle(TypeHandle handle) SHARED_REQUIRES(Locks::mutator_lock_) {
+    return handle.GetReference() != nullptr;
   }
 
+  bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return IsValidHandle(type_handle_);
+  }
   bool IsExact() const { return is_exact_; }
-  bool IsTop() const { return is_top_; }
+
+  bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsObjectClass();
+  }
   bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return !IsTop() && GetTypeHandle()->IsInterface();
+    DCHECK(IsValid());
+    return GetTypeHandle()->IsInterface();
   }
 
   Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
 
   bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (IsTop()) {
-      // Top (equivalent for java.lang.Object) is supertype of anything.
-      return true;
-    }
-    if (rti.IsTop()) {
-      // If we get here `this` is not Top() so it can't be a supertype.
-      return false;
-    }
+    DCHECK(IsValid());
+    DCHECK(rti.IsValid());
     return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
   }
 
   // Returns true if the type information provide the same amount of details.
   // Note that it does not mean that the instructions have the same actual type
-  // (e.g. tops are equal but they can be the result of a merge).
+  // (because the type can be the result of a merge).
   bool IsEqual(ReferenceTypeInfo rti) SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (IsExact() != rti.IsExact()) {
-      return false;
-    }
-    if (IsTop() && rti.IsTop()) {
-      // `Top` means java.lang.Object, so the types are equivalent.
+    if (!IsValid() && !rti.IsValid()) {
+      // Invalid types are equal.
       return true;
     }
-    if (IsTop() || rti.IsTop()) {
-      // If only one is top or object than they are not equivalent.
-      // NB: We need this extra check because the type_handle of `Top` is invalid
-      // and we cannot inspect its reference.
+    if (!IsValid() || !rti.IsValid()) {
+      // One is valid, the other not.
       return false;
     }
-
-    // Finally check the types.
-    return GetTypeHandle().Get() == rti.GetTypeHandle().Get();
+    return IsExact() == rti.IsExact()
+        && GetTypeHandle().Get() == rti.GetTypeHandle().Get();
   }
 
  private:
-  ReferenceTypeInfo() : ReferenceTypeInfo(TypeHandle(), false, true) {}
-  ReferenceTypeInfo(TypeHandle type_handle, bool is_exact, bool is_top)
-      : type_handle_(type_handle), is_exact_(is_exact), is_top_(is_top) {}
+  ReferenceTypeInfo();
+  ReferenceTypeInfo(TypeHandle type_handle, bool is_exact);
 
   // The class of the object.
   TypeHandle type_handle_;
   // Whether or not the type is exact or a superclass of the actual type.
   // Whether or not we have any information about this type.
   bool is_exact_;
-  // A true value here means that the object type should be java.lang.Object.
-  // We don't have access to the corresponding mirror object every time so this
-  // flag acts as a substitute. When true, the TypeHandle refers to a null
-  // pointer and should not be used.
-  bool is_top_;
 };
 
 std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs);
@@ -1550,7 +1564,7 @@
         live_interval_(nullptr),
         lifetime_position_(kNoLifetime),
         side_effects_(side_effects),
-        reference_type_info_(ReferenceTypeInfo::CreateTop(/* is_exact */ false)) {}
+        reference_type_info_(ReferenceTypeInfo::CreateInvalid()) {}
 
   virtual ~HInstruction() {}
 
@@ -1596,6 +1610,7 @@
 
   // Does not apply for all instructions, but having this at top level greatly
   // simplifies the null check elimination.
+  // TODO: Consider merging can_be_null into ReferenceTypeInfo.
   virtual bool CanBeNull() const {
     DCHECK_EQ(GetType(), Primitive::kPrimNot) << "CanBeNull only applies to reference types";
     return true;
@@ -1606,10 +1621,7 @@
     return false;
   }
 
-  void SetReferenceTypeInfo(ReferenceTypeInfo reference_type_info) {
-    DCHECK_EQ(GetType(), Primitive::kPrimNot);
-    reference_type_info_ = reference_type_info;
-  }
+  void SetReferenceTypeInfo(ReferenceTypeInfo rti);
 
   ReferenceTypeInfo GetReferenceTypeInfo() const {
     DCHECK_EQ(GetType(), Primitive::kPrimNot);
@@ -3904,7 +3916,7 @@
         is_referrers_class_(is_referrers_class),
         dex_pc_(dex_pc),
         generate_clinit_check_(false),
-        loaded_class_rti_(ReferenceTypeInfo::CreateTop(/* is_exact */ false)) {
+        loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
     SetRawInputAt(0, current_method);
   }
 
@@ -3955,10 +3967,6 @@
     loaded_class_rti_ = rti;
   }
 
-  bool IsResolved() {
-    return loaded_class_rti_.IsExact();
-  }
-
   const DexFile& GetDexFile() { return dex_file_; }
 
   bool NeedsDexCache() const OVERRIDE { return !is_referrers_class_; }
@@ -4201,27 +4209,43 @@
 
 class HBoundType : public HExpression<1> {
  public:
-  HBoundType(HInstruction* input, ReferenceTypeInfo bound_type)
+  // Constructs an HBoundType with the given upper_bound.
+  // Ensures that the upper_bound is valid.
+  HBoundType(HInstruction* input, ReferenceTypeInfo upper_bound, bool upper_can_be_null)
       : HExpression(Primitive::kPrimNot, SideEffects::None()),
-        bound_type_(bound_type) {
+        upper_bound_(upper_bound),
+        upper_can_be_null_(upper_can_be_null),
+        can_be_null_(upper_can_be_null) {
     DCHECK_EQ(input->GetType(), Primitive::kPrimNot);
     SetRawInputAt(0, input);
+    SetReferenceTypeInfo(upper_bound_);
   }
 
-  const ReferenceTypeInfo& GetBoundType() const { return bound_type_; }
+  // GetUpper* should only be used in reference type propagation.
+  const ReferenceTypeInfo& GetUpperBound() const { return upper_bound_; }
+  bool GetUpperCanBeNull() const { return upper_can_be_null_; }
 
-  bool CanBeNull() const OVERRIDE {
-    // `null instanceof ClassX` always return false so we can't be null.
-    return false;
+  void SetCanBeNull(bool can_be_null) {
+    DCHECK(upper_can_be_null_ || !can_be_null);
+    can_be_null_ = can_be_null;
   }
 
+  bool CanBeNull() const OVERRIDE { return can_be_null_; }
+
   DECLARE_INSTRUCTION(BoundType);
 
  private:
   // Encodes the most upper class that this instruction can have. In other words
-  // it is always the case that GetBoundType().IsSupertypeOf(GetReferenceType()).
-  // It is used to bound the type in cases like `if (x instanceof ClassX) {}`
-  const ReferenceTypeInfo bound_type_;
+  // it is always the case that GetUpperBound().IsSupertypeOf(GetReferenceType()).
+  // It is used to bound the type in cases like:
+  //   if (x instanceof ClassX) {
+  //     // upper_bound_ will be ClassX
+  //   }
+  const ReferenceTypeInfo upper_bound_;
+  // Represents the top constraint that can_be_null_ cannot exceed (i.e. if this
+  // is false then can_be_null_ cannot be true).
+  const bool upper_can_be_null_;
+  bool can_be_null_;
 
   DISALLOW_COPY_AND_ASSIGN(HBoundType);
 };
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index fe3bb1a..f455571 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -29,7 +29,7 @@
 namespace art {
 
 // Run the tests only on host.
-#ifndef HAVE_ANDROID_OS
+#ifndef __ANDROID__
 
 class OptimizingCFITest : public CFITest {
  public:
@@ -125,6 +125,6 @@
 TEST_ISA(kX86)
 TEST_ISA(kX86_64)
 
-#endif  // HAVE_ANDROID_OS
+#endif  // __ANDROID__
 
 }  // namespace art
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 1c0123e..6a50b7d 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -369,6 +369,36 @@
   }
 }
 
+static void MaybeRunInliner(HGraph* graph,
+                            CompilerDriver* driver,
+                            OptimizingCompilerStats* stats,
+                            const DexCompilationUnit& dex_compilation_unit,
+                            PassObserver* pass_observer,
+                            StackHandleScopeCollection* handles) {
+  const CompilerOptions& compiler_options = driver->GetCompilerOptions();
+  bool should_inline = (compiler_options.GetInlineDepthLimit() > 0)
+      && (compiler_options.GetInlineMaxCodeUnits() > 0);
+  if (!should_inline) {
+    return;
+  }
+
+  ArenaAllocator* arena = graph->GetArena();
+  HInliner* inliner = new (arena) HInliner(
+    graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
+  ReferenceTypePropagation* type_propagation =
+    new (arena) ReferenceTypePropagation(graph, handles,
+        "reference_type_propagation_after_inlining");
+
+  HOptimization* optimizations[] = {
+    inliner,
+    // Run another type propagation phase: inlining will open up more opportunities
+    // to remove checkcast/instanceof and null checks.
+    type_propagation,
+  };
+
+  RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
+}
+
 static void RunOptimizations(HGraph* graph,
                              CompilerDriver* driver,
                              OptimizingCompilerStats* stats,
@@ -383,10 +413,6 @@
   HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
   HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph);
-
-  HInliner* inliner = new (arena) HInliner(
-      graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
-
   HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
   GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
@@ -398,28 +424,29 @@
       graph, stats, "instruction_simplifier_after_types");
   InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_after_bce");
-  ReferenceTypePropagation* type_propagation2 =
-      new (arena) ReferenceTypePropagation(graph, handles);
   InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_before_codegen");
 
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver);
 
-  HOptimization* optimizations[] = {
+  HOptimization* optimizations1[] = {
     intrinsics,
     fold1,
     simplify1,
     type_propagation,
     dce1,
-    simplify2,
-    inliner,
-    // Run another type propagation phase: inlining will open up more opprotunities
-    // to remove checkast/instanceof and null checks.
-    type_propagation2,
+    simplify2
+  };
+
+  RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
+
+  MaybeRunInliner(graph, driver, stats, dex_compilation_unit, pass_observer, handles);
+
+  HOptimization* optimizations2[] = {
     // BooleanSimplifier depends on the InstructionSimplifier removing redundant
     // suspend checks to recognize empty blocks.
     boolean_simplify,
-    fold2,
+    fold2,  // TODO: if we don't inline we can also skip fold2.
     side_effects,
     gvn,
     licm,
@@ -432,7 +459,7 @@
     simplify4,
   };
 
-  RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
+  RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
 }
 
 // The stack map we generate must be 4-byte aligned on ARM. Since existing
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 54ea6f1..f9d812f 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -38,6 +38,20 @@
   // Build up a worklist of moves.
   BuildInitialMoveList(parallel_move);
 
+  // Do stack-to-stack moves first to take advantage of a free register on constrained machines.
+  for (size_t i = 0; i < moves_.Size(); ++i) {
+    const MoveOperands& move = *moves_.Get(i);
+    // Ignore constants and moves already eliminated.
+    if (move.IsEliminated() || move.GetSource().IsConstant()) {
+      continue;
+    }
+
+    if ((move.GetSource().IsStackSlot() || move.GetSource().IsDoubleStackSlot()) &&
+        (move.GetDestination().IsStackSlot() || move.GetDestination().IsDoubleStackSlot())) {
+      PerformMove(i);
+    }
+  }
+
   for (size_t i = 0; i < moves_.Size(); ++i) {
     const MoveOperands& move = *moves_.Get(i);
     // Skip constants to perform them last.  They don't block other moves
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 68316c2..d1c1134 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -25,19 +25,35 @@
 
 class RTPVisitor : public HGraphDelegateVisitor {
  public:
-  RTPVisitor(HGraph* graph, StackHandleScopeCollection* handles)
+  RTPVisitor(HGraph* graph,
+             StackHandleScopeCollection* handles,
+             GrowableArray<HInstruction*>* worklist,
+             ReferenceTypeInfo::TypeHandle object_class_handle,
+             ReferenceTypeInfo::TypeHandle class_class_handle,
+             ReferenceTypeInfo::TypeHandle string_class_handle)
     : HGraphDelegateVisitor(graph),
-      handles_(handles) {}
+      handles_(handles),
+      object_class_handle_(object_class_handle),
+      class_class_handle_(class_class_handle),
+      string_class_handle_(string_class_handle),
+      worklist_(worklist) {}
 
+  void VisitNullConstant(HNullConstant* null_constant) OVERRIDE;
   void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE;
+  void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE;
+  void VisitLoadString(HLoadString* instr) OVERRIDE;
   void VisitNewArray(HNewArray* instr) OVERRIDE;
+  void VisitParameterValue(HParameterValue* instr) OVERRIDE;
   void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info);
   void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact);
   void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE;
   void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE;
   void VisitInvoke(HInvoke* instr) OVERRIDE;
   void VisitArrayGet(HArrayGet* instr) OVERRIDE;
+  void VisitCheckCast(HCheckCast* instr) OVERRIDE;
+  void VisitNullCheck(HNullCheck* instr) OVERRIDE;
+  void VisitFakeString(HFakeString* instr) OVERRIDE;
   void UpdateReferenceTypeInfo(HInstruction* instr,
                                uint16_t type_idx,
                                const DexFile& dex_file,
@@ -45,8 +61,33 @@
 
  private:
   StackHandleScopeCollection* handles_;
+  ReferenceTypeInfo::TypeHandle object_class_handle_;
+  ReferenceTypeInfo::TypeHandle class_class_handle_;
+  ReferenceTypeInfo::TypeHandle string_class_handle_;
+  GrowableArray<HInstruction*>* worklist_;
+
+  static constexpr size_t kDefaultWorklistSize = 8;
 };
 
+ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph,
+                                                   StackHandleScopeCollection* handles,
+                                                   const char* name)
+    : HOptimization(graph, name),
+      handles_(handles),
+      worklist_(graph->GetArena(), kDefaultWorklistSize) {
+  ClassLinker* linker = Runtime::Current()->GetClassLinker();
+  object_class_handle_ = handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject));
+  string_class_handle_ = handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangString));
+  class_class_handle_ = handles_->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangClass));
+
+  if (kIsDebugBuild) {
+    ScopedObjectAccess soa(Thread::Current());
+    DCHECK(ReferenceTypeInfo::IsValidHandle(object_class_handle_));
+    DCHECK(ReferenceTypeInfo::IsValidHandle(class_class_handle_));
+    DCHECK(ReferenceTypeInfo::IsValidHandle(string_class_handle_));
+  }
+}
+
 void ReferenceTypePropagation::Run() {
   // To properly propagate type info we need to visit in the dominator-based order.
   // Reverse post order guarantees a node's dominators are visited first.
@@ -55,29 +96,122 @@
     VisitBasicBlock(it.Current());
   }
   ProcessWorklist();
+
+  if (kIsDebugBuild) {
+    // TODO: move this to the graph checker.
+    ScopedObjectAccess soa(Thread::Current());
+    for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+      HBasicBlock* block = it.Current();
+      for (HInstructionIterator iti(block->GetInstructions()); !iti.Done(); iti.Advance()) {
+        HInstruction* instr = iti.Current();
+        if (instr->GetType() == Primitive::kPrimNot) {
+          DCHECK(instr->GetReferenceTypeInfo().IsValid())
+              << "Invalid RTI for instruction: " << instr->DebugName();
+          if (instr->IsBoundType()) {
+            DCHECK(instr->AsBoundType()->GetUpperBound().IsValid());
+          } else if (instr->IsLoadClass()) {
+            DCHECK(instr->AsLoadClass()->GetReferenceTypeInfo().IsExact());
+            DCHECK(instr->AsLoadClass()->GetLoadedClassRTI().IsValid());
+          } else if (instr->IsNullCheck()) {
+            DCHECK(instr->GetReferenceTypeInfo().IsEqual(instr->InputAt(0)->GetReferenceTypeInfo()))
+                << "NullCheck " << instr->GetReferenceTypeInfo()
+                << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo();
+          }
+        }
+      }
+    }
+  }
 }
 
 void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
-  // TODO: handle other instructions that give type info
-  // (array accesses)
+  RTPVisitor visitor(graph_,
+                     handles_,
+                     &worklist_,
+                     object_class_handle_,
+                     class_class_handle_,
+                     string_class_handle_);
+  // Handle Phis first as there might be instructions in the same block that depend on them.
+  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    VisitPhi(it.Current()->AsPhi());
+  }
 
-  RTPVisitor visitor(graph_, handles_);
-  // Initialize exact types first for faster convergence.
+  // Handle instructions.
   for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
     HInstruction* instr = it.Current();
     instr->Accept(&visitor);
   }
 
-  // Handle Phis.
-  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-    VisitPhi(it.Current()->AsPhi());
-  }
-
   // Add extra nodes to bound types.
   BoundTypeForIfNotNull(block);
   BoundTypeForIfInstanceOf(block);
 }
 
+// Create a bound type for the given object narrowing the type as much as possible.
+// The BoundType upper values for the super type and can_be_null will be taken from
+// load_class.GetLoadedClassRTI() and upper_can_be_null.
+static HBoundType* CreateBoundType(ArenaAllocator* arena,
+                                   HInstruction* obj,
+                                   HLoadClass* load_class,
+                                   bool upper_can_be_null)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+  ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo();
+  ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
+  HBoundType* bound_type = new (arena) HBoundType(obj, class_rti, upper_can_be_null);
+  // Narrow the type as much as possible.
+  if (class_rti.GetTypeHandle()->IsFinal()) {
+    bound_type->SetReferenceTypeInfo(
+        ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ true));
+  } else if (obj_rti.IsValid() && class_rti.IsSupertypeOf(obj_rti)) {
+    bound_type->SetReferenceTypeInfo(obj_rti);
+  } else {
+    bound_type->SetReferenceTypeInfo(
+        ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false));
+  }
+  return bound_type;
+}
+
+// Check if we should create a bound type for the given object at the specified
+// position. Because of inlining and the fact that we run RTP more than once, we
+// might have a HBoundType already. If we do, we should not create a new one.
+// In this case we also assert that there are no other uses of the object (except
+// the bound type) dominated by the specified dominator_instr or dominator_block.
+static bool ShouldCreateBoundType(HInstruction* position,
+                                  HInstruction* obj,
+                                  ReferenceTypeInfo upper_bound,
+                                  HInstruction* dominator_instr,
+                                  HBasicBlock* dominator_block)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  // If the position where we should insert the bound type is not already a
+  // bound type then we need to create one.
+  if (position == nullptr || !position->IsBoundType()) {
+    return true;
+  }
+
+  HBoundType* existing_bound_type = position->AsBoundType();
+  if (existing_bound_type->GetUpperBound().IsSupertypeOf(upper_bound)) {
+    if (kIsDebugBuild) {
+      // Check that the existing HBoundType dominates all the uses.
+      for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
+        HInstruction* user = it.Current()->GetUser();
+        if (dominator_instr != nullptr) {
+          DCHECK(!dominator_instr->StrictlyDominates(user)
+              || user == existing_bound_type
+              || existing_bound_type->StrictlyDominates(user));
+        } else if (dominator_block != nullptr) {
+          DCHECK(!dominator_block->Dominates(user->GetBlock())
+              || user == existing_bound_type
+              || existing_bound_type->StrictlyDominates(user));
+        }
+      }
+    }
+  } else {
+    // TODO: if the current bound type is a refinement we could update the
+    // existing_bound_type with a new upper limit. However, we also need to
+    // update its users and have access to the work list.
+  }
+  return false;
+}
+
 void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) {
   HIf* ifInstruction = block->GetLastInstruction()->AsIf();
   if (ifInstruction == nullptr) {
@@ -116,8 +250,23 @@
     HInstruction* user = it.Current()->GetUser();
     if (notNullBlock->Dominates(user->GetBlock())) {
       if (bound_type == nullptr) {
-        bound_type = new (graph_->GetArena()) HBoundType(obj, ReferenceTypeInfo::CreateTop(false));
-        notNullBlock->InsertInstructionBefore(bound_type, notNullBlock->GetFirstInstruction());
+        ScopedObjectAccess soa(Thread::Current());
+        HInstruction* insert_point = notNullBlock->GetFirstInstruction();
+        ReferenceTypeInfo object_rti = ReferenceTypeInfo::Create(
+            object_class_handle_, /* is_exact */ true);
+        if (ShouldCreateBoundType(insert_point, obj, object_rti, nullptr, notNullBlock)) {
+          bound_type = new (graph_->GetArena()) HBoundType(
+              obj, object_rti, /* upper_can_be_null */ false);
+          if (obj->GetReferenceTypeInfo().IsValid()) {
+            bound_type->SetReferenceTypeInfo(obj->GetReferenceTypeInfo());
+          }
+          notNullBlock->InsertInstructionBefore(bound_type, insert_point);
+        } else {
+          // We already have a bound type on the position we would need to insert
+          // the new one. The existing bound type should dominate all the users
+          // (dchecked) so there's no need to continue.
+          break;
+        }
       }
       user->ReplaceInput(bound_type, it.Current()->GetIndex());
     }
@@ -171,25 +320,23 @@
     HInstruction* user = it.Current()->GetUser();
     if (instanceOfTrueBlock->Dominates(user->GetBlock())) {
       if (bound_type == nullptr) {
+        ScopedObjectAccess soa(Thread::Current());
         HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass();
-
-        ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo();
         ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
-        bound_type = new (graph_->GetArena()) HBoundType(obj, class_rti);
-
-        // Narrow the type as much as possible.
-        {
-          ScopedObjectAccess soa(Thread::Current());
-          if (!load_class->IsResolved() || class_rti.IsSupertypeOf(obj_rti)) {
-            bound_type->SetReferenceTypeInfo(obj_rti);
-          } else {
-            bound_type->SetReferenceTypeInfo(
-                ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false));
-          }
+        HInstruction* insert_point = instanceOfTrueBlock->GetFirstInstruction();
+        if (ShouldCreateBoundType(insert_point, obj, class_rti, nullptr, instanceOfTrueBlock)) {
+          bound_type = CreateBoundType(
+              graph_->GetArena(),
+              obj,
+              load_class,
+              false /* InstanceOf ensures the object is not null. */);
+          instanceOfTrueBlock->InsertInstructionBefore(bound_type, insert_point);
+        } else {
+          // We already have a bound type on the position we would need to insert
+          // the new one. The existing bound type should dominate all the users
+          // (dchecked) so there's no need to continue.
+          break;
         }
-
-        instanceOfTrueBlock->InsertInstructionBefore(
-            bound_type, instanceOfTrueBlock->GetFirstInstruction());
       }
       user->ReplaceInput(bound_type, it.Current()->GetIndex());
     }
@@ -199,11 +346,32 @@
 void RTPVisitor::SetClassAsTypeInfo(HInstruction* instr,
                                     mirror::Class* klass,
                                     bool is_exact) {
-  if (klass != nullptr) {
+  if (instr->IsInvokeStaticOrDirect() && instr->AsInvokeStaticOrDirect()->IsStringInit()) {
+    // Calls to String.<init> are replaced with a StringFactory.
+    if (kIsDebugBuild) {
+      ScopedObjectAccess soa(Thread::Current());
+      ClassLinker* cl = Runtime::Current()->GetClassLinker();
+      mirror::DexCache* dex_cache = cl->FindDexCache(instr->AsInvoke()->GetDexFile());
+      ArtMethod* method = dex_cache->GetResolvedMethod(
+          instr->AsInvoke()->GetDexMethodIndex(), cl->GetImagePointerSize());
+      DCHECK(method != nullptr);
+      mirror::Class* declaring_class = method->GetDeclaringClass();
+      DCHECK(declaring_class != nullptr);
+      DCHECK(declaring_class->IsStringClass())
+          << "Expected String class: " << PrettyDescriptor(declaring_class);
+      DCHECK(method->IsConstructor())
+          << "Expected String.<init>: " << PrettyMethod(method);
+    }
+    instr->SetReferenceTypeInfo(
+        ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true));
+  } else if (klass != nullptr) {
     ScopedObjectAccess soa(Thread::Current());
-    MutableHandle<mirror::Class> handle = handles_->NewHandle(klass);
+    ReferenceTypeInfo::TypeHandle handle = handles_->NewHandle(klass);
     is_exact = is_exact || klass->IsFinal();
     instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact));
+  } else {
+    instr->SetReferenceTypeInfo(
+        ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
   }
 }
 
@@ -219,6 +387,13 @@
   SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact);
 }
 
+void RTPVisitor::VisitNullConstant(HNullConstant* instr) {
+  // TODO: The null constant could be bound contextually (e.g. based on return statements)
+  // to a more precise type.
+  instr->SetReferenceTypeInfo(
+      ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
+}
+
 void RTPVisitor::VisitNewInstance(HNewInstance* instr) {
   UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true);
 }
@@ -227,6 +402,13 @@
   UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true);
 }
 
+void RTPVisitor::VisitParameterValue(HParameterValue* instr) {
+  if (instr->GetType() == Primitive::kPrimNot) {
+    // TODO: parse the signature and add precise types for the parameters.
+    SetClassAsTypeInfo(instr, nullptr, /* is_exact */ false);
+  }
+}
+
 void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr,
                                            const FieldInfo& info) {
   // The field index is unknown only during tests.
@@ -238,10 +420,10 @@
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
   mirror::DexCache* dex_cache = cl->FindDexCache(info.GetDexFile());
   ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), dex_cache);
-  if (field != nullptr) {
-    mirror::Class* klass = field->GetType<false>();
-    SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
-  }
+  // TODO: There are certain cases where we can't resolve the field.
+  // b/21914925 is open to keep track of a repro case for this issue.
+  mirror::Class* klass = (field == nullptr) ? nullptr : field->GetType<false>();
+  SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
 }
 
 void RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) {
@@ -258,12 +440,60 @@
       Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile());
   // Get type from dex cache assuming it was populated by the verifier.
   mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex());
-  if (resolved_class != nullptr) {
-    Handle<mirror::Class> handle = handles_->NewHandle(resolved_class);
-    instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(handle, /* is_exact */ true));
+  // TODO: investigating why we are still getting unresolved classes: b/22821472.
+  ReferenceTypeInfo::TypeHandle handle = (resolved_class != nullptr)
+    ? handles_->NewHandle(resolved_class)
+    : object_class_handle_;
+  instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(handle, /* is_exact */ true));
+  instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(class_class_handle_, /* is_exact */ true));
+}
+
+void RTPVisitor::VisitClinitCheck(HClinitCheck* instr) {  // Copies the type info of input 0.
+  instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo());
+}
+
+void RTPVisitor::VisitLoadString(HLoadString* instr) {  // Exact type: string_class_handle_.
+  instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true));
+}
+
+void RTPVisitor::VisitNullCheck(HNullCheck* instr) {
+  ScopedObjectAccess soa(Thread::Current());
+  ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
+  DCHECK(parent_rti.IsValid());  // The checked input is expected to be typed already.
+  instr->SetReferenceTypeInfo(parent_rti);  // The null check takes its input's type info as-is.
+}
+
+void RTPVisitor::VisitFakeString(HFakeString* instr) {  // Exact type: string_class_handle_.
+  instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true));
+}
+
+void RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
+  HInstruction* obj = check_cast->InputAt(0);  // Input 0; input 1 is the HLoadClass (below).
+  HBoundType* bound_type = nullptr;  // Created lazily, on the first dominated use of obj.
+  for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
+    HInstruction* user = it.Current()->GetUser();
+    if (check_cast->StrictlyDominates(user)) {  // Only uses dominated by the cast are rewritten.
+      if (bound_type == nullptr) {
+        ScopedObjectAccess soa(Thread::Current());
+        HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
+        ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
+        if (ShouldCreateBoundType(check_cast->GetNext(), obj, class_rti, check_cast, nullptr)) {
+          bound_type = CreateBoundType(
+              GetGraph()->GetArena(),
+              obj,
+              load_class,
+              true /* CheckCast succeeds for nulls. */);
+          check_cast->GetBlock()->InsertInstructionAfter(bound_type, check_cast);
+        } else {
+          // We already have a bound type on the position we would need to insert
+          // the new one. The existing bound type should dominate all the users
+          // (dchecked) so there's no need to continue.
+          break;
+        }
+      }
+      user->ReplaceInput(bound_type, it.Current()->GetIndex());  // Point this use at bound_type.
+    }
   }
-  Handle<mirror::Class> class_handle = handles_->NewHandle(mirror::Class::GetJavaLangClass());
-  instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(class_handle, /* is_exact */ true));
 }
 
 void ReferenceTypePropagation::VisitPhi(HPhi* phi) {
@@ -290,29 +520,54 @@
 
 ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& a,
                                                        const ReferenceTypeInfo& b) {
-  bool is_exact = a.IsExact() && b.IsExact();
-  bool is_top = a.IsTop() || b.IsTop();
-  Handle<mirror::Class> type_handle;
-
-  if (!is_top) {
-    if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) {
-      type_handle = a.GetTypeHandle();
-    } else if (a.IsSupertypeOf(b)) {
-      type_handle = a.GetTypeHandle();
-      is_exact = false;
-    } else if (b.IsSupertypeOf(a)) {
-      type_handle = b.GetTypeHandle();
-      is_exact = false;
-    } else {
-      // TODO: Find a common super class.
-      is_top = true;
-      is_exact = false;
-    }
+  if (!b.IsValid()) {
+    return a;
+  }
+  if (!a.IsValid()) {
+    return b;
   }
 
-  return is_top
-      ? ReferenceTypeInfo::CreateTop(is_exact)
-      : ReferenceTypeInfo::Create(type_handle, is_exact);
+  bool is_exact = a.IsExact() && b.IsExact();
+  Handle<mirror::Class> type_handle;
+
+  if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) {
+    type_handle = a.GetTypeHandle();
+  } else if (a.IsSupertypeOf(b)) {
+    type_handle = a.GetTypeHandle();
+    is_exact = false;
+  } else if (b.IsSupertypeOf(a)) {
+    type_handle = b.GetTypeHandle();
+    is_exact = false;
+  } else {
+    // TODO: Find the first common super class.
+    type_handle = object_class_handle_;
+    is_exact = false;
+  }
+
+  return ReferenceTypeInfo::Create(type_handle, is_exact);
+}
+
+static void UpdateArrayGet(HArrayGet* instr,
+                           StackHandleScopeCollection* handles,
+                           ReferenceTypeInfo::TypeHandle object_class_handle)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  DCHECK_EQ(Primitive::kPrimNot, instr->GetType());  // Only reference-typed gets are expected.
+
+  ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();  // RTI of input 0.
+  DCHECK(parent_rti.IsValid());
+
+  Handle<mirror::Class> handle = parent_rti.GetTypeHandle();
+  if (handle->IsObjectArrayClass()) {  // Known object array: use its component type, inexact.
+    ReferenceTypeInfo::TypeHandle component_handle = handles->NewHandle(handle->GetComponentType());
+    instr->SetReferenceTypeInfo(
+        ReferenceTypeInfo::Create(component_handle, /* is_exact */ false));
+  } else {
+    // We don't know what the parent actually is, so we fall back to object.
+    instr->SetReferenceTypeInfo(
+        ReferenceTypeInfo::Create(object_class_handle, /* is_exact */ false));
+  }
+
+  return;  // NOTE(review): redundant at the end of a void function.
 }
 
 bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) {
@@ -323,6 +578,15 @@
     UpdateBoundType(instr->AsBoundType());
   } else if (instr->IsPhi()) {
     UpdatePhi(instr->AsPhi());
+  } else if (instr->IsNullCheck()) {
+    ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
+    if (parent_rti.IsValid()) {
+      instr->SetReferenceTypeInfo(parent_rti);
+    }
+  } else if (instr->IsArrayGet()) {
+    // TODO: consider if it's worth "looking back" and bounding the input object
+    // to an array type.
+    UpdateArrayGet(instr->AsArrayGet(), handles_, object_class_handle_);
   } else {
     LOG(FATAL) << "Invalid instruction (should not get here)";
   }
@@ -340,45 +604,45 @@
   mirror::DexCache* dex_cache = cl->FindDexCache(instr->GetDexFile());
   ArtMethod* method = dex_cache->GetResolvedMethod(
       instr->GetDexMethodIndex(), cl->GetImagePointerSize());
-  if (method != nullptr) {
-    mirror::Class* klass = method->GetReturnType(false);
-    SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
-  }
+  mirror::Class* klass = (method == nullptr) ? nullptr : method->GetReturnType(false);
+  SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
 }
 
 void RTPVisitor::VisitArrayGet(HArrayGet* instr) {
   if (instr->GetType() != Primitive::kPrimNot) {
     return;
   }
-
-  HInstruction* parent = instr->InputAt(0);
   ScopedObjectAccess soa(Thread::Current());
-  Handle<mirror::Class> handle = parent->GetReferenceTypeInfo().GetTypeHandle();
-  if (handle.GetReference() != nullptr && handle->IsObjectArrayClass()) {
-    SetClassAsTypeInfo(instr, handle->GetComponentType(), /* is_exact */ false);
+  UpdateArrayGet(instr, handles_, object_class_handle_);
+  if (!instr->GetReferenceTypeInfo().IsValid()) {
+    worklist_->Add(instr);
   }
 }
 
 void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) {
   ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  // Be sure that we don't go over the bounded type.
-  ReferenceTypeInfo bound_rti = instr->GetBoundType();
-  if (!bound_rti.IsSupertypeOf(new_rti)) {
-    new_rti = bound_rti;
+  if (!new_rti.IsValid()) {
+    return;  // No new info yet.
+  }
+
+  // Make sure that we don't go over the bounded type.
+  ReferenceTypeInfo upper_bound_rti = instr->GetUpperBound();
+  if (!upper_bound_rti.IsSupertypeOf(new_rti)) {
+    new_rti = upper_bound_rti;
   }
   instr->SetReferenceTypeInfo(new_rti);
 }
 
 void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
   ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  if (new_rti.IsTop() && !new_rti.IsExact()) {
-    // Early return if we are Top and inexact.
+  if (new_rti.IsValid() && new_rti.IsObjectClass() && !new_rti.IsExact()) {
+    // Early return if we are Object and inexact.
     instr->SetReferenceTypeInfo(new_rti);
     return;
   }
   for (size_t i = 1; i < instr->InputCount(); i++) {
     new_rti = MergeTypes(new_rti, instr->InputAt(i)->GetReferenceTypeInfo());
-    if (new_rti.IsTop()) {
+    if (new_rti.IsValid() && new_rti.IsObjectClass()) {
       if (!new_rti.IsExact()) {
         break;
       } else {
@@ -392,21 +656,31 @@
 // Re-computes and updates the nullability of the instruction. Returns whether or
 // not the nullability was changed.
 bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) {
-  DCHECK(instr->IsPhi() || instr->IsBoundType());
+  DCHECK(instr->IsPhi()
+      || instr->IsBoundType()
+      || instr->IsNullCheck()
+      || instr->IsArrayGet());
 
-  if (!instr->IsPhi()) {
+  if (!instr->IsPhi() && !instr->IsBoundType()) {
     return false;
   }
 
-  HPhi* phi = instr->AsPhi();
-  bool existing_can_be_null = phi->CanBeNull();
-  bool new_can_be_null = false;
-  for (size_t i = 0; i < phi->InputCount(); i++) {
-    new_can_be_null |= phi->InputAt(i)->CanBeNull();
+  bool existing_can_be_null = instr->CanBeNull();
+  if (instr->IsPhi()) {
+    HPhi* phi = instr->AsPhi();
+    bool new_can_be_null = false;
+    for (size_t i = 0; i < phi->InputCount(); i++) {
+      if (phi->InputAt(i)->CanBeNull()) {
+        new_can_be_null = true;
+        break;
+      }
+    }
+    phi->SetCanBeNull(new_can_be_null);
+  } else if (instr->IsBoundType()) {
+    HBoundType* bound_type = instr->AsBoundType();
+    bound_type->SetCanBeNull(instr->InputAt(0)->CanBeNull() && bound_type->GetUpperCanBeNull());
   }
-  phi->SetCanBeNull(new_can_be_null);
-
-  return existing_can_be_null != new_can_be_null;
+  return existing_can_be_null != instr->CanBeNull();
 }
 
 void ReferenceTypePropagation::ProcessWorklist() {
@@ -419,14 +693,18 @@
 }
 
 void ReferenceTypePropagation::AddToWorklist(HInstruction* instruction) {
-  DCHECK_EQ(instruction->GetType(), Primitive::kPrimNot) << instruction->GetType();
+  DCHECK_EQ(instruction->GetType(), Primitive::kPrimNot)
+      << instruction->DebugName() << ":" << instruction->GetType();
   worklist_.Add(instruction);
 }
 
 void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
   for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
     HInstruction* user = it.Current()->GetUser();
-    if (user->IsPhi() || user->IsBoundType()) {
+    if (user->IsPhi()
+       || user->IsBoundType()
+       || user->IsNullCheck()
+       || (user->IsArrayGet() && (user->GetType() == Primitive::kPrimNot))) {
       AddToWorklist(user);
     }
   }
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 11f5ac9..14d4a82 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -30,10 +30,9 @@
  */
 class ReferenceTypePropagation : public HOptimization {
  public:
-  ReferenceTypePropagation(HGraph* graph, StackHandleScopeCollection* handles)
-    : HOptimization(graph, kReferenceTypePropagationPassName),
-      handles_(handles),
-      worklist_(graph->GetArena(), kDefaultWorklistSize) {}
+  ReferenceTypePropagation(HGraph* graph,
+                           StackHandleScopeCollection* handles,
+                           const char* name = kReferenceTypePropagationPassName);
 
   void Run() OVERRIDE;
 
@@ -60,6 +59,10 @@
 
   GrowableArray<HInstruction*> worklist_;
 
+  ReferenceTypeInfo::TypeHandle object_class_handle_;
+  ReferenceTypeInfo::TypeHandle class_class_handle_;
+  ReferenceTypeInfo::TypeHandle string_class_handle_;
+
   static constexpr size_t kDefaultWorklistSize = 8;
 
   DISALLOW_COPY_AND_ASSIGN(ReferenceTypePropagation);
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index ff2e6ad..2c34e4d 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -570,7 +570,9 @@
   if (instruction->GetBlock()->IsInTry() && instruction->CanThrow()) {
     HTryBoundary* try_block = instruction->GetBlock()->GetTryEntry();
     for (HExceptionHandlerIterator it(*try_block); !it.Done(); it.Advance()) {
-      GrowableArray<HInstruction*>* handler_locals = GetLocalsFor(it.Current());
+      HBasicBlock* handler = it.Current();
+      handler->AddExceptionalPredecessor(instruction);
+      GrowableArray<HInstruction*>* handler_locals = GetLocalsFor(handler);
       for (size_t i = 0, e = current_locals_->Size(); i < e; ++i) {
         HInstruction* local_value = current_locals_->Get(i);
         if (local_value != nullptr) {
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 004853f..84f5cb1 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -1011,4 +1011,12 @@
             __ GetAdjustedPosition(label.Position()));
 }
 
+TEST_F(AssemblerThumb2Test, Clz) {  // Assembles clz(R0, R1) and hands DriverStr the text below.
+  __ clz(arm::R0, arm::R1);
+
+  const char* expected = "clz r0, r1\n";
+
+  DriverStr(expected, "clz");
+}
+
 }  // namespace art
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 20f61f9..cb01cea 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -32,7 +32,7 @@
 // Include results file (generated manually)
 #include "assembler_thumb_test_expected.cc.inc"
 
-#ifndef HAVE_ANDROID_OS
+#ifndef __ANDROID__
 // This controls whether the results are printed to the
 // screen or compared against the expected output.
 // To generate new expected output, set this to true and
@@ -72,7 +72,7 @@
 }
 
 std::string GetToolsDir() {
-#ifndef HAVE_ANDROID_OS
+#ifndef __ANDROID__
   // This will only work on the host.  There is no as, objcopy or objdump on the device.
   static std::string toolsdir;
 
@@ -89,7 +89,7 @@
 }
 
 void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* const* results) {
-#ifndef HAVE_ANDROID_OS
+#ifndef __ANDROID__
   static std::string toolsdir = GetToolsDir();
 
   ScratchFile file;
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 44efc65..8c2a3ed 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -145,6 +145,13 @@
   EmitLabel(lbl, dst.length_ + 5);
 }
 
+void X86Assembler::movntl(const Address& dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);  // Opcode bytes 0F C3; the Movntl test expects this to assemble as movnti.
+  EmitUint8(0xC3);
+  EmitOperand(src, dst);  // Source register first, then the destination address.
+}
+
 void X86Assembler::bswapl(Register dst) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
@@ -1194,11 +1201,26 @@
 }
 
 
-void X86Assembler::imull(Register reg, const Immediate& imm) {
+void X86Assembler::imull(Register dst, Register src, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitUint8(0x69);
-  EmitOperand(reg, Operand(reg));
-  EmitImmediate(imm);
+  // See whether imm can be represented as a sign-extended 8bit value.
+  int32_t v32 = static_cast<int32_t>(imm.value());
+  if (IsInt<8>(v32)) {
+    // Sign-extension works.
+    EmitUint8(0x6B);
+    EmitOperand(dst, Operand(src));
+    EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
+  } else {
+    // Not representable, use full immediate.
+    EmitUint8(0x69);
+    EmitOperand(dst, Operand(src));
+    EmitImmediate(imm);
+  }
+}
+
+
+void X86Assembler::imull(Register reg, const Immediate& imm) {
+  imull(reg, reg, imm);  // Two-operand form: reg is passed as both dst and src.
 }
 
 
@@ -1523,6 +1545,13 @@
 }
 
 
+void X86Assembler::repe_cmpsl() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);  // Prefix byte; the Repecmpsl test expects the pair to read as "repe cmpsl".
+  EmitUint8(0xA7);  // Opcode byte for cmpsl (cmpsw when a 0x66 prefix is present).
+}
+
+
 X86Assembler* X86Assembler::lock() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF0);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index e2abcde..d9c1b40 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -231,6 +231,8 @@
   void movl(const Address& dst, const Immediate& imm);
   void movl(const Address& dst, Label* lbl);
 
+  void movntl(const Address& dst, Register src);
+
   void bswapl(Register dst);
 
   void movzxb(Register dst, ByteRegister src);
@@ -409,6 +411,7 @@
 
   void imull(Register dst, Register src);
   void imull(Register reg, const Immediate& imm);
+  void imull(Register dst, Register src, const Immediate& imm);
   void imull(Register reg, const Address& address);
 
   void imull(Register reg);
@@ -466,6 +469,7 @@
 
   void repne_scasw();
   void repe_cmpsw();
+  void repe_cmpsl();
 
   X86Assembler* lock();
   void cmpxchgl(const Address& address, Register reg);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 0e8c4ae..b664d23 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -105,6 +105,16 @@
   DriverStr(expected, "movl");
 }
 
+TEST_F(AssemblerX86Test, Movntl) {  // Covers a base+index*4+disp address and a plain base.
+  GetAssembler()->movntl(x86::Address(x86::EDI, x86::EBX, x86::TIMES_4, 12), x86::EAX);
+  GetAssembler()->movntl(x86::Address(x86::EDI, 0), x86::EAX);
+  const char* expected =
+    "movntil %EAX, 0xc(%EDI,%EBX,4)\n"
+    "movntil %EAX, (%EDI)\n";
+
+  DriverStr(expected, "movntl");
+}
+
 TEST_F(AssemblerX86Test, psrlq) {
   GetAssembler()->psrlq(x86::XMM0, CreateImmediate(32));
   const char* expected = "psrlq $0x20, %xmm0\n";
@@ -202,4 +212,10 @@
   DriverStr(expected, "Repecmpsw");
 }
 
+TEST_F(AssemblerX86Test, Repecmpsl) {  // repe_cmpsl() takes no operands; one expected line.
+  GetAssembler()->repe_cmpsl();
+  const char* expected = "repe cmpsl\n";
+  DriverStr(expected, "Repecmpsl");
+}
+
 }  // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 93c90db..22e7b9b 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -194,6 +194,21 @@
   EmitImmediate(imm);
 }
 
+void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(src, dst);  // 32-bit form; movntq differs only by using EmitRex64 here.
+  EmitUint8(0x0F);  // Opcode bytes 0F C3, as in the x86 assembler's movntl.
+  EmitUint8(0xC3);
+  EmitOperand(src.LowBits(), dst);  // Source register's low bits first, then the address.
+}
+
+void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(src, dst);  // 64-bit form; movntl uses EmitOptionalRex32 at this point instead.
+  EmitUint8(0x0F);  // Same 0F C3 opcode bytes as movntl.
+  EmitUint8(0xC3);
+  EmitOperand(src.LowBits(), dst);  // Source register's low bits first, then the address.
+}
 
 void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
   cmov(c, dst, src, true);
@@ -1672,28 +1687,33 @@
   EmitOperand(dst.LowBits(), Operand(src));
 }
 
-void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
+void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int32());  // imull only supports 32b immediate.
 
-  EmitOptionalRex32(reg, reg);
+  EmitOptionalRex32(dst, src);
 
   // See whether imm can be represented as a sign-extended 8bit value.
   int32_t v32 = static_cast<int32_t>(imm.value());
   if (IsInt<8>(v32)) {
     // Sign-extension works.
     EmitUint8(0x6B);
-    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitOperand(dst.LowBits(), Operand(src));
     EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
   } else {
     // Not representable, use full immediate.
     EmitUint8(0x69);
-    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitOperand(dst.LowBits(), Operand(src));
     EmitImmediate(imm);
   }
 }
 
 
+void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
+  imull(reg, reg, imm);  // Two-operand form: reg is passed as both dst and src.
+}
+
+
 void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg, address);
@@ -2081,6 +2101,21 @@
 }
 
 
+void X86_64Assembler::repe_cmpsl() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);  // Prefix byte; the Repecmpsl test expects the pair to read as "repe cmpsl".
+  EmitUint8(0xA7);  // Opcode byte; no REX prefix is emitted in this 32-bit form.
+}
+
+
+void X86_64Assembler::repe_cmpsq() {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xF3);  // Same prefix byte as repe_cmpsl.
+  EmitRex64();  // Emitted between the prefix and the opcode; the test expects "repe cmpsq".
+  EmitUint8(0xA7);  // Same opcode byte as repe_cmpsl.
+}
+
+
 void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
   // TODO: Need to have a code constants table.
   int64_t constant = bit_cast<int64_t, double>(value);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 0cd3197..b8e5fb6 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -326,6 +326,9 @@
   void movq(CpuRegister dst, CpuRegister src);
   void movl(CpuRegister dst, CpuRegister src);
 
+  void movntl(const Address& dst, CpuRegister src);
+  void movntq(const Address& dst, CpuRegister src);
+
   void movq(CpuRegister dst, const Address& src);
   void movl(CpuRegister dst, const Address& src);
   void movq(const Address& dst, CpuRegister src);
@@ -539,6 +542,7 @@
 
   void imull(CpuRegister dst, CpuRegister src);
   void imull(CpuRegister reg, const Immediate& imm);
+  void imull(CpuRegister dst, CpuRegister src, const Immediate& imm);
   void imull(CpuRegister reg, const Address& address);
 
   void imulq(CpuRegister src);
@@ -604,6 +608,8 @@
 
   void repne_scasw();
   void repe_cmpsw();
+  void repe_cmpsl();
+  void repe_cmpsq();
 
   //
   // Macros for High-level operations.
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 422138c..296487e 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -35,7 +35,7 @@
   ASSERT_EQ(static_cast<size_t>(5), buffer.Size());
 }
 
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
 static constexpr size_t kRandomIterations = 1000;  // Devices might be puny, don't stress them...
 #else
 static constexpr size_t kRandomIterations = 100000;  // Hosts are pretty powerful.
@@ -674,6 +674,46 @@
   DriverStr(expected, "movq");
 }
 
+TEST_F(AssemblerX86_64Test, Movntl) {  // Five movntl stores, one expected line per call.
+  GetAssembler()->movntl(x86_64::Address(  // Base RDI, index RBX, source RAX.
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX));
+  GetAssembler()->movntl(x86_64::Address(  // Base RDI, index R9, source RAX.
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX));
+  GetAssembler()->movntl(x86_64::Address(  // NOTE(review): identical to the previous call; confirm.
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX));
+  GetAssembler()->movntl(x86_64::Address(x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RAX));
+  GetAssembler()->movntl(x86_64::Address(  // Base R13, index R9, source R9.
+      x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), x86_64::CpuRegister(x86_64::R9));
+  const char* expected =
+    "movntil %EAX, 0xc(%RDI,%RBX,4)\n"
+    "movntil %EAX, 0xc(%RDI,%R9,4)\n"
+    "movntil %EAX, 0xc(%RDI,%R9,4)\n"
+    "movntil %EAX, (%R13)\n"
+    "movntil %R9d, (%R13,%R9,1)\n";
+
+  DriverStr(expected, "movntl");
+}
+
+TEST_F(AssemblerX86_64Test, Movntq) {  // Same operand set as the Movntl test, for movntq.
+  GetAssembler()->movntq(x86_64::Address(  // Base RDI, index RBX, source RAX.
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX));
+  GetAssembler()->movntq(x86_64::Address(  // Base RDI, index R9, source RAX.
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX));
+  GetAssembler()->movntq(x86_64::Address(  // NOTE(review): identical to the previous call; confirm.
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX));
+  GetAssembler()->movntq(x86_64::Address(x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RAX));
+  GetAssembler()->movntq(x86_64::Address(  // Base R13, index R9, source R9.
+      x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), x86_64::CpuRegister(x86_64::R9));
+  const char* expected =
+    "movntiq %RAX, 0xc(%RDI,%RBX,4)\n"
+    "movntiq %RAX, 0xc(%RDI,%R9,4)\n"
+    "movntiq %RAX, 0xc(%RDI,%R9,4)\n"
+    "movntiq %RAX, (%R13)\n"
+    "movntiq %R9, (%R13,%R9,1)\n";
+
+  DriverStr(expected, "movntq");
+}
+
 TEST_F(AssemblerX86_64Test, Cvtsi2ssAddr) {
   GetAssembler()->cvtsi2ss(x86_64::XmmRegister(x86_64::XMM0),
                            x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0),
@@ -1269,4 +1309,16 @@
   DriverStr(expected, "Repecmpsw");
 }
 
+TEST_F(AssemblerX86_64Test, Repecmpsl) {  // repe_cmpsl() takes no operands; one expected line.
+  GetAssembler()->repe_cmpsl();
+  const char* expected = "repe cmpsl\n";
+  DriverStr(expected, "Repecmpsl");
+}
+
+TEST_F(AssemblerX86_64Test, Repecmpsq) {  // repe_cmpsq() takes no operands; one expected line.
+  GetAssembler()->repe_cmpsq();
+  const char* expected = "repe cmpsq\n";
+  DriverStr(expected, "Repecmpsq");
+}
+
 }  // namespace art
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index bffb3b5..75d6137 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -280,6 +280,18 @@
   UsageError("      Example: --num-dex-method=%d", CompilerOptions::kDefaultNumDexMethodsThreshold);
   UsageError("      Default: %d", CompilerOptions::kDefaultNumDexMethodsThreshold);
   UsageError("");
+  UsageError("  --inline-depth-limit=<depth-limit>: the depth limit of inlining for fine tuning");
+  UsageError("      the compiler. A zero value will disable inlining. Honored only by Optimizing.");
+  UsageError("      Example: --inline-depth-limit=%d", CompilerOptions::kDefaultInlineDepthLimit);
+  UsageError("      Default: %d", CompilerOptions::kDefaultInlineDepthLimit);
+  UsageError("");
+  UsageError("  --inline-max-code-units=<code-units-count>: the maximum code units that a method");
+  UsageError("      can have to be considered for inlining. A zero value will disable inlining.");
+  UsageError("      Honored only by Optimizing.");
+  UsageError("      Example: --inline-max-code-units=%d",
+             CompilerOptions::kDefaultInlineMaxCodeUnits);
+  UsageError("      Default: %d", CompilerOptions::kDefaultInlineMaxCodeUnits);
+  UsageError("");
   UsageError("  --dump-timing: display a breakdown of where time was spent");
   UsageError("");
   UsageError("  --include-patch-information: Include patching information so the generated code");
@@ -550,6 +562,8 @@
     int small_method_threshold = CompilerOptions::kDefaultSmallMethodThreshold;
     int tiny_method_threshold = CompilerOptions::kDefaultTinyMethodThreshold;
     int num_dex_methods_threshold = CompilerOptions::kDefaultNumDexMethodsThreshold;
+    int inline_depth_limit = CompilerOptions::kDefaultInlineDepthLimit;
+    int inline_max_code_units = CompilerOptions::kDefaultInlineMaxCodeUnits;
 
     // Profile file to use
     double top_k_profile_threshold = CompilerOptions::kDefaultTopKProfileThreshold;
@@ -720,6 +734,22 @@
         if (num_dex_methods_threshold < 0) {
           Usage("--num-dex-methods passed a negative value %s", num_dex_methods_threshold);
         }
+      } else if (option.starts_with("--inline-depth-limit=")) {  // Parses into an int.
+        const char* limit = option.substr(strlen("--inline-depth-limit=")).data();
+        if (!ParseInt(limit, &inline_depth_limit)) {
+          Usage("Failed to parse --inline-depth-limit '%s' as an integer", limit);
+        }
+        if (inline_depth_limit < 0) {  // Negative limits are rejected with a usage error.
+          Usage("--inline-depth-limit passed a negative value %d", inline_depth_limit);  // int: %d.
+        }
+      } else if (option.starts_with("--inline-max-code-units=")) {  // Parses into an int.
+        const char* code_units = option.substr(strlen("--inline-max-code-units=")).data();
+        if (!ParseInt(code_units, &inline_max_code_units)) {
+          Usage("Failed to parse --inline-max-code-units '%s' as an integer", code_units);
+        }
+        if (inline_max_code_units < 0) {  // Negative values are rejected; the int needs %d.
+          Usage("--inline-max-code-units passed a negative value %d", inline_max_code_units);
+        }
       } else if (option == "--host") {
         is_host_ = true;
       } else if (option == "--runtime-arg") {
@@ -992,6 +1022,8 @@
                                                 small_method_threshold,
                                                 tiny_method_threshold,
                                                 num_dex_methods_threshold,
+                                                inline_depth_limit,
+                                                inline_max_code_units,
                                                 include_patch_information,
                                                 top_k_profile_threshold,
                                                 debuggable,
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 31e653b..d1d3481 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -1455,6 +1455,20 @@
           }  // else unknown instruction
           break;
         }
+        case 0x2B: {  // 0101011
+          //  CLZ - 111 11 0101011 mmmm 1111 dddd 1000 mmmm
+          if ((instr & 0xf0f0) == 0xf080) {  // Bits 15-12 must be 1111 and bits 7-4 must be 1000.
+            opcode << "clz";
+            ArmRegister Rm(instr, 0);  // Low copy of Rm (bits 3-0 in the pattern above).
+            ArmRegister Rd(instr, 8);
+            args << Rd << ", " << Rm;
+            ArmRegister Rm2(instr, 16);  // Second copy of Rm; it has to agree with the low copy.
+            if (Rm.r != Rm2.r || Rm.r == 13 || Rm.r == 15 || Rd.r == 13 || Rd.r == 15) {
+              args << " (UNPREDICTABLE)";
+            }
+          }
+          break;
+        }
       default:      // more formats
         if ((op2 >> 4) == 2) {      // 010xxxx
           // data processing (register)
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 2ead4a2..44787a7 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -1117,6 +1117,9 @@
       opcode1 = opcode_tmp.c_str();
     }
     break;
+  case 0xA7:
+    opcode1 = (prefix[2] == 0x66 ? "cmpsw" : "cmpsl");
+    break;
   case 0xAF:
     opcode1 = (prefix[2] == 0x66 ? "scasw" : "scasl");
     break;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index b8b6a5f..99140d4 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -159,7 +159,7 @@
 
   void WalkOatDexFile(const OatFile::OatDexFile* oat_dex_file, Callback callback) {
     std::string error_msg;
-    std::unique_ptr<const DexFile> dex_file(oat_dex_file->OpenDexFile(&error_msg));
+    std::unique_ptr<const DexFile> dex_file(oat_dex_file->OpenDexFile(outof(error_msg)));
     if (dex_file.get() == nullptr) {
       return;
     }
@@ -504,7 +504,7 @@
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
       CHECK(oat_dex_file != nullptr);
       std::string error_msg;
-      std::unique_ptr<const DexFile> dex_file(oat_dex_file->OpenDexFile(&error_msg));
+      std::unique_ptr<const DexFile> dex_file(oat_dex_file->OpenDexFile(outof(error_msg)));
       if (dex_file.get() == nullptr) {
         LOG(WARNING) << "Failed to open dex file '" << oat_dex_file->GetDexFileLocation()
             << "': " << error_msg;
@@ -533,7 +533,7 @@
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
       CHECK(oat_dex_file != nullptr);
       std::string error_msg;
-      std::unique_ptr<const DexFile> dex_file(oat_dex_file->OpenDexFile(&error_msg));
+      std::unique_ptr<const DexFile> dex_file(oat_dex_file->OpenDexFile(outof(error_msg)));
       if (dex_file.get() == nullptr) {
         LOG(WARNING) << "Failed to open dex file '" << oat_dex_file->GetDexFileLocation()
             << "': " << error_msg;
@@ -593,7 +593,7 @@
     // Create the verifier early.
 
     std::string error_msg;
-    std::unique_ptr<const DexFile> dex_file(oat_dex_file.OpenDexFile(&error_msg));
+    std::unique_ptr<const DexFile> dex_file(oat_dex_file.OpenDexFile(outof(error_msg)));
     if (dex_file.get() == nullptr) {
       os << "NOT FOUND: " << error_msg << "\n\n";
       os << std::flush;
@@ -638,7 +638,7 @@
     std::string error_msg;
     std::string dex_file_location = oat_dex_file.GetDexFileLocation();
 
-    std::unique_ptr<const DexFile> dex_file(oat_dex_file.OpenDexFile(&error_msg));
+    std::unique_ptr<const DexFile> dex_file(oat_dex_file.OpenDexFile(outof(error_msg)));
     if (dex_file == nullptr) {
       os << "Failed to open dex file '" << dex_file_location << "': " << error_msg;
       return false;
@@ -1553,7 +1553,7 @@
     if (oat_file == nullptr) {
       oat_file = OatFile::Open(oat_location, oat_location,
                                nullptr, nullptr, false, nullptr,
-                               &error_msg);
+                               outof(error_msg));
       if (oat_file == nullptr) {
         os << "NOT FOUND: " << error_msg << "\n";
         return false;
@@ -2321,7 +2321,7 @@
   std::vector<std::unique_ptr<const DexFile>> dex_files;
   for (const OatFile::OatDexFile* odf : oat_file->GetOatDexFiles()) {
     std::string error_msg;
-    std::unique_ptr<const DexFile> dex_file = odf->OpenDexFile(&error_msg);
+    std::unique_ptr<const DexFile> dex_file = odf->OpenDexFile(outof(error_msg));
     CHECK(dex_file != nullptr) << error_msg;
     class_linker->RegisterDexFile(*dex_file);
     dex_files.push_back(std::move(dex_file));
@@ -2361,7 +2361,7 @@
                    std::ostream* os) {
   std::string error_msg;
   OatFile* oat_file = OatFile::Open(oat_filename, oat_filename, nullptr, nullptr, false,
-                                    nullptr, &error_msg);
+                                    nullptr, outof(error_msg));
   if (oat_file == nullptr) {
     fprintf(stderr, "Failed to open oat file from '%s': %s\n", oat_filename, error_msg.c_str());
     return EXIT_FAILURE;
@@ -2377,7 +2377,7 @@
 static int SymbolizeOat(const char* oat_filename, std::string& output_name) {
   std::string error_msg;
   OatFile* oat_file = OatFile::Open(oat_filename, oat_filename, nullptr, nullptr, false,
-                                    nullptr, &error_msg);
+                                    nullptr, outof(error_msg));
   if (oat_file == nullptr) {
     fprintf(stderr, "Failed to open oat file from '%s': %s\n", oat_filename, error_msg.c_str());
     return EXIT_FAILURE;
diff --git a/runtime/Android.mk b/runtime/Android.mk
index fe79e72..8f70d30 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -39,6 +39,7 @@
   base/unix_file/random_access_file_utils.cc \
   check_jni.cc \
   class_linker.cc \
+  class_table.cc \
   common_throws.cc \
   debugger.cc \
   dex_file.cc \
@@ -340,10 +341,13 @@
 
 LIBART_CFLAGS := -DBUILDING_LIBART=1
 
+LIBART_TARGET_CFLAGS :=
+LIBART_HOST_CFLAGS :=
+
 ifeq ($(MALLOC_IMPL),dlmalloc)
-  LIBART_CFLAGS += -DUSE_DLMALLOC
+  LIBART_TARGET_CFLAGS += -DUSE_DLMALLOC
 else
-  LIBART_CFLAGS += -DUSE_JEMALLOC
+  LIBART_TARGET_CFLAGS += -DUSE_JEMALLOC
 endif
 
 # Default dex2oat instruction set features.
@@ -389,13 +393,6 @@
   art_static_or_shared := $(3)
 
   include $$(CLEAR_VARS)
-  # Clang assembler has problem with macros in asm_support_x86.S, http://b/17443165,
-  # on linux. Yet sdk on mac needs integrated assembler.
-  ifeq ($$(HOST_OS),darwin)
-    LOCAL_CLANG_ASFLAGS += -integrated-as
-  else
-    LOCAL_CLANG_ASFLAGS += -no-integrated-as
-  endif
   LOCAL_CPP_EXTENSION := $$(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart
@@ -439,8 +436,10 @@
   LOCAL_CFLAGS := $$(LIBART_CFLAGS)
   LOCAL_LDFLAGS := $$(LIBART_LDFLAGS)
   ifeq ($$(art_target_or_host),target)
+    LOCAL_CFLAGS += $$(LIBART_TARGET_CFLAGS)
     LOCAL_LDFLAGS += $$(LIBART_TARGET_LDFLAGS)
   else #host
+    LOCAL_CFLAGS += $$(LIBART_HOST_CFLAGS)
     LOCAL_LDFLAGS += $$(LIBART_HOST_LDFLAGS)
     ifeq ($$(art_static_or_shared),static)
       LOCAL_LDFLAGS += -static
@@ -580,4 +579,6 @@
 LIBART_HOST_SRC_FILES_64 :=
 LIBART_ENUM_OPERATOR_OUT_HEADER_FILES :=
 LIBART_CFLAGS :=
+LIBART_TARGET_CFLAGS :=
+LIBART_HOST_CFLAGS :=
 build-libart :=
diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S
index 665d2a3..44c7649 100644
--- a/runtime/arch/arm/asm_support_arm.S
+++ b/runtime/arch/arm/asm_support_arm.S
@@ -50,6 +50,11 @@
 // generated at END.
 .macro DEF_ENTRY thumb_or_arm, name
     \thumb_or_arm
+// Clang ignores .thumb_func and requires an explicit .thumb. Investigate whether we should still
+// carry around the .thumb_func.
+    .ifc \thumb_or_arm, .thumb_func
+        .thumb
+    .endif
     .type \name, #function
     .hidden \name  // Hide this as a global symbol, so we do not incur plt calls.
     .global \name
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 2f2654d..be9af98 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -171,6 +171,7 @@
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierSlow = artReadBarrierSlow;
 }
 
 }  // namespace art
diff --git a/runtime/arch/arm/instruction_set_features_arm.cc b/runtime/arch/arm/instruction_set_features_arm.cc
index f8590d3..28d1942 100644
--- a/runtime/arch/arm/instruction_set_features_arm.cc
+++ b/runtime/arch/arm/instruction_set_features_arm.cc
@@ -16,7 +16,7 @@
 
 #include "instruction_set_features_arm.h"
 
-#if defined(HAVE_ANDROID_OS) && defined(__arm__)
+#if defined(__ANDROID__) && defined(__arm__)
 #include <sys/auxv.h>
 #include <asm/hwcap.h>
 #endif
@@ -166,7 +166,7 @@
   bool has_div = false;
   bool has_lpae = false;
 
-#if defined(HAVE_ANDROID_OS) && defined(__arm__)
+#if defined(__ANDROID__) && defined(__arm__)
   uint64_t hwcaps = getauxval(AT_HWCAP);
   LOG(INFO) << "hwcaps=" << hwcaps;
   if ((hwcaps & HWCAP_IDIVT) != 0) {
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 2000110..f6d954f 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -51,7 +51,6 @@
     sub sp, #12                                   @ 3 words of space, bottom word will hold Method*
     .cfi_adjust_cfa_offset 12
     RUNTIME_CURRENT1 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     ldr \rTemp1, [\rTemp1, #RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kSaveAll Method*.
     str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
@@ -79,7 +78,6 @@
     sub sp, #4                                    @ bottom word will hold Method*
     .cfi_adjust_cfa_offset 4
     RUNTIME_CURRENT2 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kRefsOnly Method*.
     str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
@@ -139,7 +137,6 @@
 .macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp1, rTemp2
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
     RUNTIME_CURRENT3 \rTemp1, \rTemp2  @ Load Runtime::Current into rTemp1.
-    THIS_LOAD_REQUIRES_READ_BARRIER
      @ rTemp1 is kRefsAndArgs Method*.
     ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET]
     str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
@@ -171,7 +168,6 @@
     .cfi_adjust_cfa_offset -40
 .endm
 
-
 .macro RETURN_IF_RESULT_IS_ZERO
     cbnz   r0, 1f              @ result non-zero branch over
     bx     lr                  @ return
@@ -588,6 +584,59 @@
     bkpt
 END art_quick_check_cast
 
+// Restore rReg from [sp, #offset] unless rReg is rExclude (.ifnc is a textual name comparison).
+.macro POP_REG_NE rReg, offset, rExclude
+    .ifnc \rReg, \rExclude
+        ldr \rReg, [sp, #\offset]   @ reload rReg from its stack slot
+        .cfi_restore \rReg
+    .endif
+.endm
+
+    /*
+     * Macro to insert read barrier, only used in art_quick_aput_obj.
+     * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
+     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
+     */
+.macro READ_BARRIER rDest, rObj, offset
+#ifdef USE_READ_BARRIER
+    push {r0-r3, ip, lr}            @ 6 words for saved registers (used in art_quick_aput_obj)
+    .cfi_adjust_cfa_offset 24
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset r2, 8
+    .cfi_rel_offset r3, 12
+    .cfi_rel_offset ip, 16
+    .cfi_rel_offset lr, 20
+    sub sp, #8                      @ push padding
+    .cfi_adjust_cfa_offset 8
+    @ mov r0, r0                    @ pass ref in r0 (no-op for now since parameter ref is unused)
+    .ifnc \rObj, r1
+        mov r1, \rObj               @ pass rObj
+    .endif
+    mov r2, #\offset                @ pass offset
+    bl artReadBarrierSlow           @ artReadBarrierSlow(ref, rObj, offset)
+    @ No need to unpoison return value in r0, artReadBarrierSlow() would do the unpoisoning.
+    .ifnc \rDest, r0
+        mov \rDest, r0              @ save return value in rDest
+    .endif
+    add sp, #8                      @ pop padding
+    .cfi_adjust_cfa_offset -8
+    POP_REG_NE r0, 0, \rDest        @ restore saved registers except rDest (it holds the result)
+    POP_REG_NE r1, 4, \rDest
+    POP_REG_NE r2, 8, \rDest
+    POP_REG_NE r3, 12, \rDest
+    POP_REG_NE ip, 16, \rDest
+    add sp, #20                     @ skip the r0-r3 and ip slots; lr is popped next
+    .cfi_adjust_cfa_offset -20
+    pop {lr}                        @ restore lr
+    .cfi_adjust_cfa_offset -4
+    .cfi_restore lr
+#else
+    ldr \rDest, [\rObj, #\offset]   @ plain load when read barriers are compiled out
+    UNPOISON_HEAP_REF \rDest
+#endif  // USE_READ_BARRIER
+.endm
+
     /*
      * Entry from managed code for array put operations of objects where the value being stored
      * needs to be checked for compatibility.
@@ -609,15 +658,21 @@
     b art_quick_throw_array_bounds
 END art_quick_aput_obj_with_bound_check
 
+#ifdef USE_READ_BARRIER
+    .extern artReadBarrierSlow
+#endif
     .hidden art_quick_aput_obj
 ENTRY art_quick_aput_obj
+#ifdef USE_READ_BARRIER
+    @ The offset to .Ldo_aput_null is too large to use cbz due to expansion from READ_BARRIER macro.
+    tst r2, r2
+    beq .Ldo_aput_null
+#else
     cbz r2, .Ldo_aput_null
-    ldr r3, [r0, #MIRROR_OBJECT_CLASS_OFFSET]
-    UNPOISON_HEAP_REF r3
-    ldr ip, [r2, #MIRROR_OBJECT_CLASS_OFFSET]
-    UNPOISON_HEAP_REF ip
-    ldr r3, [r3, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]
-    UNPOISON_HEAP_REF r3
+#endif  // USE_READ_BARRIER
+    READ_BARRIER r3, r0, MIRROR_OBJECT_CLASS_OFFSET
+    READ_BARRIER ip, r2, MIRROR_OBJECT_CLASS_OFFSET
+    READ_BARRIER r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
     cmp r3, ip  @ value's type == array's component type - trivial assignability
     bne .Lcheck_assignability
 .Ldo_aput:
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 2ce2a29..0f06727 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -155,6 +155,7 @@
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierSlow = artReadBarrierSlow;
 };
 
 }  // namespace art
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 6d9b44a..8ba3d43 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -31,8 +31,6 @@
     ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
 
     // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
-    THIS_LOAD_REQUIRES_READ_BARRIER
-
     // Loads appropriate callee-save-method.
     ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
 
@@ -95,8 +93,6 @@
     ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
 
     // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefOnly]  .
-    THIS_LOAD_REQUIRES_READ_BARRIER
-
     // Loads appropriate callee-save-method.
     ldr xIP0, [xIP0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ]
 
@@ -251,7 +247,6 @@
     ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
 
     // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
-    THIS_LOAD_REQUIRES_READ_BARRIER
     ldr xIP0, [xIP0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET ]
 
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL
@@ -542,18 +537,18 @@
     // W10 - temporary
     add x9, sp, #8                         // Destination address is bottom of stack + null.
 
-    // Use \@ to differentiate between macro invocations.
-.LcopyParams\@:
+    // Copy parameters into the stack. Use numeric label as this is a macro and Clang's assembler
+    // does not have unique-id variables.
+1:
     cmp w2, #0
-    beq .LendCopyParams\@
+    beq 2f
     sub w2, w2, #4      // Need 65536 bytes of range.
     ldr w10, [x1, x2]
     str w10, [x9, x2]
 
-    b .LcopyParams\@
+    b 1b
 
-.LendCopyParams\@:
-
+2:
     // Store null into ArtMethod* at bottom of frame.
     str xzr, [sp]
 .endm
@@ -592,26 +587,29 @@
     // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
     ldrb w10, [x5]
 
+    // Check the return type and store the correct register into the jvalue in memory.
+    // Use numeric label as this is a macro and Clang's assembler does not have unique-id variables.
+
     // Don't set anything for a void type.
     cmp w10, #'V'
-    beq .Lexit_art_quick_invoke_stub\@
+    beq 3f
 
+    // Is it a double?
     cmp w10, #'D'
-    bne .Lreturn_is_float\@
+    bne 1f
     str d0, [x4]
-    b .Lexit_art_quick_invoke_stub\@
+    b 3f
 
-.Lreturn_is_float\@:
+1:  // Is it a float?
     cmp w10, #'F'
-    bne .Lreturn_is_int\@
+    bne 2f
     str s0, [x4]
-    b .Lexit_art_quick_invoke_stub\@
+    b 3f
 
-    // Just store x0. Doesn't matter if it is 64 or 32 bits.
-.Lreturn_is_int\@:
+2:  // Just store x0. Doesn't matter if it is 64 or 32 bits.
     str x0, [x4]
 
-.Lexit_art_quick_invoke_stub\@:
+3:  // Finish up.
     ldp x2, x19, [xFP, #32]   // Restore stack pointer and x19.
     .cfi_restore x19
     mov sp, x2
@@ -1119,6 +1117,62 @@
     brk 0                             // We should not return here...
 END art_quick_check_cast
 
+// Restore xReg from [sp, #offset] unless xReg is xExclude (.ifnc is a textual name comparison).
+.macro POP_REG_NE xReg, offset, xExclude
+    .ifnc \xReg, \xExclude
+        ldr \xReg, [sp, #\offset]     // reload xReg from its stack slot
+        .cfi_restore \xReg
+    .endif
+.endm
+
+    /*
+     * Macro to insert read barrier, only used in art_quick_aput_obj.
+     * xDest, wDest and xObj are registers, offset is a defined literal such as
+     * MIRROR_OBJECT_CLASS_OFFSET. Dest needs both x and w versions of the same register to handle
+     * name mismatch between instructions. This macro uses the lower 32b of register when possible.
+     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
+     */
+.macro READ_BARRIER xDest, wDest, xObj, offset
+#ifdef USE_READ_BARRIER
+    // Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned.
+    stp x0, x1, [sp, #-48]!
+    .cfi_adjust_cfa_offset 48
+    .cfi_rel_offset x0, 0
+    .cfi_rel_offset x1, 8
+    stp x2, x3, [sp, #16]
+    .cfi_rel_offset x2, 16
+    .cfi_rel_offset x3, 24
+    stp x4, xLR, [sp, #32]
+    .cfi_rel_offset x4, 32
+    .cfi_rel_offset x30, 40
+
+    // mov x0, x0                   // pass ref in x0 (no-op for now since parameter ref is unused)
+    .ifnc \xObj, x1
+        mov x1, \xObj               // pass xObj
+    .endif
+    mov w2, #\offset                // pass offset
+    bl artReadBarrierSlow           // artReadBarrierSlow(ref, xObj, offset)
+    // No need to unpoison return value in w0, artReadBarrierSlow() would do the unpoisoning.
+    .ifnc \wDest, w0
+        mov \wDest, w0              // save return value in wDest
+    .endif
+
+    // Restore saved registers, skipping xDest since it now holds the return value.
+    POP_REG_NE x0, 0, \xDest
+    POP_REG_NE x1, 8, \xDest
+    POP_REG_NE x2, 16, \xDest
+    POP_REG_NE x3, 24, \xDest
+    POP_REG_NE x4, 32, \xDest
+    ldr xLR, [sp, #40]              // restore LR unconditionally
+    .cfi_restore x30
+    add sp, sp, #48                 // pop the register save area
+    .cfi_adjust_cfa_offset -48
+#else
+    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
+    UNPOISON_HEAP_REF \wDest
+#endif  // USE_READ_BARRIER
+.endm
+
     /*
      * Entry from managed code for array put operations of objects where the value being stored
      * needs to be checked for compatibility.
@@ -1146,17 +1200,17 @@
     b art_quick_throw_array_bounds
 END art_quick_aput_obj_with_bound_check
 
+#ifdef USE_READ_BARRIER
+    .extern artReadBarrierSlow
+#endif
 ENTRY art_quick_aput_obj
     cbz x2, .Ldo_aput_null
-    ldr w3, [x0, #MIRROR_OBJECT_CLASS_OFFSET]            // Heap reference = 32b
+    READ_BARRIER x3, w3, x0, MIRROR_OBJECT_CLASS_OFFSET     // Heap reference = 32b
                                                          // This also zero-extends to x3
-    UNPOISON_HEAP_REF w3
-    ldr w4, [x2, #MIRROR_OBJECT_CLASS_OFFSET]            // Heap reference = 32b
+    READ_BARRIER x4, w4, x2, MIRROR_OBJECT_CLASS_OFFSET     // Heap reference = 32b
                                                          // This also zero-extends to x4
-    UNPOISON_HEAP_REF w4
-    ldr w3, [x3, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]    // Heap reference = 32b
+    READ_BARRIER x3, w3, x3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET // Heap reference = 32b
                                                          // This also zero-extends to x3
-    UNPOISON_HEAP_REF w3
     cmp w3, w4  // value's type == array's component type - trivial assignability
     bne .Lcheck_assignability
 .Ldo_aput:
diff --git a/runtime/arch/instruction_set_features_test.cc b/runtime/arch/instruction_set_features_test.cc
index e6f4e7a..99c2d4d 100644
--- a/runtime/arch/instruction_set_features_test.cc
+++ b/runtime/arch/instruction_set_features_test.cc
@@ -18,7 +18,7 @@
 
 #include <gtest/gtest.h>
 
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
 #include "cutils/properties.h"
 #endif
 
@@ -26,7 +26,7 @@
 
 namespace art {
 
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
 #if defined(__aarch64__)
 TEST(InstructionSetFeaturesTest, DISABLED_FeaturesFromSystemPropertyVariant) {
   LOG(WARNING) << "Test disabled due to no CPP define for A53 erratum 835769";
@@ -111,7 +111,7 @@
 }
 #endif
 
-#ifndef HAVE_ANDROID_OS
+#ifndef __ANDROID__
 TEST(InstructionSetFeaturesTest, HostFeaturesFromCppDefines) {
   std::string error_msg;
   std::unique_ptr<const InstructionSetFeatures> default_features(
diff --git a/runtime/arch/mips/entrypoints_direct_mips.h b/runtime/arch/mips/entrypoints_direct_mips.h
index b1aa3ee..f9c5315 100644
--- a/runtime/arch/mips/entrypoints_direct_mips.h
+++ b/runtime/arch/mips/entrypoints_direct_mips.h
@@ -44,7 +44,8 @@
       entrypoint == kQuickCmpgDouble ||
       entrypoint == kQuickCmpgFloat ||
       entrypoint == kQuickCmplDouble ||
-      entrypoint == kQuickCmplFloat;
+      entrypoint == kQuickCmplFloat ||
+      entrypoint == kQuickReadBarrierSlow;
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 09a018e..4e4b91f 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -279,6 +279,8 @@
 
   qpoints->pReadBarrierJni = ReadBarrierJni;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierJni), "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierSlow = artReadBarrierSlow;
+  static_assert(IsDirectEntrypoint(kQuickReadBarrierSlow), "Direct C stub not marked direct.");
 };
 
 }  // namespace art
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 2819f92..4d5004f 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -79,7 +79,6 @@
 
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    THIS_LOAD_REQUIRES_READ_BARRIER
     lw $t0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
@@ -127,7 +126,6 @@
 
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    THIS_LOAD_REQUIRES_READ_BARRIER
     lw $t0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
@@ -219,7 +217,6 @@
     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
     lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
     lw $t0, 0($t0)
-    THIS_LOAD_REQUIRES_READ_BARRIER
     lw $t0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
@@ -627,6 +624,76 @@
 END art_quick_check_cast
 
     /*
+     * Restore rReg's value from offset($sp) if rReg is not the same as rExclude.
+     * nReg is the register number for rReg.
+     */
+.macro POP_REG_NE rReg, nReg, offset, rExclude
+    .ifnc \rReg, \rExclude
+        lw \rReg, \offset($sp)      # reload rReg (nReg is its number for .cfi_restore)
+        .cfi_restore \nReg
+    .endif
+.endm
+
+    /*
+     * Macro to insert read barrier, only used in art_quick_aput_obj.
+     * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
+     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
+     */
+.macro READ_BARRIER rDest, rObj, offset
+#ifdef USE_READ_BARRIER
+    # Save registers used in art_quick_aput_obj: a0-a2, t0-t1, t9, ra. 8 words for 16B alignment.
+    addiu  $sp, $sp, -32
+    .cfi_adjust_cfa_offset 32
+    sw     $ra, 28($sp)
+    .cfi_rel_offset 31, 28
+    sw     $t9, 24($sp)
+    .cfi_rel_offset 25, 24
+    sw     $t1, 20($sp)
+    .cfi_rel_offset 9, 20
+    sw     $t0, 16($sp)
+    .cfi_rel_offset 8, 16
+    sw     $a2, 8($sp)              # padding slot at offset 12 (padding can be any slot in the 32B)
+    .cfi_rel_offset 6, 8
+    sw     $a1, 4($sp)
+    .cfi_rel_offset 5, 4
+    sw     $a0, 0($sp)
+    .cfi_rel_offset 4, 0
+
+    # move $a0, $a0                 # pass ref in a0 (no-op for now since parameter ref is unused)
+    .ifnc \rObj, $a1
+        move $a1, \rObj             # pass rObj
+    .endif
+    addiu $a2, $zero, \offset       # pass offset
+    jal artReadBarrierSlow          # artReadBarrierSlow(ref, rObj, offset)
+    addiu  $sp, $sp, -16            # Use branch delay slot to reserve argument slots on the stack
+                                    # before the call to artReadBarrierSlow.
+    addiu  $sp, $sp, 16             # restore stack after call to artReadBarrierSlow
+    # No need to unpoison return value in v0, artReadBarrierSlow() would do the unpoisoning.
+    move \rDest, $v0                # save return value in rDest
+                                    # (rDest cannot be v0 in art_quick_aput_obj)
+
+    lw     $a0, 0($sp)              # restore registers except rDest
+                                    # (rDest can only be t0 or t1 in art_quick_aput_obj)
+    .cfi_restore 4
+    lw     $a1, 4($sp)
+    .cfi_restore 5
+    lw     $a2, 8($sp)
+    .cfi_restore 6
+    POP_REG_NE $t0, 8, 16, \rDest
+    POP_REG_NE $t1, 9, 20, \rDest
+    lw     $t9, 24($sp)
+    .cfi_restore 25
+    lw     $ra, 28($sp)             # restore $ra
+    .cfi_restore 31
+    addiu  $sp, $sp, 32             # pop the register save area
+    .cfi_adjust_cfa_offset -32
+#else
+    lw     \rDest, \offset(\rObj)   # plain load when read barriers are compiled out
+    UNPOISON_HEAP_REF \rDest
+#endif  // USE_READ_BARRIER
+.endm
+
+    /*
      * Entry from managed code for array put operations of objects where the value being stored
      * needs to be checked for compatibility.
      * a0 = array, a1 = index, a2 = value
@@ -648,15 +715,15 @@
     move $a1, $t0
 END art_quick_aput_obj_with_bound_check
 
+#ifdef USE_READ_BARRIER
+    .extern artReadBarrierSlow
+#endif
 ENTRY art_quick_aput_obj
     beqz $a2, .Ldo_aput_null
     nop
-    lw $t0, MIRROR_OBJECT_CLASS_OFFSET($a0)
-    UNPOISON_HEAP_REF $t0
-    lw $t1, MIRROR_OBJECT_CLASS_OFFSET($a2)
-    UNPOISON_HEAP_REF $t1
-    lw $t0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET($t0)
-    UNPOISON_HEAP_REF $t0
+    READ_BARRIER $t0, $a0, MIRROR_OBJECT_CLASS_OFFSET
+    READ_BARRIER $t1, $a2, MIRROR_OBJECT_CLASS_OFFSET
+    READ_BARRIER $t0, $t0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
     bne $t1, $t0, .Lcheck_assignability  # value's type == array's component type - trivial assignability
     nop
 .Ldo_aput:
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 4904af9..ec02d5a 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -186,6 +186,7 @@
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierSlow = artReadBarrierSlow;
 };
 
 }  // namespace art
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index abca70b..c30e6ca 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -89,7 +89,6 @@
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    THIS_LOAD_REQUIRES_READ_BARRIER
     ld      $t1, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place ArtMethod* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
@@ -132,7 +131,6 @@
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    THIS_LOAD_REQUIRES_READ_BARRIER
     ld      $t1, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
@@ -255,7 +253,6 @@
     # load appropriate callee-save-method
     ld      $t1, %got(_ZN3art7Runtime9instance_E)($gp)
     ld      $t1, 0($t1)
-    THIS_LOAD_REQUIRES_READ_BARRIER
     ld      $t1, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t1)
     sd      $t1, 0($sp)                                # Place Method* at bottom of stack.
     sd      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
@@ -888,6 +885,77 @@
     move $a2, rSELF                 # pass Thread::Current
 END art_quick_check_cast
 
+
+    /*
+     * Restore rReg's value from offset($sp) if rReg is not the same as rExclude.
+     * nReg is the register number for rReg, used for the .cfi_restore directive.
+     */
+.macro POP_REG_NE rReg, nReg, offset, rExclude
+    .ifnc \rReg, \rExclude
+        ld \rReg, \offset($sp)      # restore rReg
+        .cfi_restore \nReg
+    .endif
+.endm
+
+    /*
+     * Macro to insert read barrier, only used in art_quick_aput_obj.
+     * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
+     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
+     */
+.macro READ_BARRIER rDest, rObj, offset
+#ifdef USE_READ_BARRIER
+    # Save registers used in art_quick_aput_obj: a0-a2, t0-t1, t9, ra. 16B-aligned.
+    daddiu  $sp, $sp, -64
+    .cfi_adjust_cfa_offset 64
+    sd     $ra, 56($sp)
+    .cfi_rel_offset 31, 56
+    sd     $t9, 48($sp)
+    .cfi_rel_offset 25, 48
+    sd     $t1, 40($sp)
+    .cfi_rel_offset 13, 40
+    sd     $t0, 32($sp)
+    .cfi_rel_offset 12, 32
+    sd     $a2, 16($sp)             # padding slot at offset 24 (padding can be any slot in the 64B)
+    .cfi_rel_offset 6, 16
+    sd     $a1, 8($sp)
+    .cfi_rel_offset 5, 8
+    sd     $a0, 0($sp)
+    .cfi_rel_offset 4, 0
+
+    # move $a0, $a0                 # pass ref in a0 (no-op for now since parameter ref is unused)
+    .ifnc \rObj, $a1
+        move $a1, \rObj             # pass rObj
+    .endif
+    daddiu $a2, $zero, \offset      # pass offset
+    jal artReadBarrierSlow          # artReadBarrierSlow(ref, rObj, offset)
+    .cpreturn                       # Restore gp from t8 in branch delay slot.
+                                    # t8 may be clobbered in artReadBarrierSlow.
+    # No need to unpoison return value in v0, artReadBarrierSlow() would do the unpoisoning.
+    move \rDest, $v0                # save return value in rDest
+                                    # (rDest cannot be v0 in art_quick_aput_obj)
+
+    ld     $a0, 0($sp)              # restore registers except rDest
+                                    # (rDest can only be t0 or t1 in art_quick_aput_obj)
+    .cfi_restore 4
+    ld     $a1, 8($sp)
+    .cfi_restore 5
+    ld     $a2, 16($sp)
+    .cfi_restore 6
+    POP_REG_NE $t0, 12, 32, \rDest
+    POP_REG_NE $t1, 13, 40, \rDest
+    ld     $t9, 48($sp)
+    .cfi_restore 25
+    ld     $ra, 56($sp)             # restore $ra
+    .cfi_restore 31
+    daddiu  $sp, $sp, 64            # pop the register save area
+    .cfi_adjust_cfa_offset -64
+    SETUP_GP                        # set up gp because we are not returning
+#else
+    lwu     \rDest, \offset(\rObj)  # no read barrier: zero-extending 32-bit load
+    UNPOISON_HEAP_REF \rDest
+#endif  // USE_READ_BARRIER
+.endm
+
     /*
      * Entry from managed code for array put operations of objects where the value being stored
      * needs to be checked for compatibility.
@@ -913,12 +981,9 @@
 ENTRY art_quick_aput_obj
     beq  $a2, $zero, .Ldo_aput_null
     nop
-    lwu $t0, MIRROR_OBJECT_CLASS_OFFSET($a0)
-    UNPOISON_HEAP_REF $t0
-    lwu $t1, MIRROR_OBJECT_CLASS_OFFSET($a2)
-    UNPOISON_HEAP_REF $t1
-    lwu $t0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET($t0)
-    UNPOISON_HEAP_REF $t0
+    READ_BARRIER $t0, $a0, MIRROR_OBJECT_CLASS_OFFSET
+    READ_BARRIER $t1, $a2, MIRROR_OBJECT_CLASS_OFFSET
+    READ_BARRIER $t0, $t0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
     bne $t1, $t0, .Lcheck_assignability  # value's type == array's component type - trivial assignability
     nop
 .Ldo_aput:
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 0831c26..195b3b3 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -126,7 +126,7 @@
           // Use the result from r0
         : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
           [referrer] "r"(referrer)
-        : "memory");  // clobber.
+        : "r0", "memory");  // clobber.
 #elif defined(__aarch64__)
     __asm__ __volatile__(
         // Spill x0-x7 which we say we don't clobber. May contain args.
@@ -479,7 +479,7 @@
           // Use the result from r0
         : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
           [referrer] "r"(referrer), [hidden] "r"(hidden)
-        : "memory");  // clobber.
+        : "r0", "memory");  // clobber.
 #elif defined(__aarch64__)
     __asm__ __volatile__(
         // Spill x0-x7 which we say we don't clobber. May contain args.
@@ -1124,8 +1124,6 @@
 
 
 TEST_F(StubTest, APutObj) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
     (defined(__x86_64__) && !defined(__APPLE__))
   Thread* self = Thread::Current();
@@ -1258,8 +1256,6 @@
 }
 
 TEST_F(StubTest, AllocObject) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
     (defined(__x86_64__) && !defined(__APPLE__))
   // This will lead to OOM  error messages in the log.
@@ -1385,8 +1381,6 @@
 }
 
 TEST_F(StubTest, AllocObjectArray) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
     (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
@@ -1474,8 +1468,6 @@
 
 
 TEST_F(StubTest, StringCompareTo) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
@@ -2152,8 +2144,6 @@
 }
 
 TEST_F(StubTest, Fields8) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
   Thread* self = Thread::Current();
 
   self->TransitionFromSuspendedToRunnable();
@@ -2166,8 +2156,6 @@
 }
 
 TEST_F(StubTest, Fields16) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
   Thread* self = Thread::Current();
 
   self->TransitionFromSuspendedToRunnable();
@@ -2180,8 +2168,6 @@
 }
 
 TEST_F(StubTest, Fields32) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
   Thread* self = Thread::Current();
 
   self->TransitionFromSuspendedToRunnable();
@@ -2193,8 +2179,6 @@
 }
 
 TEST_F(StubTest, FieldsObj) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
   Thread* self = Thread::Current();
 
   self->TransitionFromSuspendedToRunnable();
@@ -2206,8 +2190,6 @@
 }
 
 TEST_F(StubTest, Fields64) {
-  TEST_DISABLED_FOR_READ_BARRIER();
-
   Thread* self = Thread::Current();
 
   self->TransitionFromSuspendedToRunnable();
@@ -2221,8 +2203,6 @@
 TEST_F(StubTest, IMT) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
     (defined(__x86_64__) && !defined(__APPLE__))
-  TEST_DISABLED_FOR_READ_BARRIER();
-
   Thread* self = Thread::Current();
 
   ScopedObjectAccess soa(self);
@@ -2342,8 +2322,6 @@
 
 TEST_F(StubTest, StringIndexOf) {
 #if defined(__arm__) || defined(__aarch64__)
-  TEST_DISABLED_FOR_READ_BARRIER();
-
   Thread* self = Thread::Current();
   ScopedObjectAccess soa(self);
   // garbage is created during ClassLinker::Init
@@ -2416,4 +2394,40 @@
 #endif
 }
 
+TEST_F(StubTest, ReadBarrier) {
+#if defined(ART_USE_READ_BARRIER) && (defined(__i386__) || defined(__arm__) || \
+      defined(__aarch64__) || defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__)))
+  Thread* self = Thread::Current();
+
+  const uintptr_t readBarrierSlow = StubTest::GetEntrypoint(self, kQuickReadBarrierSlow);
+
+  // Create an object
+  ScopedObjectAccess soa(self);
+  // garbage is created during ClassLinker::Init
+
+  StackHandleScope<2> hs(soa.Self());
+  Handle<mirror::Class> c(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;")));
+
+  // Build an object instance
+  Handle<mirror::Object> obj(hs.NewHandle(c->AllocObject(soa.Self())));
+
+  EXPECT_FALSE(self->IsExceptionPending());
+
+  size_t result = Invoke3(0U, reinterpret_cast<size_t>(obj.Get()),
+                          mirror::Object::ClassOffset().SizeValue(), readBarrierSlow, self);
+
+  EXPECT_FALSE(self->IsExceptionPending());
+  EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
+  mirror::Class* klass = reinterpret_cast<mirror::Class*>(result);
+  EXPECT_EQ(klass, obj->GetClass());
+
+  // Tests done.
+#else
+  LOG(INFO) << "Skipping read_barrier_slow";
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping read_barrier_slow" << std::endl;
+#endif
+}
+
 }  // namespace art
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 737f4d1..e2632c1 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -28,6 +28,9 @@
 extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
 
+// Read barrier entrypoints.
+extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
+
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      QuickEntryPoints* qpoints) {
   // Interpreter
@@ -141,6 +144,7 @@
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
 };
 
 }  // namespace art
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index ebfb3fa..1da5a2f 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -33,7 +33,6 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     pushl RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
@@ -60,7 +59,6 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     pushl RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the top quick frame.
@@ -106,7 +104,6 @@
     movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
     movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
     // Push save all callee-save method.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     pushl RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
     CFI_ADJUST_CFA_OFFSET(4)
     // Store esp as the stop quick frame.
@@ -1126,6 +1123,53 @@
     UNREACHABLE
 END_FUNCTION art_quick_check_cast
 
+// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
+MACRO2(POP_REG_NE, reg, exclude_reg)
+    .ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
+      addl MACRO_LITERAL(4), %esp
+      CFI_ADJUST_CFA_OFFSET(-4)
+    .else
+      POP RAW_VAR(reg)
+    .endif
+END_MACRO
+
+    /*
+     * Macro to insert read barrier, only used in art_quick_aput_obj.
+     * obj_reg and dest_reg are registers, offset is a defined literal such as
+     * MIRROR_OBJECT_CLASS_OFFSET.
+     * pop_eax is a boolean flag, indicating if eax is popped after the call.
+     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
+     */
+MACRO4(READ_BARRIER, obj_reg, offset, dest_reg, pop_eax)
+#ifdef USE_READ_BARRIER
+    PUSH eax                        // save registers used in art_quick_aput_obj
+    PUSH ebx
+    PUSH edx
+    PUSH ecx
+    // Outgoing argument set up
+    pushl MACRO_LITERAL((RAW_VAR(offset)))  // pass offset, double parentheses are necessary
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH RAW_VAR(obj_reg)           // pass obj_reg
+    PUSH eax                        // pass ref, just pass eax for now since parameter ref is unused
+    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj_reg, offset)
+    // No need to unpoison return value in eax, artReadBarrierSlow() would do the unpoisoning.
+    .ifnc RAW_VAR(dest_reg), eax
+      movl %eax, REG_VAR(dest_reg)  // save loaded ref in dest_reg
+    .endif
+    addl MACRO_LITERAL(12), %esp    // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-12)
+    POP_REG_NE ecx, RAW_VAR(dest_reg) // Restore args except dest_reg
+    POP_REG_NE edx, RAW_VAR(dest_reg)
+    POP_REG_NE ebx, RAW_VAR(dest_reg)
+    .ifc RAW_VAR(pop_eax), true
+      POP_REG_NE eax, RAW_VAR(dest_reg)
+    .endif
+#else
+    movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg)
+    UNPOISON_HEAP_REF RAW_VAR(dest_reg)
+#endif  // USE_READ_BARRIER
+END_MACRO
+
     /*
      * Entry from managed code for array put operations of objects where the value being stored
      * needs to be checked for compatibility.
@@ -1149,17 +1193,20 @@
 DEFINE_FUNCTION art_quick_aput_obj
     test %edx, %edx              // store of null
     jz .Ldo_aput_null
-    movl MIRROR_OBJECT_CLASS_OFFSET(%eax), %ebx
-    UNPOISON_HEAP_REF ebx
-    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%ebx), %ebx
-    UNPOISON_HEAP_REF ebx
+    READ_BARRIER eax, MIRROR_OBJECT_CLASS_OFFSET, ebx, true
+    READ_BARRIER ebx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ebx, true
     // value's type == array's component type - trivial assignability
-#ifdef USE_HEAP_POISONING
-    PUSH eax  // save eax
+#if defined(USE_READ_BARRIER)
+    READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, false
+    cmpl %eax, %ebx
+    POP eax                      // restore eax from the push in the beginning of READ_BARRIER macro
+#elif defined(USE_HEAP_POISONING)
+    PUSH eax                     // save eax
+    // Cannot call READ_BARRIER macro here, because the above push messes up stack alignment.
     movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %eax
     UNPOISON_HEAP_REF eax
     cmpl %eax, %ebx
-    POP  eax  // restore eax
+    POP eax                      // restore eax
 #else
     cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ebx
 #endif
@@ -1181,6 +1228,8 @@
     subl LITERAL(8), %esp         // alignment padding
     CFI_ADJUST_CFA_OFFSET(8)
 #ifdef USE_HEAP_POISONING
+    // This load does not need read barrier, since edx is unchanged and there's no GC safe point
+    // from last read of MIRROR_OBJECT_CLASS_OFFSET(%edx).
     movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %eax  // pass arg2 - type of the value to be stored
     UNPOISON_HEAP_REF eax
     PUSH eax
@@ -1696,5 +1745,15 @@
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
 
+DEFINE_FUNCTION art_quick_read_barrier_slow
+    PUSH edx                        // pass arg3 - offset
+    PUSH ecx                        // pass arg2 - obj
+    PUSH eax                        // pass arg1 - ref
+    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
+    addl LITERAL(12), %esp          // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-12)
+    ret
+END_FUNCTION art_quick_read_barrier_slow
+
     // TODO: implement these!
 UNIMPLEMENTED art_quick_memcmp16
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index 706ae58..cf0039c 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -24,6 +24,7 @@
 #define MACRO1(macro_name, macro_arg1) .macro macro_name macro_arg1
 #define MACRO2(macro_name, macro_arg1, macro_arg2) .macro macro_name macro_arg1, macro_arg2
 #define MACRO3(macro_name, macro_arg1, macro_arg2, macro_arg3) .macro macro_name macro_arg1, macro_arg2, macro_arg3
+#define MACRO4(macro_name, macro_arg1, macro_arg2, macro_arg3, macro_arg4) .macro macro_name macro_arg1, macro_arg2, macro_arg3, macro_arg4
 #define END_MACRO .endm
 
 #if defined(__clang__)
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index d0ab9d5..ef1bb5f 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -29,6 +29,9 @@
 extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass,
                                                    const mirror::Class* ref_class);
 
+// Read barrier entrypoints.
+extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
+
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      QuickEntryPoints* qpoints) {
 #if defined(__APPLE__)
@@ -145,6 +148,7 @@
 
   // Read barrier
   qpoints->pReadBarrierJni = ReadBarrierJni;
+  qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
 #endif  // __APPLE__
 };
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 0eeb03a..f4c9488 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -66,7 +66,6 @@
     movq %xmm14, 24(%rsp)
     movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for save all callee save frame method.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
@@ -109,7 +108,6 @@
     movq %xmm14, 24(%rsp)
     movq %xmm15, 32(%rsp)
     // R10 := ArtMethod* for refs only callee save frame method.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
     // Store ArtMethod* to bottom of stack.
     movq %r10, 0(%rsp)
@@ -168,7 +166,6 @@
     subq MACRO_LITERAL(80 + 4 * 8), %rsp
     CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
     // R10 := ArtMethod* for ref and args callee save frame method.
-    THIS_LOAD_REQUIRES_READ_BARRIER
     movq RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
     // Save FPRs.
     movq %xmm0, 16(%rsp)
@@ -920,8 +917,12 @@
     // Fast path tlab allocation.
     // RDI: uint32_t type_idx, RSI: ArtMethod*
     // RDX, RCX, R8, R9: free. RAX: return val.
+    // TODO: Add read barrier when this function is used.
+    // Might need a special macro since rsi and edx are 32b/64b mismatched.
     movl ART_METHOD_DEX_CACHE_TYPES_OFFSET(%rsi), %edx  // Load dex cache resolved types array
     UNPOISON_HEAP_REF edx
+    // TODO: Add read barrier when this function is used.
+    // Might need to break down into multiple instructions to get the base address in a register.
                                                                // Load the class
     movl MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdx, %rdi, MIRROR_OBJECT_ARRAY_COMPONENT_SIZE), %edx
     UNPOISON_HEAP_REF edx
@@ -1153,6 +1154,60 @@
 END_FUNCTION art_quick_check_cast
 
 
+// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
+MACRO2(POP_REG_NE, reg, exclude_reg)
+    .ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
+      addq MACRO_LITERAL(8), %rsp
+      CFI_ADJUST_CFA_OFFSET(-8)
+    .else
+      POP RAW_VAR(reg)
+    .endif
+END_MACRO
+
+    /*
+     * Macro to insert read barrier, used in art_quick_aput_obj and art_quick_alloc_object_tlab.
+     * obj_reg and dest_reg{32|64} are registers, offset is a defined literal such as
+     * MIRROR_OBJECT_CLASS_OFFSET. dest_reg needs two versions to handle the mismatch between
+     * 64b PUSH/POP and 32b argument.
+     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
+     *
+     * As with art_quick_aput_obj* functions, the 64b versions are in comments.
+     */
+MACRO4(READ_BARRIER, obj_reg, offset, dest_reg32, dest_reg64)
+#ifdef USE_READ_BARRIER
+    PUSH rax                            // save registers that might be used
+    PUSH rdi
+    PUSH rsi
+    PUSH rdx
+    PUSH rcx
+    SETUP_FP_CALLEE_SAVE_FRAME
+    // Outgoing argument set up
+    // movl %edi, %edi                  // pass ref, no-op for now since parameter ref is unused
+    // // movq %rdi, %rdi
+    movl REG_VAR(obj_reg), %esi         // pass obj_reg
+    // movq REG_VAR(obj_reg), %rsi
+    movl MACRO_LITERAL((RAW_VAR(offset))), %edx // pass offset, double parentheses are necessary
+    // movq MACRO_LITERAL((RAW_VAR(offset))), %rdx
+    call SYMBOL(artReadBarrierSlow)     // artReadBarrierSlow(ref, obj_reg, offset)
+    // No need to unpoison return value in rax, artReadBarrierSlow() would do the unpoisoning.
+    .ifnc RAW_VAR(dest_reg32), eax
+    // .ifnc RAW_VAR(dest_reg64), rax
+      movl %eax, REG_VAR(dest_reg32)    // save loaded ref in dest_reg
+      // movq %rax, REG_VAR(dest_reg64)
+    .endif
+    RESTORE_FP_CALLEE_SAVE_FRAME
+    POP_REG_NE rcx, RAW_VAR(dest_reg64) // Restore registers except dest_reg
+    POP_REG_NE rdx, RAW_VAR(dest_reg64)
+    POP_REG_NE rsi, RAW_VAR(dest_reg64)
+    POP_REG_NE rdi, RAW_VAR(dest_reg64)
+    POP_REG_NE rax, RAW_VAR(dest_reg64)
+#else
+    movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg32)
+    // movq RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg64)
+    UNPOISON_HEAP_REF RAW_VAR(dest_reg32) // UNPOISON_HEAP_REF only takes a 32b register
+#endif  // USE_READ_BARRIER
+END_MACRO
+
     /*
      * Entry from managed code for array put operations of objects where the value being stored
      * needs to be checked for compatibility.
@@ -1197,15 +1252,13 @@
     testl %edx, %edx                // store of null
 //  test %rdx, %rdx
     jz .Ldo_aput_null
-    movl MIRROR_OBJECT_CLASS_OFFSET(%edi), %ecx
-//  movq MIRROR_OBJECT_CLASS_OFFSET(%rdi), %rcx
-    UNPOISON_HEAP_REF ecx
-    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%ecx), %ecx
-//  movq MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %rcx
-    UNPOISON_HEAP_REF ecx
-#ifdef USE_HEAP_POISONING
-    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %eax  // rax is free.
-    UNPOISON_HEAP_REF eax
+    READ_BARRIER edi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
+    // READ_BARRIER rdi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
+    READ_BARRIER ecx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
+    // READ_BARRIER rcx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
+#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
+    READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax  // rax is free.
+    // READ_BARRIER rdx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax
     cmpl %eax, %ecx  // value's type == array's component type - trivial assignability
 #else
     cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
@@ -1232,9 +1285,14 @@
     PUSH rdx
     SETUP_FP_CALLEE_SAVE_FRAME
 
-                                  // "Uncompress" = do nothing, as already zero-extended on load.
-    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
-    UNPOISON_HEAP_REF esi
+#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
+    // The load of MIRROR_OBJECT_CLASS_OFFSET(%edx) is redundant, eax still holds the value.
+    movl %eax, %esi               // Pass arg2 = value's class.
+    // movq %rax, %rsi
+#else
+                                     // "Uncompress" = do nothing, as already zero-extended on load.
+    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi  // Pass arg2 = value's class.
+#endif
     movq %rcx, %rdi               // Pass arg1 = array's component type.
 
     call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
@@ -1735,3 +1793,14 @@
     call PLT_SYMBOL(longjmp)
     UNREACHABLE
 END_FUNCTION art_nested_signal_return
+
+DEFINE_FUNCTION art_quick_read_barrier_slow
+    SETUP_FP_CALLEE_SAVE_FRAME
+    subq LITERAL(8), %rsp           // Alignment padding.
+    CFI_ADJUST_CFA_OFFSET(8)
+    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
+    addq LITERAL(8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-8)
+    RESTORE_FP_CALLEE_SAVE_FRAME
+    ret
+END_FUNCTION art_quick_read_barrier_slow
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 17c9fe4..f37e040 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -19,6 +19,7 @@
 #include "arch/context.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/out.h"
 #include "base/stringpiece.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
@@ -565,7 +566,7 @@
 const uint8_t* ArtMethod::GetQuickenedInfo() {
   bool found = false;
   OatFile::OatMethod oat_method =
-      Runtime::Current()->GetClassLinker()->FindOatMethodFor(this, &found);
+      Runtime::Current()->GetClassLinker()->FindOatMethodFor(this, outof(found));
   if (!found || (oat_method.GetQuickCode() != nullptr)) {
     return nullptr;
   }
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index f4f8eaf..350a0d4 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -109,7 +109,7 @@
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.thread_local_pos.
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 150 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 151 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_end.
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 859de4b..7a620e3 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -26,7 +26,7 @@
 #include "utils.h"
 
 // Headers for LogMessage::LogLine.
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
 #include "cutils/log.h"
 #else
 #include <sys/types.h>
@@ -47,7 +47,7 @@
 // Print INTERNAL_FATAL messages directly instead of at destruction time. This only works on the
 // host right now: for the device, a stream buf collating output into lines and calling LogLine or
 // lower-level logging is necessary.
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
 static constexpr bool kPrintInternalFatalDirectly = false;
 #else
 static constexpr bool kPrintInternalFatalDirectly = !kIsTargetBuild;
@@ -234,7 +234,7 @@
   return data_->GetBuffer();
 }
 
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
 static const android_LogPriority kLogSeverityToAndroidLogPriority[] = {
   ANDROID_LOG_VERBOSE, ANDROID_LOG_DEBUG, ANDROID_LOG_INFO, ANDROID_LOG_WARN,
   ANDROID_LOG_ERROR, ANDROID_LOG_FATAL, ANDROID_LOG_FATAL
@@ -245,7 +245,7 @@
 
 void LogMessage::LogLine(const char* file, unsigned int line, LogSeverity log_severity,
                          const char* message) {
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
   const char* tag = ProgramInvocationShortName();
   int priority = kLogSeverityToAndroidLogPriority[log_severity];
   if (priority == ANDROID_LOG_FATAL) {
@@ -264,7 +264,7 @@
 
 void LogMessage::LogLineLowStack(const char* file, unsigned int line, LogSeverity log_severity,
                                  const char* message) {
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
   // Use android_writeLog() to avoid stack-based buffers used by android_printLog().
   const char* tag = ProgramInvocationShortName();
   int priority = kLogSeverityToAndroidLogPriority[log_severity];
diff --git a/runtime/base/out.h b/runtime/base/out.h
new file mode 100644
index 0000000..7b4bc12
--- /dev/null
+++ b/runtime/base/out.h
@@ -0,0 +1,279 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_OUT_H_
+#define ART_RUNTIME_BASE_OUT_H_
+
+#include <base/macros.h>
+#include <base/logging.h>
+
+#include <memory>
+// A zero-overhead abstraction marker that means this value is meant to be used as an out
+// parameter for functions. It mimics semantics of a pointer that the function will
+// dereference and output its value into.
+//
+// Inspired by the 'out' language keyword in C#.
+//
+// Declaration example:
+//   int do_work(size_t args, out<int> result);
+//               // returns 0 on success, sets result, otherwise error code
+//
+// Use-site example:
+// // (1) -- out of a local variable or field
+//   int res;
+//   if (do_work(1, outof(res)) == 0) {
+//     cout << "success: " << res;
+//   }
+// // (2) -- out of an iterator
+//   std::vector<int> list = {1};
+//   std::vector<int>::iterator it = list.begin();
+//   if (do_work(2, outof_iterator(it)) == 0) {
+//     cout << "success: " << list[0];
+//   }
+// // (3) -- out of a pointer
+//   int* array = &some_other_value;
+//   if (do_work(3, outof_ptr(array)) == 0) {
+//     cout << "success: " << *array;
+//   }
+//
+// The type will also automatically decay into a C-style pointer for compatibility
+// with calling legacy code that expect pointers.
+//
+// Declaration example:
+//   void write_data(int* res) { *res = 5; }
+//
+// Use-site example:
+//   int data;
+//   write_data(outof(data));
+//   // data is now '5'
+// (The other outof_* functions can be used analogously when the target is a C-style pointer).
+//
+// ---------------
+//
+// Other typical pointer operations such as addition, subtraction, etc are banned
+// since there is exactly one value being output.
+//
+namespace art {
+
+// Forward declarations. See below for specific functions.
+template <typename T>
+struct out_convertible;  // Implicitly converts to out<T> or T*.
+
+// Helper function that automatically infers 'T'
+//
+// Returns a type that is implicitly convertible to either out<T> or T* depending
+// on the call site.
+//
+// Example:
+//   int do_work(size_t args, out<int> result);
+//               // returns 0 on success, sets result, otherwise error code
+//
+// Usage:
+//   int res;
+//   if (do_work(1, outof(res)) == 0) {
+//     cout << "success: " << res;
+//   }
+template <typename T>
+out_convertible<T> outof(T& param) ALWAYS_INLINE;
+
+// Helper function that automatically infers 'T' from a container<T>::iterator.
+// To use when the argument is already inside an iterator.
+//
+// Returns a type that is implicitly convertible to either out<T> or T* depending
+// on the call site.
+//
+// Example:
+//   int do_work(size_t args, out<int> result);
+//               // returns 0 on success, sets result, otherwise error code
+//
+// Usage:
+//   std::vector<int> list = {1};
+//   std::vector<int>::iterator it = list.begin();
+//   if (do_work(2, outof_iterator(it)) == 0) {
+//     cout << "success: " << list[0];
+//   }
+template <typename It>
+auto ALWAYS_INLINE outof_iterator(It iter)
+    -> out_convertible<typename std::remove_reference<decltype(*iter)>::type>;
+
+// Helper function that automatically infers 'T'.
+// To use when the argument is already a pointer.
+//
+// ptr must be not-null, else a DCHECK failure will occur.
+//
+// Returns a type that is implicitly convertible to either out<T> or T* depending
+// on the call site.
+//
+// Example:
+//   int do_work(size_t args, out<int> result);
+//               // returns 0 on success, sets result, otherwise error code
+//
+// Usage:
+//   int* array = &some_other_value;
+//   if (do_work(3, outof_ptr(array)) == 0) {
+//     cout << "success: " << *array;
+//   }
+template <typename T>
+out_convertible<T> outof_ptr(T* ptr) ALWAYS_INLINE;
+
+// Zero-overhead wrapper around a non-null non-const pointer meant to be used to output
+// the result of parameters. There are no other extra guarantees.
+//
+// The most common use case is to treat this like a typical pointer argument, for example:
+//
+// void write_out_5(out<int> x) {
+//   *x = 5;
+// }
+//
+// The following operations are supported:
+//   operator* -> use like a pointer (guaranteed to be non-null)
+//   == and != -> compare against other pointers for (in)equality
+//   begin/end -> use in standard C++ algorithms as if it was an iterator
+template <typename T>
+struct out {
+  // Has to be mutable lref. Otherwise how would you write something as output into it?
+  explicit inline out(T& param)
+    : param_(param) {}
+
+  // Model a single-element iterator (or pointer) to the parameter.
+  inline T& operator *() {
+    return param_;
+  }
+
+  // Model dereferencing fields/methods on a pointer.
+  inline T* operator->() {
+    return std::addressof(param_);
+  }
+
+  //
+  // Comparison against this or other pointers (by address of the referenced object).
+  //
+  template <typename T2>
+  inline bool operator==(const T2* other) const {
+    return std::addressof(param_) == other;
+  }
+
+  // Not a template: an unused template parameter could never be deduced for 'a == b'.
+  inline bool operator==(const out<T>& other) const {
+    return std::addressof(param_) == std::addressof(other.param_);
+  }
+
+  // An out-parameter is never null.
+  inline bool operator==(std::nullptr_t) const {
+    return false;
+  }
+
+  template <typename T2>
+  inline bool operator!=(const T2* other) const {
+    return std::addressof(param_) != other;
+  }
+
+  // Not a template: an unused template parameter could never be deduced for 'a != b'.
+  inline bool operator!=(const out<T>& other) const {
+    return std::addressof(param_) != std::addressof(other.param_);
+  }
+
+  // An out-parameter is never null.
+  inline bool operator!=(std::nullptr_t) const {
+    return true;
+  }
+
+  //
+  // Iterator interface implementation. Use with standard algorithms.
+  // TODO: (add items in iterator_traits if this is truly useful).
+  //
+
+  inline T* begin() {
+    return std::addressof(param_);
+  }
+
+  inline const T* begin() const {
+    return std::addressof(param_);
+  }
+
+  inline T* end() {
+    return std::addressof(param_) + 1;
+  }
+
+  inline const T* end() const {
+    return std::addressof(param_) + 1;
+  }
+
+ private:
+  T& param_;
+};
+
+//
+// IMPLEMENTATION DETAILS
+//
+
+//
+// This intermediate type should not be used directly by user code.
+//
+// It enables 'outof(x)' to be passed into functions that expect either
+// an out<T> **or** a regular C-style pointer (T*).
+//
+template <typename T>
+struct out_convertible {
+  explicit inline out_convertible(T& param)
+    : param_(param) {
+  }
+
+  // Implicitly convert into an out<T> for standard usage.
+  inline operator out<T>() {
+    return out<T>(param_);
+  }
+
+  // Implicitly convert into a '*' for legacy usage.
+  inline operator T*() {
+    return std::addressof(param_);
+  }
+ private:
+  T& param_;
+};
+
+// Helper function that automatically infers 'T'
+template <typename T>
+inline out_convertible<T> outof(T& param) {
+  return out_convertible<T>(param);
+}
+
+// Helper function that automatically infers 'T'.
+// To use when the argument is already inside an iterator.
+template <typename It>
+inline auto outof_iterator(It iter)
+    -> out_convertible<typename std::remove_reference<decltype(*iter)>::type> {
+  return outof(*iter);
+}
+
+// Helper function that automatically infers 'T'.
+// To use when the argument is already a pointer.
+template <typename T>
+inline out_convertible<T> outof_ptr(T* ptr) {
+  DCHECK(ptr != nullptr);
+  return outof(*ptr);
+}
+
+// Helper function that automatically infers 'T'.
+// Forwards an out parameter from one function into another.
+template <typename T>
+inline out_convertible<T> outof_forward(out<T>& out_param) {
+  T& param = *out_param;
+  return out_convertible<T>(param);
+}
+
+}  // namespace art
+#endif  // ART_RUNTIME_BASE_OUT_H_
diff --git a/runtime/base/out_fwd.h b/runtime/base/out_fwd.h
new file mode 100644
index 0000000..6b2f926
--- /dev/null
+++ b/runtime/base/out_fwd.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_OUT_FWD_H_
+#define ART_RUNTIME_BASE_OUT_FWD_H_
+
+// Forward declaration for "out<T>". See <out.h> for more information.
+// Other headers use only the forward declaration.
+
+// Callers of functions that take an out<T> parameter should #include <out.h> to get outof(),
+// which constructs out<T> through type inference.
+namespace art {
+template <typename T>
+struct out;
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_OUT_FWD_H_
diff --git a/runtime/base/out_test.cc b/runtime/base/out_test.cc
new file mode 100644
index 0000000..4274200
--- /dev/null
+++ b/runtime/base/out_test.cc
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "out.h"
+
+#include <algorithm>
+#include <gtest/gtest.h>
+
+namespace art {
+
+struct OutTest : public testing::Test {
+  // Multiplies values less than 10 by two, stores the result and returns 0.
+  // Returns -1 if the original value was not multiplied by two.
+  static int multiply_small_values_by_two(size_t args, out<int> result) {
+    if (args < 10) {
+      *result = args * 2;
+      return 0;
+    } else {
+      return -1;
+    }
+  }
+};
+
+extern "C" int multiply_small_values_by_two_legacy(size_t args, int* result) {
+  if (args < 10) {
+    *result = args * 2;
+    return 0;
+  } else {
+    return -1;
+  }
+}
+
+TEST_F(OutTest, TraditionalCall) {
+  // For calling traditional C++ functions.
+  int res;
+  EXPECT_EQ(multiply_small_values_by_two(1, outof(res)), 0);
+  EXPECT_EQ(2, res);
+}
+
+TEST_F(OutTest, LegacyCall) {
+  // For calling legacy, e.g. C-style functions.
+  int res2;
+  EXPECT_EQ(0, multiply_small_values_by_two_legacy(1, outof(res2)));
+  EXPECT_EQ(2, res2);
+}
+
+TEST_F(OutTest, CallFromIterator) {
+  // For calling a function with a parameter originating as an iterator.
+  std::vector<int> list = {1, 2, 3};  // NOLINT [whitespace/labels] [4]
+  std::vector<int>::iterator it = list.begin();
+
+  EXPECT_EQ(0, multiply_small_values_by_two(2, outof_iterator(it)));
+  EXPECT_EQ(4, list[0]);
+}
+
+TEST_F(OutTest, CallFromPointer) {
+  // For calling a function with a parameter originating as a C-pointer.
+  std::vector<int> list = {1, 2, 3};  // NOLINT [whitespace/labels] [4]
+
+  int* list_ptr = &list[2];  // 3
+
+  EXPECT_EQ(0, multiply_small_values_by_two(2, outof_ptr(list_ptr)));
+  EXPECT_EQ(4, list[2]);
+}
+
+TEST_F(OutTest, OutAsIterator) {
+  // For using the out<T> parameter as an iterator inside of the callee.
+  std::vector<int> list;
+  int x = 100;
+  out<int> out_from_x = outof(x);
+
+  for (const int& val : out_from_x) {
+    list.push_back(val);
+  }
+
+  ASSERT_EQ(1u, list.size());
+  EXPECT_EQ(100, list[0]);
+
+  // A more typical use-case would be to use std algorithms.
+  EXPECT_NE(out_from_x.end(),
+            std::find(out_from_x.begin(),
+                      out_from_x.end(),
+                      100));  // Search for '100' in out.
+}
+
+}  // namespace art
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 11901b3..c08417f 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -117,8 +117,10 @@
   return resolved_method;
 }
 
-inline ArtMethod* ClassLinker::ResolveMethod(Thread* self, uint32_t method_idx,
-                                             ArtMethod* referrer, InvokeType type) {
+inline ArtMethod* ClassLinker::ResolveMethod(Thread* self,
+                                             uint32_t method_idx,
+                                             ArtMethod* referrer,
+                                             InvokeType type) {
   ArtMethod* resolved_method = GetResolvedMethod(method_idx, referrer);
   if (UNLIKELY(resolved_method == nullptr)) {
     mirror::Class* declaring_class = referrer->GetDeclaringClass();
@@ -143,7 +145,8 @@
   return GetResolvedField(field_idx, field_declaring_class->GetDexCache());
 }
 
-inline ArtField* ClassLinker::ResolveField(uint32_t field_idx, ArtMethod* referrer,
+inline ArtField* ClassLinker::ResolveField(uint32_t field_idx,
+                                           ArtMethod* referrer,
                                            bool is_static) {
   mirror::Class* declaring_class = referrer->GetDeclaringClass();
   ArtField* resolved_field = GetResolvedField(field_idx, declaring_class);
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 6a76bf7..82cf7af 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -30,6 +30,7 @@
 #include "base/arena_allocator.h"
 #include "base/casts.h"
 #include "base/logging.h"
+#include "base/out.h"
 #include "base/scoped_arena_containers.h"
 #include "base/scoped_flock.h"
 #include "base/stl_util.h"
@@ -198,7 +199,7 @@
     return lhs.size < rhs.size || (lhs.size == rhs.size && lhs.start_offset > rhs.start_offset);
   }
 };
-typedef std::priority_queue<FieldGap, std::vector<FieldGap>, FieldGapsComparator> FieldGaps;
+using FieldGaps = std::priority_queue<FieldGap, std::vector<FieldGap>, FieldGapsComparator>;
 
 // Adds largest aligned gaps to queue of gaps.
 static void AddFieldGap(uint32_t gap_start, uint32_t gap_end, FieldGaps* gaps) {
@@ -775,12 +776,13 @@
                           // be from multidex, which resolves correctly).
 };
 
-static void AddDexFilesFromOat(const OatFile* oat_file, bool already_loaded,
+static void AddDexFilesFromOat(const OatFile* oat_file,
+                               bool already_loaded,
                                std::priority_queue<DexFileAndClassPair>* heap) {
   const std::vector<const OatDexFile*>& oat_dex_files = oat_file->GetOatDexFiles();
   for (const OatDexFile* oat_dex_file : oat_dex_files) {
     std::string error;
-    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error);
+    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(outof(error));
     if (dex_file.get() == nullptr) {
       LOG(WARNING) << "Could not create dex file from oat file: " << error;
     } else {
@@ -836,7 +838,7 @@
 // against the following top element. If the descriptor is the same, it is now checked whether
 // the two elements agree on whether their dex file was from an already-loaded oat-file or the
 // new oat file. Any disagreement indicates a collision.
-bool ClassLinker::HasCollisions(const OatFile* oat_file, std::string* error_msg) {
+bool ClassLinker::HasCollisions(const OatFile* oat_file, out<std::string> error_msg) {
   if (!kDuplicateClassesCheck) {
     return false;
   }
@@ -901,10 +903,9 @@
 }
 
 std::vector<std::unique_ptr<const DexFile>> ClassLinker::OpenDexFilesFromOat(
-    const char* dex_location, const char* oat_location,
-    std::vector<std::string>* error_msgs) {
-  CHECK(error_msgs != nullptr);
-
+    const char* dex_location,
+    const char* oat_location,
+    out<std::vector<std::string>> error_msgs) {
   // Verify we aren't holding the mutator lock, which could starve GC if we
   // have to generate or relocate an oat file.
   Locks::mutator_lock_->AssertNotHeld(Thread::Current());
@@ -946,7 +947,7 @@
     std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
     if (oat_file.get() != nullptr) {
       // Take the file only if it has no collisions, or we must take it because of preopting.
-      bool accept_oat_file = !HasCollisions(oat_file.get(), &error_msg);
+      bool accept_oat_file = !HasCollisions(oat_file.get(), outof(error_msg));
       if (!accept_oat_file) {
         // Failed the collision check. Print warning.
         if (Runtime::Current()->IsDexFileFallbackEnabled()) {
@@ -980,8 +981,7 @@
   if (source_oat_file != nullptr) {
     dex_files = oat_file_assistant.LoadDexFiles(*source_oat_file, dex_location);
     if (dex_files.empty()) {
-      error_msgs->push_back("Failed to open dex files from "
-          + source_oat_file->GetLocation());
+      error_msgs->push_back("Failed to open dex files from " + source_oat_file->GetLocation());
     }
   }
 
@@ -1017,7 +1017,8 @@
   return nullptr;
 }
 
-static void SanityCheckArtMethod(ArtMethod* m, mirror::Class* expected_class,
+static void SanityCheckArtMethod(ArtMethod* m,
+                                 mirror::Class* expected_class,
                                  gc::space::ImageSpace* space)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   if (m->IsRuntimeMethod()) {
@@ -1035,9 +1036,11 @@
   }
 }
 
-static void SanityCheckArtMethodPointerArray(
-    mirror::PointerArray* arr, mirror::Class* expected_class, size_t pointer_size,
-    gc::space::ImageSpace* space) SHARED_REQUIRES(Locks::mutator_lock_) {
+static void SanityCheckArtMethodPointerArray(mirror::PointerArray* arr,
+                                             mirror::Class* expected_class,
+                                             size_t pointer_size,
+                                             gc::space::ImageSpace* space)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   CHECK(arr != nullptr);
   for (int32_t j = 0; j < arr->GetLength(); ++j) {
     auto* method = arr->GetElementPtrSize<ArtMethod*>(j, pointer_size);
@@ -1143,7 +1146,7 @@
                                                                      nullptr);
     CHECK(oat_dex_file != nullptr) << oat_file.GetLocation() << " " << dex_file_location;
     std::string error_msg;
-    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
+    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(outof(error_msg));
     if (dex_file.get() == nullptr) {
       LOG(FATAL) << "Failed to open dex file " << dex_file_location
                  << " from within oat file " << oat_file.GetLocation()
@@ -1236,11 +1239,8 @@
 
 bool ClassLinker::ClassInClassTable(mirror::Class* klass) {
   ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  auto it = class_table_.Find(GcRoot<mirror::Class>(klass));
-  if (it == class_table_.end()) {
-    return false;
-  }
-  return it->Read() == klass;
+  ClassTable* const class_table = ClassTableForClassLoader(klass->GetClassLoader());
+  return class_table != nullptr && class_table->Contains(klass);
 }
 
 void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) {
@@ -1263,26 +1263,30 @@
     // Moving concurrent:
     // Need to make sure to not copy ArtMethods without doing read barriers since the roots are
     // marked concurrently and we don't hold the classlinker_classes_lock_ when we do the copy.
-    for (GcRoot<mirror::Class>& root : class_table_) {
-      buffered_visitor.VisitRoot(root);
+    std::vector<std::pair<GcRoot<mirror::ClassLoader>, ClassTable*>> reinsert;
+    for (auto it = classes_.begin(); it != classes_.end(); ) {
+      it->second->VisitRoots(visitor, flags);
+      const GcRoot<mirror::ClassLoader>& root = it->first;
+      mirror::ClassLoader* old_ref = root.Read<kWithoutReadBarrier>();
+      root.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
+      mirror::ClassLoader* new_ref = root.Read<kWithoutReadBarrier>();
+      if (new_ref != old_ref) {
+        reinsert.push_back(*it);
+        it = classes_.erase(it);
+      } else {
+        ++it;
+      }
     }
-    // PreZygote classes can't move so we won't need to update fields' declaring classes.
-    for (GcRoot<mirror::Class>& root : pre_zygote_class_table_) {
-      buffered_visitor.VisitRoot(root);
+    for (auto& pair : reinsert) {
+      classes_.Put(pair.first, pair.second);
     }
   } else if ((flags & kVisitRootFlagNewRoots) != 0) {
     for (auto& root : new_class_roots_) {
       mirror::Class* old_ref = root.Read<kWithoutReadBarrier>();
       root.VisitRoot(visitor, RootInfo(kRootStickyClass));
       mirror::Class* new_ref = root.Read<kWithoutReadBarrier>();
-      if (UNLIKELY(new_ref != old_ref)) {
-        // Uh ohes, GC moved a root in the log. Need to search the class_table and update the
-        // corresponding object. This is slow, but luckily for us, this may only happen with a
-        // concurrent moving GC.
-        auto it = class_table_.Find(GcRoot<mirror::Class>(old_ref));
-        DCHECK(it != class_table_.end());
-        *it = GcRoot<mirror::Class>(new_ref);
-      }
+      // Concurrent moving GC marked new roots through the to-space invariant.
+      CHECK_EQ(new_ref, old_ref);
     }
   }
   buffered_visitor.Flush();  // Flush before clearing new_class_roots_.
@@ -1331,91 +1335,103 @@
   }
 }
 
-void ClassLinker::VisitClasses(ClassVisitor* visitor, void* arg) {
+void ClassLinker::VisitClassesInternal(ClassVisitor* visitor) {
+  for (auto& pair : classes_) {
+    ClassTable* const class_table = pair.second;
+    if (!class_table->Visit(visitor)) {
+      return;
+    }
+  }
+}
+
+void ClassLinker::VisitClasses(ClassVisitor* visitor) {
   if (dex_cache_image_class_lookup_required_) {
     MoveImageClassesToClassTable();
   }
-  // TODO: why isn't this a ReaderMutexLock?
-  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  for (GcRoot<mirror::Class>& root : class_table_) {
-    if (!visitor(root.Read(), arg)) {
-      return;
-    }
-  }
-  for (GcRoot<mirror::Class>& root : pre_zygote_class_table_) {
-    if (!visitor(root.Read(), arg)) {
-      return;
-    }
+  Thread* const self = Thread::Current();
+  ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
+  // Not safe to have thread suspension when we are holding a lock.
+  if (self != nullptr) {
+    ScopedAssertNoThreadSuspension nts(self, __FUNCTION__);
+    VisitClassesInternal(visitor);
+  } else {
+    VisitClassesInternal(visitor);
   }
 }
 
-static bool GetClassesVisitorSet(mirror::Class* c, void* arg) {
-  std::set<mirror::Class*>* classes = reinterpret_cast<std::set<mirror::Class*>*>(arg);
-  classes->insert(c);
-  return true;
-}
-
-struct GetClassesVisitorArrayArg {
-  Handle<mirror::ObjectArray<mirror::Class>>* classes;
-  int32_t index;
-  bool success;
+class GetClassesInToVector : public ClassVisitor {
+ public:
+  bool Visit(mirror::Class* klass) OVERRIDE {
+    classes_.push_back(klass);
+    return true;
+  }
+  std::vector<mirror::Class*> classes_;
 };
 
-static bool GetClassesVisitorArray(mirror::Class* c, void* varg)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  GetClassesVisitorArrayArg* arg = reinterpret_cast<GetClassesVisitorArrayArg*>(varg);
-  if (arg->index < (*arg->classes)->GetLength()) {
-    (*arg->classes)->Set(arg->index, c);
-    arg->index++;
-    return true;
-  } else {
-    arg->success = false;
+class GetClassInToObjectArray : public ClassVisitor {
+ public:
+  explicit GetClassInToObjectArray(mirror::ObjectArray<mirror::Class>* arr)
+      : arr_(arr), index_(0) {}
+
+  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    ++index_;
+    if (index_ <= arr_->GetLength()) {
+      arr_->Set(index_ - 1, klass);
+      return true;
+    }
     return false;
   }
-}
 
-void ClassLinker::VisitClassesWithoutClassesLock(ClassVisitor* visitor, void* arg) {
+  bool Succeeded() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return index_ <= arr_->GetLength();
+  }
+
+ private:
+  mirror::ObjectArray<mirror::Class>* const arr_;
+  int32_t index_;
+};
+
+void ClassLinker::VisitClassesWithoutClassesLock(ClassVisitor* visitor) {
   // TODO: it may be possible to avoid secondary storage if we iterate over dex caches. The problem
   // is avoiding duplicates.
   if (!kMovingClasses) {
-    std::set<mirror::Class*> classes;
-    VisitClasses(GetClassesVisitorSet, &classes);
-    for (mirror::Class* klass : classes) {
-      if (!visitor(klass, arg)) {
+    GetClassesInToVector accumulator;
+    VisitClasses(&accumulator);
+    for (mirror::Class* klass : accumulator.classes_) {
+      if (!visitor->Visit(klass)) {
         return;
       }
     }
   } else {
-    Thread* self = Thread::Current();
+    Thread* const self = Thread::Current();
     StackHandleScope<1> hs(self);
-    MutableHandle<mirror::ObjectArray<mirror::Class>> classes =
-        hs.NewHandle<mirror::ObjectArray<mirror::Class>>(nullptr);
-    GetClassesVisitorArrayArg local_arg;
-    local_arg.classes = &classes;
-    local_arg.success = false;
+    auto classes = hs.NewHandle<mirror::ObjectArray<mirror::Class>>(nullptr);
     // We size the array assuming classes won't be added to the class table during the visit.
     // If this assumption fails we iterate again.
-    while (!local_arg.success) {
+    while (true) {
       size_t class_table_size;
       {
         ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
-        class_table_size = class_table_.Size() + pre_zygote_class_table_.Size();
+        // Add 100 in case new classes get loaded when we are filling in the object array.
+        class_table_size = NumZygoteClasses() + NumNonZygoteClasses() + 100;
       }
       mirror::Class* class_type = mirror::Class::GetJavaLangClass();
       mirror::Class* array_of_class = FindArrayClass(self, &class_type);
       classes.Assign(
           mirror::ObjectArray<mirror::Class>::Alloc(self, array_of_class, class_table_size));
       CHECK(classes.Get() != nullptr);  // OOME.
-      local_arg.index = 0;
-      local_arg.success = true;
-      VisitClasses(GetClassesVisitorArray, &local_arg);
+      GetClassInToObjectArray accumulator(classes.Get());
+      VisitClasses(&accumulator);
+      if (accumulator.Succeeded()) {
+        break;
+      }
     }
     for (int32_t i = 0; i < classes->GetLength(); ++i) {
       // If the class table shrank during creation of the clases array we expect null elements. If
       // the class table grew then the loop repeats. If classes are created after the loop has
       // finished then we don't visit.
       mirror::Class* klass = classes->Get(i);
-      if (klass != nullptr && !visitor(klass, arg)) {
+      if (klass != nullptr && !visitor->Visit(klass)) {
         return;
       }
     }
@@ -1443,6 +1459,7 @@
   mirror::LongArray::ResetArrayClass();
   mirror::ShortArray::ResetArrayClass();
   STLDeleteElements(&oat_files_);
+  STLDeleteValues(&classes_);
 }
 
 mirror::PointerArray* ClassLinker::AllocPointerArray(Thread* self, size_t length) {
@@ -1489,7 +1506,8 @@
   return dex_cache.Get();
 }
 
-mirror::Class* ClassLinker::AllocClass(Thread* self, mirror::Class* java_lang_Class,
+mirror::Class* ClassLinker::AllocClass(Thread* self,
+                                       mirror::Class* java_lang_Class,
                                        uint32_t class_size) {
   DCHECK_GE(class_size, sizeof(mirror::Class));
   gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -1508,13 +1526,14 @@
   return AllocClass(self, GetClassRoot(kJavaLangClass), class_size);
 }
 
-mirror::ObjectArray<mirror::StackTraceElement>* ClassLinker::AllocStackTraceElementArray(
-    Thread* self, size_t length) {
+mirror::ObjectArray<mirror::StackTraceElement>*
+ClassLinker::AllocStackTraceElementArray(Thread* self, size_t length) {
   return mirror::ObjectArray<mirror::StackTraceElement>::Alloc(
       self, GetClassRoot(kJavaLangStackTraceElementArrayClass), length);
 }
 
-mirror::Class* ClassLinker::EnsureResolved(Thread* self, const char* descriptor,
+mirror::Class* ClassLinker::EnsureResolved(Thread* self,
+                                           const char* descriptor,
                                            mirror::Class* klass) {
   DCHECK(klass != nullptr);
 
@@ -1573,7 +1592,8 @@
 
 // Search a collection of DexFiles for a descriptor
 ClassPathEntry FindInClassPath(const char* descriptor,
-                               size_t hash, const std::vector<const DexFile*>& class_path) {
+                               size_t hash,
+                               const std::vector<const DexFile*>& class_path) {
   for (const DexFile* dex_file : class_path) {
     const DexFile::ClassDef* dex_class_def = dex_file->FindClassDef(descriptor, hash);
     if (dex_class_def != nullptr) {
@@ -1592,16 +1612,17 @@
 }
 
 bool ClassLinker::FindClassInPathClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
-                                             Thread* self, const char* descriptor,
+                                             Thread* self,
+                                             const char* descriptor,
                                              size_t hash,
                                              Handle<mirror::ClassLoader> class_loader,
-                                             mirror::Class** result) {
+                                             out<mirror::Class*> result) {
   // Termination case: boot class-loader.
   if (IsBootClassLoader(soa, class_loader.Get())) {
     // The boot class loader, search the boot class path.
     ClassPathEntry pair = FindInClassPath(descriptor, hash, boot_class_path_);
     if (pair.second != nullptr) {
-      mirror::Class* klass = LookupClass(self, descriptor, hash, nullptr);
+      mirror::Class* klass = LookupClass(self, descriptor, hash, nullptr /* no classloader */);
       if (klass != nullptr) {
         *result = EnsureResolved(self, descriptor, klass);
       } else {
@@ -1703,7 +1724,8 @@
   return true;
 }
 
-mirror::Class* ClassLinker::FindClass(Thread* self, const char* descriptor,
+mirror::Class* ClassLinker::FindClass(Thread* self,
+                                      const char* descriptor,
                                       Handle<mirror::ClassLoader> class_loader) {
   DCHECK_NE(*descriptor, '\0') << "descriptor is empty string";
   DCHECK(self != nullptr);
@@ -1739,7 +1761,7 @@
   } else {
     ScopedObjectAccessUnchecked soa(self);
     mirror::Class* cp_klass;
-    if (FindClassInPathClassLoader(soa, self, descriptor, hash, class_loader, &cp_klass)) {
+    if (FindClassInPathClassLoader(soa, self, descriptor, hash, class_loader, outof(cp_klass))) {
       // The chain was understood. So the value in cp_klass is either the class we were looking
       // for, or not found.
       if (cp_klass != nullptr) {
@@ -1792,7 +1814,9 @@
   UNREACHABLE();
 }
 
-mirror::Class* ClassLinker::DefineClass(Thread* self, const char* descriptor, size_t hash,
+mirror::Class* ClassLinker::DefineClass(Thread* self,
+                                        const char* descriptor,
+                                        size_t hash,
                                         Handle<mirror::ClassLoader> class_loader,
                                         const DexFile& dex_file,
                                         const DexFile::ClassDef& dex_class_def) {
@@ -1878,7 +1902,7 @@
   auto interfaces = hs.NewHandle<mirror::ObjectArray<mirror::Class>>(nullptr);
 
   MutableHandle<mirror::Class> h_new_class = hs.NewHandle<mirror::Class>(nullptr);
-  if (!LinkClass(self, descriptor, klass, interfaces, &h_new_class)) {
+  if (!LinkClass(self, descriptor, klass, interfaces, outof(h_new_class))) {
     // Linking failed.
     if (!klass->IsErroneous()) {
       mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
@@ -1961,8 +1985,9 @@
                                          image_pointer_size_);
 }
 
-OatFile::OatClass ClassLinker::FindOatClass(const DexFile& dex_file, uint16_t class_def_idx,
-                                            bool* found) {
+OatFile::OatClass ClassLinker::FindOatClass(const DexFile& dex_file,
+                                            uint16_t class_def_idx,
+                                            out<bool> found) {
   DCHECK_NE(class_def_idx, DexFile::kDexNoIndex16);
   const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile();
   if (oat_dex_file == nullptr) {
@@ -1973,7 +1998,8 @@
   return oat_dex_file->GetOatClass(class_def_idx);
 }
 
-static uint32_t GetOatMethodIndexFromMethodIndex(const DexFile& dex_file, uint16_t class_def_idx,
+static uint32_t GetOatMethodIndexFromMethodIndex(const DexFile& dex_file,
+                                                 uint16_t class_def_idx,
                                                  uint32_t method_idx) {
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_idx);
   const uint8_t* class_data = dex_file.GetClassData(class_def);
@@ -2007,7 +2033,7 @@
   UNREACHABLE();
 }
 
-const OatFile::OatMethod ClassLinker::FindOatMethodFor(ArtMethod* method, bool* found) {
+const OatFile::OatMethod ClassLinker::FindOatMethodFor(ArtMethod* method, out<bool> found) {
   // Although we overwrite the trampoline of non-static methods, we may get here via the resolution
   // method for direct methods (or virtual methods made direct).
   mirror::Class* declaring_class = method->GetDeclaringClass();
@@ -2039,7 +2065,7 @@
                                              method->GetDexMethodIndex()));
   OatFile::OatClass oat_class = FindOatClass(*declaring_class->GetDexCache()->GetDexFile(),
                                              declaring_class->GetDexClassDefIndex(),
-                                             found);
+                                             outof_forward(found));
   if (!(*found)) {
     return OatFile::OatMethod::Invalid();
   }
@@ -2053,7 +2079,7 @@
     return GetQuickProxyInvokeHandler();
   }
   bool found;
-  OatFile::OatMethod oat_method = FindOatMethodFor(method, &found);
+  OatFile::OatMethod oat_method = FindOatMethodFor(method, outof(found));
   if (found) {
     auto* code = oat_method.GetQuickCode();
     if (code != nullptr) {
@@ -2079,7 +2105,7 @@
     return nullptr;
   }
   bool found;
-  OatFile::OatMethod oat_method = FindOatMethodFor(method, &found);
+  OatFile::OatMethod oat_method = FindOatMethodFor(method, outof(found));
   if (found) {
     return oat_method.GetQuickCode();
   }
@@ -2093,10 +2119,11 @@
   return nullptr;
 }
 
-const void* ClassLinker::GetQuickOatCodeFor(const DexFile& dex_file, uint16_t class_def_idx,
+const void* ClassLinker::GetQuickOatCodeFor(const DexFile& dex_file,
+                                            uint16_t class_def_idx,
                                             uint32_t method_idx) {
   bool found;
-  OatFile::OatClass oat_class = FindOatClass(dex_file, class_def_idx, &found);
+  OatFile::OatClass oat_class = FindOatClass(dex_file, class_def_idx, outof(found));
   if (!found) {
     return nullptr;
   }
@@ -2147,7 +2174,7 @@
   }
   bool has_oat_class;
   OatFile::OatClass oat_class = FindOatClass(dex_file, klass->GetDexClassDefIndex(),
-                                             &has_oat_class);
+                                             outof(has_oat_class));
   // Link the code of methods skipped by LinkCode.
   for (size_t method_index = 0; it.HasNextDirectMethod(); ++method_index, it.Next()) {
     ArtMethod* method = klass->GetDirectMethod(method_index, image_pointer_size_);
@@ -2175,7 +2202,8 @@
   // Ignore virtual methods on the iterator.
 }
 
-void ClassLinker::LinkCode(ArtMethod* method, const OatFile::OatClass* oat_class,
+void ClassLinker::LinkCode(ArtMethod* method,
+                           const OatFile::OatClass* oat_class,
                            uint32_t class_def_method_index) {
   Runtime* const runtime = Runtime::Current();
   if (runtime->IsAotCompiler()) {
@@ -2227,8 +2255,10 @@
   }
 }
 
-void ClassLinker::SetupClass(const DexFile& dex_file, const DexFile::ClassDef& dex_class_def,
-                             Handle<mirror::Class> klass, mirror::ClassLoader* class_loader) {
+void ClassLinker::SetupClass(const DexFile& dex_file,
+                             const DexFile::ClassDef& dex_class_def,
+                             Handle<mirror::Class> klass,
+                             mirror::ClassLoader* class_loader) {
   CHECK(klass.Get() != nullptr);
   CHECK(klass->GetDexCache() != nullptr);
   CHECK_EQ(mirror::Class::kStatusNotReady, klass->GetStatus());
@@ -2248,7 +2278,8 @@
   CHECK(klass->GetDexCacheStrings() != nullptr);
 }
 
-void ClassLinker::LoadClass(Thread* self, const DexFile& dex_file,
+void ClassLinker::LoadClass(Thread* self,
+                            const DexFile& dex_file,
                             const DexFile::ClassDef& dex_class_def,
                             Handle<mirror::Class> klass) {
   const uint8_t* class_data = dex_file.GetClassData(dex_class_def);
@@ -2258,7 +2289,7 @@
   bool has_oat_class = false;
   if (Runtime::Current()->IsStarted() && !Runtime::Current()->IsAotCompiler()) {
     OatFile::OatClass oat_class = FindOatClass(dex_file, klass->GetDexClassDefIndex(),
-                                               &has_oat_class);
+                                               outof(has_oat_class));
     if (has_oat_class) {
       LoadClassMembers(self, dex_file, class_data, klass, &oat_class);
     }
@@ -2287,7 +2318,8 @@
   return reinterpret_cast<ArtMethod*>(ptr);
 }
 
-void ClassLinker::LoadClassMembers(Thread* self, const DexFile& dex_file,
+void ClassLinker::LoadClassMembers(Thread* self,
+                                   const DexFile& dex_file,
                                    const uint8_t* class_data,
                                    Handle<mirror::Class> klass,
                                    const OatFile::OatClass* oat_class) {
@@ -2377,10 +2409,13 @@
     }
     DCHECK(!it.HasNext());
   }
+  // Ensure that the card is marked so that remembered sets pick up native roots.
+  Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(klass.Get());
   self->AllowThreadSuspension();
 }
 
-void ClassLinker::LoadField(const ClassDataItemIterator& it, Handle<mirror::Class> klass,
+void ClassLinker::LoadField(const ClassDataItemIterator& it,
+                            Handle<mirror::Class> klass,
                             ArtField* dst) {
   const uint32_t field_idx = it.GetMemberIndex();
   dst->SetDexFieldIndex(field_idx);
@@ -2388,8 +2423,11 @@
   dst->SetAccessFlags(it.GetFieldAccessFlags());
 }
 
-void ClassLinker::LoadMethod(Thread* self, const DexFile& dex_file, const ClassDataItemIterator& it,
-                             Handle<mirror::Class> klass, ArtMethod* dst) {
+void ClassLinker::LoadMethod(Thread* self,
+                             const DexFile& dex_file,
+                             const ClassDataItemIterator& it,
+                             Handle<mirror::Class> klass,
+                             ArtMethod* dst) {
   uint32_t dex_method_idx = it.GetMemberIndex();
   const DexFile::MethodId& method_id = dex_file.GetMethodId(dex_method_idx);
   const char* method_name = dex_file.StringDataByIdx(method_id.name_idx_);
@@ -2458,8 +2496,8 @@
 
 bool ClassLinker::IsDexFileRegisteredLocked(const DexFile& dex_file) {
   dex_lock_.AssertSharedHeld(Thread::Current());
-  for (size_t i = 0; i != dex_caches_.size(); ++i) {
-    mirror::DexCache* dex_cache = GetDexCache(i);
+  for (GcRoot<mirror::DexCache>& root : dex_caches_) {
+    mirror::DexCache* dex_cache = root.Read();
     if (dex_cache->GetDexFile() == &dex_file) {
       return true;
     }
@@ -2589,7 +2627,9 @@
 // array class; that always comes from the base element class.
 //
 // Returns null with an exception raised on failure.
-mirror::Class* ClassLinker::CreateArrayClass(Thread* self, const char* descriptor, size_t hash,
+mirror::Class* ClassLinker::CreateArrayClass(Thread* self,
+                                             const char* descriptor,
+                                             size_t hash,
                                              Handle<mirror::ClassLoader> class_loader) {
   // Identify the underlying component type
   CHECK_EQ('[', descriptor[0]);
@@ -2757,8 +2797,7 @@
   return nullptr;
 }
 
-mirror::Class* ClassLinker::InsertClass(const char* descriptor, mirror::Class* klass,
-                                        size_t hash) {
+mirror::Class* ClassLinker::InsertClass(const char* descriptor, mirror::Class* klass, size_t hash) {
   if (VLOG_IS_ON(class_linker)) {
     mirror::DexCache* dex_cache = klass->GetDexCache();
     std::string source;
@@ -2769,11 +2808,13 @@
     LOG(INFO) << "Loaded class " << descriptor << source;
   }
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  mirror::Class* existing = LookupClassFromTableLocked(descriptor, klass->GetClassLoader(), hash);
+  mirror::ClassLoader* const class_loader = klass->GetClassLoader();
+  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
+  mirror::Class* existing = class_table->Lookup(descriptor, hash);
   if (existing != nullptr) {
     return existing;
   }
-  if (kIsDebugBuild && !klass->IsTemp() && klass->GetClassLoader() == nullptr &&
+  if (kIsDebugBuild && !klass->IsTemp() && class_loader == nullptr &&
       dex_cache_image_class_lookup_required_) {
     // Check a class loaded with the system class loader matches one in the image if the class
     // is in the image.
@@ -2783,114 +2824,60 @@
     }
   }
   VerifyObject(klass);
-  class_table_.InsertWithHash(GcRoot<mirror::Class>(klass), hash);
+  class_table->InsertWithHash(klass, hash);
   if (log_new_class_table_roots_) {
     new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
   }
   return nullptr;
 }
 
-void ClassLinker::UpdateClassVirtualMethods(mirror::Class* klass, ArtMethod* new_methods,
+void ClassLinker::UpdateClassVirtualMethods(mirror::Class* klass,
+                                            ArtMethod* new_methods,
                                             size_t new_num_methods) {
-  // classlinker_classes_lock_ is used to guard against races between root marking and changing the
-  // direct and virtual method pointers.
-  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+  // TODO: Fix the race condition here. b/22832610
   klass->SetNumVirtualMethods(new_num_methods);
   klass->SetVirtualMethodsPtr(new_methods);
-  if (log_new_class_table_roots_) {
-    new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
-  }
-}
-
-mirror::Class* ClassLinker::UpdateClass(const char* descriptor, mirror::Class* klass,
-                                        size_t hash) {
-  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  auto existing_it = class_table_.FindWithHash(std::make_pair(descriptor, klass->GetClassLoader()),
-                                               hash);
-  CHECK(existing_it != class_table_.end());
-  mirror::Class* existing = existing_it->Read();
-  CHECK_NE(existing, klass) << descriptor;
-  CHECK(!existing->IsResolved()) << descriptor;
-  CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusResolving) << descriptor;
-
-  CHECK(!klass->IsTemp()) << descriptor;
-  if (kIsDebugBuild && klass->GetClassLoader() == nullptr &&
-      dex_cache_image_class_lookup_required_) {
-    // Check a class loaded with the system class loader matches one in the image if the class
-    // is in the image.
-    existing = LookupClassFromImage(descriptor);
-    if (existing != nullptr) {
-      CHECK_EQ(klass, existing) << descriptor;
-    }
-  }
-  VerifyObject(klass);
-
-  // Update the element in the hash set.
-  *existing_it = GcRoot<mirror::Class>(klass);
-  if (log_new_class_table_roots_) {
-    new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
-  }
-
-  return existing;
+  // Need to mark the card so that the remembered sets and mod union tables get updated.
+  Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(klass);
 }
 
 bool ClassLinker::RemoveClass(const char* descriptor, mirror::ClassLoader* class_loader) {
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  auto pair = std::make_pair(descriptor, class_loader);
-  auto it = class_table_.Find(pair);
-  if (it != class_table_.end()) {
-    class_table_.Erase(it);
-    return true;
-  }
-  it = pre_zygote_class_table_.Find(pair);
-  if (it != pre_zygote_class_table_.end()) {
-    pre_zygote_class_table_.Erase(it);
-    return true;
-  }
-  return false;
+  ClassTable* const class_table = ClassTableForClassLoader(class_loader);
+  return class_table != nullptr && class_table->Remove(descriptor);
 }
 
-mirror::Class* ClassLinker::LookupClass(Thread* self, const char* descriptor, size_t hash,
+mirror::Class* ClassLinker::LookupClass(Thread* self,
+                                        const char* descriptor,
+                                        size_t hash,
                                         mirror::ClassLoader* class_loader) {
   {
     ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    mirror::Class* result = LookupClassFromTableLocked(descriptor, class_loader, hash);
-    if (result != nullptr) {
-      return result;
+    ClassTable* const class_table = ClassTableForClassLoader(class_loader);
+    if (class_table != nullptr) {
+      mirror::Class* result = class_table->Lookup(descriptor, hash);
+      if (result != nullptr) {
+        return result;
+      }
     }
   }
   if (class_loader != nullptr || !dex_cache_image_class_lookup_required_) {
     return nullptr;
+  }
+  // Lookup failed but need to search dex_caches_.
+  mirror::Class* result = LookupClassFromImage(descriptor);
+  if (result != nullptr) {
+    result = InsertClass(descriptor, result, hash);
   } else {
-    // Lookup failed but need to search dex_caches_.
-    mirror::Class* result = LookupClassFromImage(descriptor);
-    if (result != nullptr) {
-      InsertClass(descriptor, result, hash);
-    } else {
-      // Searching the image dex files/caches failed, we don't want to get into this situation
-      // often as map searches are faster, so after kMaxFailedDexCacheLookups move all image
-      // classes into the class table.
-      constexpr uint32_t kMaxFailedDexCacheLookups = 1000;
-      if (++failed_dex_cache_class_lookups_ > kMaxFailedDexCacheLookups) {
-        MoveImageClassesToClassTable();
-      }
-    }
-    return result;
-  }
-}
-
-mirror::Class* ClassLinker::LookupClassFromTableLocked(const char* descriptor,
-                                                       mirror::ClassLoader* class_loader,
-                                                       size_t hash) {
-  auto descriptor_pair = std::make_pair(descriptor, class_loader);
-  auto it = pre_zygote_class_table_.FindWithHash(descriptor_pair, hash);
-  if (it == pre_zygote_class_table_.end()) {
-    it = class_table_.FindWithHash(descriptor_pair, hash);
-    if (it == class_table_.end()) {
-      return nullptr;
+    // Searching the image dex files/caches failed, we don't want to get into this situation
+    // often as map searches are faster, so after kMaxFailedDexCacheLookups move all image
+    // classes into the class table.
+    constexpr uint32_t kMaxFailedDexCacheLookups = 1000;
+    if (++failed_dex_cache_class_lookups_ > kMaxFailedDexCacheLookups) {
+      MoveImageClassesToClassTable();
     }
   }
-  return it->Read();
+  return result;
 }
 
 static mirror::ObjectArray<mirror::DexCache>* GetImageDexCaches()
@@ -2910,6 +2897,7 @@
   ScopedAssertNoThreadSuspension ants(self, "Moving image classes to class table");
   mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches();
   std::string temp;
+  ClassTable* const class_table = InsertClassTableForClassLoader(nullptr);
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
     mirror::DexCache* dex_cache = dex_caches->Get(i);
     mirror::ObjectArray<mirror::Class>* types = dex_cache->GetResolvedTypes();
@@ -2919,12 +2907,12 @@
         DCHECK(klass->GetClassLoader() == nullptr);
         const char* descriptor = klass->GetDescriptor(&temp);
         size_t hash = ComputeModifiedUtf8Hash(descriptor);
-        mirror::Class* existing = LookupClassFromTableLocked(descriptor, nullptr, hash);
+        mirror::Class* existing = class_table->Lookup(descriptor, hash);
         if (existing != nullptr) {
           CHECK_EQ(existing, klass) << PrettyClassAndClassLoader(existing) << " != "
               << PrettyClassAndClassLoader(klass);
         } else {
-          class_table_.Insert(GcRoot<mirror::Class>(klass));
+          class_table->Insert(klass);
           if (log_new_class_table_roots_) {
             new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
           }
@@ -2937,9 +2925,9 @@
 
 void ClassLinker::MoveClassTableToPreZygote() {
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  DCHECK(pre_zygote_class_table_.Empty());
-  pre_zygote_class_table_ = std::move(class_table_);
-  class_table_.Clear();
+  for (auto& class_table : classes_) {
+    class_table.second->FreezeSnapshot();
+  }
 }
 
 mirror::Class* ClassLinker::LookupClassFromImage(const char* descriptor) {
@@ -2965,37 +2953,21 @@
   return nullptr;
 }
 
-void ClassLinker::LookupClasses(const char* descriptor, std::vector<mirror::Class*>& result) {
+void ClassLinker::LookupClasses(const char* descriptor,
+                                out<std::vector<mirror::Class*>> out_result) {
+  std::vector<mirror::Class*>& result = *out_result;
   result.clear();
   if (dex_cache_image_class_lookup_required_) {
     MoveImageClassesToClassTable();
   }
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  while (true) {
-    auto it = class_table_.Find(descriptor);
-    if (it == class_table_.end()) {
-      break;
+  for (auto& pair : classes_) {
+    // There can only be one class with the same descriptor per class loader.
+    ClassTable* const class_table  = pair.second;
+    mirror::Class* klass = class_table->Lookup(descriptor, ComputeModifiedUtf8Hash(descriptor));
+    if (klass != nullptr) {
+      result.push_back(klass);
     }
-    result.push_back(it->Read());
-    class_table_.Erase(it);
-  }
-  for (mirror::Class* k : result) {
-    class_table_.Insert(GcRoot<mirror::Class>(k));
-  }
-  size_t pre_zygote_start = result.size();
-  // Now handle the pre zygote table.
-  // Note: This dirties the pre-zygote table but shouldn't be an issue since LookupClasses is only
-  // called from the debugger.
-  while (true) {
-    auto it = pre_zygote_class_table_.Find(descriptor);
-    if (it == pre_zygote_class_table_.end()) {
-      break;
-    }
-    result.push_back(it->Read());
-    pre_zygote_class_table_.Erase(it);
-  }
-  for (size_t i = pre_zygote_start; i < result.size(); ++i) {
-    pre_zygote_class_table_.Insert(GcRoot<mirror::Class>(result[i]));
   }
 }
 
@@ -3160,7 +3132,8 @@
   }
 }
 
-bool ClassLinker::VerifyClassUsingOatFile(const DexFile& dex_file, mirror::Class* klass,
+bool ClassLinker::VerifyClassUsingOatFile(const DexFile& dex_file,
+                                          mirror::Class* klass,
                                           mirror::Class::Status& oat_file_class_status) {
   // If we're compiling, we can only verify the class using the oat file if
   // we are not compiling the image or if the class we're verifying is not part of
@@ -3282,9 +3255,12 @@
   }
 }
 
-mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccessAlreadyRunnable& soa, jstring name,
-                                             jobjectArray interfaces, jobject loader,
-                                             jobjectArray methods, jobjectArray throws) {
+mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccessAlreadyRunnable& soa,
+                                             jstring name,
+                                             jobjectArray interfaces,
+                                             jobject loader,
+                                             jobjectArray methods,
+                                             jobjectArray throws) {
   Thread* self = soa.Self();
   StackHandleScope<10> hs(self);
   MutableHandle<mirror::Class> klass(hs.NewHandle(
@@ -3303,7 +3279,7 @@
   klass->SetDexCache(GetClassRoot(kJavaLangReflectProxy)->GetDexCache());
   mirror::Class::SetStatus(klass, mirror::Class::kStatusIdx, self);
   std::string descriptor(GetDescriptorForProxy(klass.Get()));
-  size_t hash = ComputeModifiedUtf8Hash(descriptor.c_str());
+  const size_t hash = ComputeModifiedUtf8Hash(descriptor.c_str());
 
   // Insert the class before loading the fields as the field roots
   // (ArtField::declaring_class_) are only visited from the class
@@ -3379,7 +3355,7 @@
     // The new class will replace the old one in the class table.
     Handle<mirror::ObjectArray<mirror::Class>> h_interfaces(
         hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces)));
-    if (!LinkClass(self, descriptor.c_str(), klass, h_interfaces, &new_class)) {
+    if (!LinkClass(self, descriptor.c_str(), klass, h_interfaces, outof(new_class))) {
       mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
       return nullptr;
     }
@@ -3484,7 +3460,8 @@
   DCHECK(constructor->IsPublic());
 }
 
-void ClassLinker::CreateProxyMethod(Handle<mirror::Class> klass, ArtMethod* prototype,
+void ClassLinker::CreateProxyMethod(Handle<mirror::Class> klass,
+                                    ArtMethod* prototype,
                                     ArtMethod* out) {
   // Ensure prototype is in dex cache so that we can use the dex cache to look up the overridden
   // prototype method
@@ -3530,7 +3507,8 @@
   CHECK_EQ(np->GetReturnType(), prototype->GetReturnType());
 }
 
-bool ClassLinker::CanWeInitializeClass(mirror::Class* klass, bool can_init_statics,
+bool ClassLinker::CanWeInitializeClass(mirror::Class* klass,
+                                       bool can_init_statics,
                                        bool can_init_parents) {
   if (can_init_statics && can_init_parents) {
     return true;
@@ -3560,8 +3538,10 @@
   return CanWeInitializeClass(super_class, can_init_statics, can_init_parents);
 }
 
-bool ClassLinker::InitializeClass(Thread* self, Handle<mirror::Class> klass,
-                                  bool can_init_statics, bool can_init_parents) {
+bool ClassLinker::InitializeClass(Thread* self,
+                                  Handle<mirror::Class> klass,
+                                  bool can_init_statics,
+                                  bool can_init_parents) {
   // see JLS 3rd edition, 12.4.2 "Detailed Initialization Procedure" for the locking protocol
 
   // Are we already initialized and therefore done?
@@ -3630,7 +3610,7 @@
         return true;
       }
       // No. That's fine. Wait for another thread to finish initializing.
-      return WaitForInitializeClass(klass, self, lock);
+      return WaitForInitializeClass(klass, self, &lock);
     }
 
     if (!ValidateSuperClassDescriptors(klass)) {
@@ -3764,13 +3744,16 @@
   return success;
 }
 
-bool ClassLinker::WaitForInitializeClass(Handle<mirror::Class> klass, Thread* self,
-                                         ObjectLock<mirror::Class>& lock)
+bool ClassLinker::WaitForInitializeClass(Handle<mirror::Class> klass,
+                                         Thread* self,
+                                         ObjectLock<mirror::Class>* lock)
     SHARED_REQUIRES(Locks::mutator_lock_) {
+  DCHECK(lock != nullptr);
+
   while (true) {
     self->AssertNoPendingException();
     CHECK(!klass->IsInitialized());
-    lock.WaitIgnoringInterrupts();
+    lock->WaitIgnoringInterrupts();
 
     // When we wake up, repeat the test for init-in-progress.  If
     // there's an exception pending (only possible if
@@ -3833,7 +3816,8 @@
                                                    Handle<mirror::Class> super_klass,
                                                    ArtMethod* method,
                                                    ArtMethod* m,
-                                                   uint32_t index, uint32_t arg_type_idx)
+                                                   uint32_t index,
+                                                   uint32_t arg_type_idx)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   DCHECK(Thread::Current()->IsExceptionPending());
   DCHECK(!m->IsProxyMethod());
@@ -3995,7 +3979,8 @@
   return true;
 }
 
-bool ClassLinker::EnsureInitialized(Thread* self, Handle<mirror::Class> c, bool can_init_fields,
+bool ClassLinker::EnsureInitialized(Thread* self, Handle<mirror::Class> c,
+                                    bool can_init_fields,
                                     bool can_init_parents) {
   DCHECK(c.Get() != nullptr);
   if (c->IsInitialized()) {
@@ -4016,7 +4001,7 @@
 void ClassLinker::FixupTemporaryDeclaringClass(mirror::Class* temp_class,
                                                mirror::Class* new_class) {
   ArtField* fields = new_class->GetIFields();
-  DCHECK_EQ(temp_class->NumInstanceFields(), new_class->NumInstanceFields());
+  DCHECK_EQ(temp_class->NumInstanceFields(), 0u);
   for (size_t i = 0, count = new_class->NumInstanceFields(); i < count; i++) {
     if (fields[i].GetDeclaringClass() == temp_class) {
       fields[i].SetDeclaringClass(new_class);
@@ -4024,31 +4009,56 @@
   }
 
   fields = new_class->GetSFields();
-  DCHECK_EQ(temp_class->NumStaticFields(), new_class->NumStaticFields());
+  DCHECK_EQ(temp_class->NumStaticFields(), 0u);
   for (size_t i = 0, count = new_class->NumStaticFields(); i < count; i++) {
     if (fields[i].GetDeclaringClass() == temp_class) {
       fields[i].SetDeclaringClass(new_class);
     }
   }
 
-  DCHECK_EQ(temp_class->NumDirectMethods(), new_class->NumDirectMethods());
+  DCHECK_EQ(temp_class->NumDirectMethods(), 0u);
   for (auto& method : new_class->GetDirectMethods(image_pointer_size_)) {
     if (method.GetDeclaringClass() == temp_class) {
       method.SetDeclaringClass(new_class);
     }
   }
 
-  DCHECK_EQ(temp_class->NumVirtualMethods(), new_class->NumVirtualMethods());
+  DCHECK_EQ(temp_class->NumVirtualMethods(), 0u);
   for (auto& method : new_class->GetVirtualMethods(image_pointer_size_)) {
     if (method.GetDeclaringClass() == temp_class) {
       method.SetDeclaringClass(new_class);
     }
   }
+
+  // Make sure the remembered set and mod-union tables know that we updated some of the native
+  // roots.
+  Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(new_class);
 }
 
-bool ClassLinker::LinkClass(Thread* self, const char* descriptor, Handle<mirror::Class> klass,
+ClassTable* ClassLinker::InsertClassTableForClassLoader(mirror::ClassLoader* class_loader) {
+  auto it = classes_.find(GcRoot<mirror::ClassLoader>(class_loader));
+  if (it != classes_.end()) {
+    return it->second;
+  }
+  // Class table for loader not found, add it to the table.
+  auto* const class_table = new ClassTable;
+  classes_.Put(GcRoot<mirror::ClassLoader>(class_loader), class_table);
+  return class_table;
+}
+
+ClassTable* ClassLinker::ClassTableForClassLoader(mirror::ClassLoader* class_loader) {
+  auto it = classes_.find(GcRoot<mirror::ClassLoader>(class_loader));
+  if (it != classes_.end()) {
+    return it->second;
+  }
+  return nullptr;
+}
+
+bool ClassLinker::LinkClass(Thread* self,
+                            const char* descriptor,
+                            Handle<mirror::Class> klass,
                             Handle<mirror::ObjectArray<mirror::Class>> interfaces,
-                            MutableHandle<mirror::Class>* h_new_class_out) {
+                            out<MutableHandle<mirror::Class>> h_new_class_out) {
   CHECK_EQ(mirror::Class::kStatusLoaded, klass->GetStatus());
 
   if (!LinkSuperClass(klass)) {
@@ -4056,14 +4066,14 @@
   }
   ArtMethod* imt[mirror::Class::kImtSize];
   std::fill_n(imt, arraysize(imt), Runtime::Current()->GetImtUnimplementedMethod());
-  if (!LinkMethods(self, klass, interfaces, imt)) {
+  if (!LinkMethods(self, klass, interfaces, outof(imt))) {
     return false;
   }
   if (!LinkInstanceFields(self, klass)) {
     return false;
   }
   size_t class_size;
-  if (!LinkStaticFields(self, klass, &class_size)) {
+  if (!LinkStaticFields(self, klass, outof(class_size))) {
     return false;
   }
   CreateReferenceInstanceOffsets(klass);
@@ -4087,6 +4097,14 @@
     // Retire the temporary class and create the correctly sized resolved class.
     StackHandleScope<1> hs(self);
     auto h_new_class = hs.NewHandle(klass->CopyOf(self, class_size, imt, image_pointer_size_));
+    // Set array lengths to 0 since we don't want the GC to visit two different classes with the
+    // same ArtFields and ArtMethods. If this occurs, it causes bugs in remembered sets since the
+    // GC may not see any references to the from space and clean the card, even though there were
+    // references to the from space that got marked by the first class.
+    klass->SetNumDirectMethods(0);
+    klass->SetNumVirtualMethods(0);
+    klass->SetNumStaticFields(0);
+    klass->SetNumInstanceFields(0);
     if (UNLIKELY(h_new_class.Get() == nullptr)) {
       self->AssertPendingOOMException();
       mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
@@ -4096,9 +4114,26 @@
     CHECK_EQ(h_new_class->GetClassSize(), class_size);
     ObjectLock<mirror::Class> lock(self, h_new_class);
     FixupTemporaryDeclaringClass(klass.Get(), h_new_class.Get());
-    mirror::Class* existing = UpdateClass(descriptor, h_new_class.Get(),
-                                          ComputeModifiedUtf8Hash(descriptor));
-    CHECK(existing == nullptr || existing == klass.Get());
+
+    {
+      WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+      mirror::ClassLoader* const class_loader = h_new_class.Get()->GetClassLoader();
+      ClassTable* const table = InsertClassTableForClassLoader(class_loader);
+      mirror::Class* existing = table->UpdateClass(descriptor, h_new_class.Get(),
+                                                   ComputeModifiedUtf8Hash(descriptor));
+      CHECK_EQ(existing, klass.Get());
+      if (kIsDebugBuild && class_loader == nullptr && dex_cache_image_class_lookup_required_) {
+        // Check a class loaded with the system class loader matches one in the image if the class
+        // is in the image.
+        mirror::Class* const image_class = LookupClassFromImage(descriptor);
+        if (image_class != nullptr) {
+          CHECK_EQ(klass.Get(), existing) << descriptor;
+        }
+      }
+      if (log_new_class_table_roots_) {
+        new_class_roots_.push_back(GcRoot<mirror::Class>(h_new_class.Get()));
+      }
+    }
 
     // This will notify waiters on temp class that saw the not yet resolved class in the
     // class_table_ during EnsureResolved.
@@ -4114,30 +4149,31 @@
   return true;
 }
 
-static void CountMethodsAndFields(ClassDataItemIterator& dex_data,
-                                  size_t* virtual_methods,
-                                  size_t* direct_methods,
-                                  size_t* static_fields,
-                                  size_t* instance_fields) {
+static void CountMethodsAndFields(ClassDataItemIterator* dex_data,
+                                  out<size_t> virtual_methods,
+                                  out<size_t> direct_methods,
+                                  out<size_t> static_fields,
+                                  out<size_t> instance_fields) {
+  DCHECK(dex_data != nullptr);
   *virtual_methods = *direct_methods = *static_fields = *instance_fields = 0;
 
-  while (dex_data.HasNextStaticField()) {
-    dex_data.Next();
+  while (dex_data->HasNextStaticField()) {
+    dex_data->Next();
     (*static_fields)++;
   }
-  while (dex_data.HasNextInstanceField()) {
-    dex_data.Next();
+  while (dex_data->HasNextInstanceField()) {
+    dex_data->Next();
     (*instance_fields)++;
   }
-  while (dex_data.HasNextDirectMethod()) {
+  while (dex_data->HasNextDirectMethod()) {
     (*direct_methods)++;
-    dex_data.Next();
+    dex_data->Next();
   }
-  while (dex_data.HasNextVirtualMethod()) {
+  while (dex_data->HasNextVirtualMethod()) {
     (*virtual_methods)++;
-    dex_data.Next();
+    dex_data->Next();
   }
-  DCHECK(!dex_data.HasNext());
+  DCHECK(!dex_data->HasNext());
 }
 
 static void DumpClass(std::ostream& os,
@@ -4171,8 +4207,10 @@
   }
 }
 
-static std::string DumpClasses(const DexFile& dex_file1, const DexFile::ClassDef& dex_class_def1,
-                               const DexFile& dex_file2, const DexFile::ClassDef& dex_class_def2) {
+static std::string DumpClasses(const DexFile& dex_file1,
+                               const DexFile::ClassDef& dex_class_def1,
+                               const DexFile& dex_file2,
+                               const DexFile::ClassDef& dex_class_def2) {
   std::ostringstream os;
   DumpClass(os, dex_file1, dex_class_def1, " (Compile time)");
   DumpClass(os, dex_file2, dex_class_def2, " (Runtime)");
@@ -4182,20 +4220,28 @@
 
 // Very simple structural check on whether the classes match. Only compares the number of
 // methods and fields.
-static bool SimpleStructuralCheck(const DexFile& dex_file1, const DexFile::ClassDef& dex_class_def1,
-                                  const DexFile& dex_file2, const DexFile::ClassDef& dex_class_def2,
+static bool SimpleStructuralCheck(const DexFile& dex_file1,
+                                  const DexFile::ClassDef& dex_class_def1,
+                                  const DexFile& dex_file2,
+                                  const DexFile::ClassDef& dex_class_def2,
                                   std::string* error_msg) {
   ClassDataItemIterator dex_data1(dex_file1, dex_file1.GetClassData(dex_class_def1));
   ClassDataItemIterator dex_data2(dex_file2, dex_file2.GetClassData(dex_class_def2));
 
   // Counters for current dex file.
   size_t dex_virtual_methods1, dex_direct_methods1, dex_static_fields1, dex_instance_fields1;
-  CountMethodsAndFields(dex_data1, &dex_virtual_methods1, &dex_direct_methods1, &dex_static_fields1,
-                        &dex_instance_fields1);
+  CountMethodsAndFields(&dex_data1,
+                        outof(dex_virtual_methods1),
+                        outof(dex_direct_methods1),
+                        outof(dex_static_fields1),
+                        outof(dex_instance_fields1));
   // Counters for compile-time dex file.
   size_t dex_virtual_methods2, dex_direct_methods2, dex_static_fields2, dex_instance_fields2;
-  CountMethodsAndFields(dex_data2, &dex_virtual_methods2, &dex_direct_methods2, &dex_static_fields2,
-                        &dex_instance_fields2);
+  CountMethodsAndFields(&dex_data2,
+                        outof(dex_virtual_methods2),
+                        outof(dex_direct_methods2),
+                        outof(dex_static_fields2),
+                        outof(dex_instance_fields2));
 
   if (dex_virtual_methods1 != dex_virtual_methods2) {
     std::string class_dump = DumpClasses(dex_file1, dex_class_def1, dex_file2, dex_class_def2);
@@ -4398,9 +4444,10 @@
 }
 
 // Populate the class vtable and itable. Compute return type indices.
-bool ClassLinker::LinkMethods(Thread* self, Handle<mirror::Class> klass,
+bool ClassLinker::LinkMethods(Thread* self,
+                              Handle<mirror::Class> klass,
                               Handle<mirror::ObjectArray<mirror::Class>> interfaces,
-                              ArtMethod** out_imt) {
+                              out<ArtMethod* [mirror::Class::kImtSize]> out_imt) {
   self->AllowThreadSuspension();
   if (klass->IsInterface()) {
     // No vtable.
@@ -4415,7 +4462,10 @@
   } else if (!LinkVirtualMethods(self, klass)) {  // Link virtual methods first.
     return false;
   }
-  return LinkInterfaceMethods(self, klass, interfaces, out_imt);  // Link interface method last.
+  return LinkInterfaceMethods(self,
+                              klass,
+                              interfaces,
+                              outof_forward(out_imt));  // Link interface method last.
 }
 
 // Comparator for name and signature of a method, used in finding overriding methods. Implementation
@@ -4468,7 +4518,9 @@
 
 class LinkVirtualHashTable {
  public:
-  LinkVirtualHashTable(Handle<mirror::Class> klass, size_t hash_size, uint32_t* hash_table,
+  LinkVirtualHashTable(Handle<mirror::Class> klass,
+                       size_t hash_size,
+                       uint32_t* hash_table,
                        size_t image_pointer_size)
      : klass_(klass), hash_size_(hash_size), hash_table_(hash_table),
        image_pointer_size_(image_pointer_size) {
@@ -4672,9 +4724,12 @@
   return true;
 }
 
-bool ClassLinker::LinkInterfaceMethods(Thread* self, Handle<mirror::Class> klass,
+bool ClassLinker::LinkInterfaceMethods(Thread* self,
+                                       Handle<mirror::Class> klass,
                                        Handle<mirror::ObjectArray<mirror::Class>> interfaces,
-                                       ArtMethod** out_imt) {
+                                       out<ArtMethod* [mirror::Class::kImtSize]> out_imt_array) {
+  auto& out_imt = *out_imt_array;
+
   StackHandleScope<3> hs(self);
   Runtime* const runtime = Runtime::Current();
   const bool has_superclass = klass->HasSuperClass();
@@ -4862,7 +4917,7 @@
     }
   }
 
-  auto* old_cause = self->StartAssertNoThreadSuspension(
+  const char* old_cause = self->StartAssertNoThreadSuspension(
       "Copying ArtMethods for LinkInterfaceMethods");
   for (size_t i = 0; i < ifcount; ++i) {
     size_t num_methods = iftable->GetInterface(i)->NumVirtualMethods();
@@ -5083,12 +5138,16 @@
 
 bool ClassLinker::LinkInstanceFields(Thread* self, Handle<mirror::Class> klass) {
   CHECK(klass.Get() != nullptr);
-  return LinkFields(self, klass, false, nullptr);
+  size_t class_size_dont_care;
+  UNUSED(class_size_dont_care);  // This doesn't get set for instance fields.
+  return LinkFields(self, klass, false, outof(class_size_dont_care));
 }
 
-bool ClassLinker::LinkStaticFields(Thread* self, Handle<mirror::Class> klass, size_t* class_size) {
+bool ClassLinker::LinkStaticFields(Thread* self,
+                                   Handle<mirror::Class> klass,
+                                   out<size_t> class_size) {
   CHECK(klass.Get() != nullptr);
-  return LinkFields(self, klass, true, class_size);
+  return LinkFields(self, klass, true, outof_forward(class_size));
 }
 
 struct LinkFieldsComparator {
@@ -5125,8 +5184,10 @@
   }
 };
 
-bool ClassLinker::LinkFields(Thread* self, Handle<mirror::Class> klass, bool is_static,
-                             size_t* class_size) {
+bool ClassLinker::LinkFields(Thread* self,
+                             Handle<mirror::Class> klass,
+                             bool is_static,
+                             out<size_t> class_size) {
   self->AllowThreadSuspension();
   const size_t num_fields = is_static ? klass->NumStaticFields() : klass->NumInstanceFields();
   ArtField* const fields = is_static ? klass->GetSFields() : klass->GetIFields();
@@ -5297,7 +5358,8 @@
   klass->SetReferenceInstanceOffsets(reference_offsets);
 }
 
-mirror::String* ClassLinker::ResolveString(const DexFile& dex_file, uint32_t string_idx,
+mirror::String* ClassLinker::ResolveString(const DexFile& dex_file,
+                                           uint32_t string_idx,
                                            Handle<mirror::DexCache> dex_cache) {
   DCHECK(dex_cache.Get() != nullptr);
   mirror::String* resolved = dex_cache->GetResolvedString(string_idx);
@@ -5311,7 +5373,8 @@
   return string;
 }
 
-mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file, uint16_t type_idx,
+mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file,
+                                        uint16_t type_idx,
                                         mirror::Class* referrer) {
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
@@ -5319,7 +5382,8 @@
   return ResolveType(dex_file, type_idx, dex_cache, class_loader);
 }
 
-mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file, uint16_t type_idx,
+mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file,
+                                        uint16_t type_idx,
                                         Handle<mirror::DexCache> dex_cache,
                                         Handle<mirror::ClassLoader> class_loader) {
   DCHECK(dex_cache.Get() != nullptr);
@@ -5352,7 +5416,8 @@
   return resolved;
 }
 
-ArtMethod* ClassLinker::ResolveMethod(const DexFile& dex_file, uint32_t method_idx,
+ArtMethod* ClassLinker::ResolveMethod(const DexFile& dex_file,
+                                      uint32_t method_idx,
                                       Handle<mirror::DexCache> dex_cache,
                                       Handle<mirror::ClassLoader> class_loader,
                                       ArtMethod* referrer, InvokeType type) {
@@ -5509,9 +5574,11 @@
   }
 }
 
-ArtField* ClassLinker::ResolveField(const DexFile& dex_file, uint32_t field_idx,
+ArtField* ClassLinker::ResolveField(const DexFile& dex_file,
+                                    uint32_t field_idx,
                                     Handle<mirror::DexCache> dex_cache,
-                                    Handle<mirror::ClassLoader> class_loader, bool is_static) {
+                                    Handle<mirror::ClassLoader> class_loader,
+                                    bool is_static) {
   DCHECK(dex_cache.Get() != nullptr);
   ArtField* resolved = dex_cache->GetResolvedField(field_idx, image_pointer_size_);
   if (resolved != nullptr) {
@@ -5550,7 +5617,8 @@
   return resolved;
 }
 
-ArtField* ClassLinker::ResolveFieldJLS(const DexFile& dex_file, uint32_t field_idx,
+ArtField* ClassLinker::ResolveFieldJLS(const DexFile& dex_file,
+                                       uint32_t field_idx,
                                        Handle<mirror::DexCache> dex_cache,
                                        Handle<mirror::ClassLoader> class_loader) {
   DCHECK(dex_cache.Get() != nullptr);
@@ -5580,7 +5648,8 @@
   return resolved;
 }
 
-const char* ClassLinker::MethodShorty(uint32_t method_idx, ArtMethod* referrer,
+const char* ClassLinker::MethodShorty(uint32_t method_idx,
+                                      ArtMethod* referrer,
                                       uint32_t* length) {
   mirror::Class* declaring_class = referrer->GetDeclaringClass();
   mirror::DexCache* dex_cache = declaring_class->GetDexCache();
@@ -5589,23 +5658,22 @@
   return dex_file.GetMethodShorty(method_id, length);
 }
 
-void ClassLinker::DumpAllClasses(int flags) {
-  if (dex_cache_image_class_lookup_required_) {
-    MoveImageClassesToClassTable();
-  }
-  // TODO: at the time this was written, it wasn't safe to call PrettyField with the ClassLinker
-  // lock held, because it might need to resolve a field's type, which would try to take the lock.
-  std::vector<mirror::Class*> all_classes;
-  {
-    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-    for (GcRoot<mirror::Class>& it : class_table_) {
-      all_classes.push_back(it.Read());
-    }
+class DumpClassVisitor : public ClassVisitor {
+ public:
+  explicit DumpClassVisitor(int flags) : flags_(flags) {}
+
+  bool Visit(mirror::Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    klass->DumpClass(LOG(ERROR), flags_);
+    return true;
   }
 
-  for (size_t i = 0; i < all_classes.size(); ++i) {
-    all_classes[i]->DumpClass(std::cerr, flags);
-  }
+ private:
+  const int flags_;
+};
+
+void ClassLinker::DumpAllClasses(int flags) {
+  DumpClassVisitor visitor(flags);
+  VisitClasses(&visitor);
 }
 
 static OatFile::OatMethod CreateOatMethod(const void* code) {
@@ -5658,8 +5726,24 @@
     MoveImageClassesToClassTable();
   }
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  os << "Zygote loaded classes=" << pre_zygote_class_table_.Size() << " post zygote classes="
-     << class_table_.Size() << "\n";
+  os << "Zygote loaded classes=" << NumZygoteClasses() << " post zygote classes="
+     << NumNonZygoteClasses() << "\n";
+}
+
+size_t ClassLinker::NumZygoteClasses() const {
+  size_t total = 0;
+  for (const auto& entry : classes_) {
+    total += entry.second->NumZygoteClasses();
+  }
+  return total;
+}
+
+size_t ClassLinker::NumNonZygoteClasses() const {
+  size_t total = 0;
+  for (const auto& entry : classes_) {
+    total += entry.second->NumNonZygoteClasses();
+  }
+  return total;
 }
 
 size_t ClassLinker::NumLoadedClasses() {
@@ -5668,7 +5752,7 @@
   }
   ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
   // Only return non zygote classes since these are the ones which apps which care about.
-  return class_table_.Size();
+  return NumNonZygoteClasses();
 }
 
 pid_t ClassLinker::GetClassesLockOwner() {
@@ -5739,43 +5823,6 @@
   return descriptor;
 }
 
-std::size_t ClassLinker::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& root)
-    const {
-  std::string temp;
-  return ComputeModifiedUtf8Hash(root.Read()->GetDescriptor(&temp));
-}
-
-bool ClassLinker::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& a,
-                                                        const GcRoot<mirror::Class>& b) const {
-  if (a.Read()->GetClassLoader() != b.Read()->GetClassLoader()) {
-    return false;
-  }
-  std::string temp;
-  return a.Read()->DescriptorEquals(b.Read()->GetDescriptor(&temp));
-}
-
-std::size_t ClassLinker::ClassDescriptorHashEquals::operator()(
-    const std::pair<const char*, mirror::ClassLoader*>& element) const {
-  return ComputeModifiedUtf8Hash(element.first);
-}
-
-bool ClassLinker::ClassDescriptorHashEquals::operator()(
-    const GcRoot<mirror::Class>& a, const std::pair<const char*, mirror::ClassLoader*>& b) const {
-  if (a.Read()->GetClassLoader() != b.second) {
-    return false;
-  }
-  return a.Read()->DescriptorEquals(b.first);
-}
-
-bool ClassLinker::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& a,
-                                                        const char* descriptor) const {
-  return a.Read()->DescriptorEquals(descriptor);
-}
-
-std::size_t ClassLinker::ClassDescriptorHashEquals::operator()(const char* descriptor) const {
-  return ComputeModifiedUtf8Hash(descriptor);
-}
-
 bool ClassLinker::MayBeCalledWithDirectCodePointer(ArtMethod* m) {
   if (Runtime::Current()->UseJit()) {
     // JIT can have direct code pointers from any method to any other method.
@@ -5809,7 +5856,8 @@
   }
 }
 
-jobject ClassLinker::CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files) {
+jobject ClassLinker::CreatePathClassLoader(Thread* self,
+                                           const std::vector<const DexFile*>& dex_files) {
   // SOAAlreadyRunnable is protected, and we need something to add a global reference.
   // We could move the jobject to the callers, but all call-sites do this...
   ScopedObjectAccessUnchecked soa(self);
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 05a809e..54f1f3d 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -25,6 +25,8 @@
 #include "base/hash_set.h"
 #include "base/macros.h"
 #include "base/mutex.h"
+#include "base/out_fwd.h"
+#include "class_table.h"
 #include "dex_file.h"
 #include "gc_root.h"
 #include "jni.h"
@@ -56,8 +58,6 @@
 class ScopedObjectAccessAlreadyRunnable;
 template<size_t kNumReferences> class PACKED(4) StackHandleScope;
 
-typedef bool (ClassVisitor)(mirror::Class* c, void* arg);
-
 enum VisitRootFlags : uint8_t;
 
 class ClassLinker {
@@ -109,16 +109,19 @@
 
   // Initialize class linker by bootstraping from dex files.
   void InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
   // Initialize class linker from one or more images.
   void InitFromImage() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
 
   // Finds a class by its descriptor, loading it if necessary.
   // If class_loader is null, searches boot_class_path_.
-  mirror::Class* FindClass(Thread* self, const char* descriptor,
+  mirror::Class* FindClass(Thread* self,
+                           const char* descriptor,
                            Handle<mirror::ClassLoader> class_loader)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
   // Finds a class in the path class loader, loading it if necessary without using JNI. Hash
   // function is supposed to be ComputeModifiedUtf8Hash(descriptor). Returns true if the
@@ -126,19 +129,24 @@
   // was encountered while walking the parent chain (currently only BootClassLoader and
   // PathClassLoader are supported).
   bool FindClassInPathClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
-                                  Thread* self, const char* descriptor, size_t hash,
+                                  Thread* self,
+                                  const char* descriptor,
+                                  size_t hash,
                                   Handle<mirror::ClassLoader> class_loader,
-                                  mirror::Class** result)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+                                  out<mirror::Class*> result)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
   // Finds a class by its descriptor using the "system" class loader, ie by searching the
   // boot_class_path_.
   mirror::Class* FindSystemClass(Thread* self, const char* descriptor)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
   // Finds the array class given for the element class.
-  mirror::Class* FindArrayClass(Thread* self, mirror::Class** element_class)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+  mirror::Class* FindArrayClass(Thread* self, /* in parameter */ mirror::Class** element_class)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
   // Returns true if the class linker is initialized.
   bool IsInitialized() const {
@@ -146,20 +154,27 @@
   }
 
   // Define a new a class based on a ClassDef from a DexFile
-  mirror::Class* DefineClass(Thread* self, const char* descriptor, size_t hash,
+  mirror::Class* DefineClass(Thread* self,
+                             const char* descriptor,
+                             size_t hash,
                              Handle<mirror::ClassLoader> class_loader,
-                             const DexFile& dex_file, const DexFile::ClassDef& dex_class_def)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+                             const DexFile& dex_file,
+                             const DexFile::ClassDef& dex_class_def)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
   // Finds a class by its descriptor, returning null if it isn't wasn't loaded
   // by the given 'class_loader'.
-  mirror::Class* LookupClass(Thread* self, const char* descriptor, size_t hash,
-                             mirror::ClassLoader* class_loader)
+  mirror::Class* LookupClass(Thread* self,
+                             const char* descriptor,
+                             size_t hash,
+                             mirror::ClassLoader*
+                             class_loader)
       REQUIRES(!Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Finds all the classes with the given descriptor, regardless of ClassLoader.
-  void LookupClasses(const char* descriptor, std::vector<mirror::Class*>& classes)
+  void LookupClasses(const char* descriptor, out<std::vector<mirror::Class*>> classes)
       REQUIRES(!Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -167,17 +182,21 @@
 
   // General class unloading is not supported, this is used to prune
   // unwanted classes during image writing.
-  bool RemoveClass(const char* descriptor, mirror::ClassLoader* class_loader)
-      REQUIRES(!Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+  bool RemoveClass(const char* descriptor,
+                   mirror::ClassLoader* class_loader)
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   void DumpAllClasses(int flags)
-      REQUIRES(!Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   void DumpForSigQuit(std::ostream& os)
       REQUIRES(!Locks::classlinker_classes_lock_);
 
   size_t NumLoadedClasses()
-      REQUIRES(!Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
@@ -187,75 +206,95 @@
 
   // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache.
-  mirror::String* ResolveString(const DexFile& dex_file, uint32_t string_idx,
+  mirror::String* ResolveString(const DexFile& dex_file,
+                                uint32_t string_idx,
                                 Handle<mirror::DexCache> dex_cache)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Resolve a Type with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identity the
   // target DexCache and ClassLoader to use for resolution.
-  mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx, mirror::Class* referrer)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+  mirror::Class* ResolveType(const DexFile& dex_file,
+                             uint16_t type_idx,
+                             mirror::Class* referrer)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   // Resolve a Type with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
-  mirror::Class* ResolveType(uint16_t type_idx, ArtMethod* referrer)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+  mirror::Class* ResolveType(uint16_t type_idx,
+                             ArtMethod* referrer)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
-  mirror::Class* ResolveType(uint16_t type_idx, ArtField* referrer)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+  mirror::Class* ResolveType(uint16_t type_idx,
+                             ArtField* referrer)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   // Resolve a type with the given ID from the DexFile, storing the
   // result in DexCache. The ClassLoader is used to search for the
   // type, since it may be referenced from but not contained within
   // the given DexFile.
-  mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx,
+  mirror::Class* ResolveType(const DexFile& dex_file,
+                             uint16_t type_idx,
                              Handle<mirror::DexCache> dex_cache,
                              Handle<mirror::ClassLoader> class_loader)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   // Resolve a method with a given ID from the DexFile, storing the
   // result in DexCache. The ClassLinker and ClassLoader are used as
   // in ResolveType. What is unique is the method type argument which
   // is used to determine if this method is a direct, static, or
   // virtual method.
-  ArtMethod* ResolveMethod(const DexFile& dex_file, uint32_t method_idx,
+  ArtMethod* ResolveMethod(const DexFile& dex_file,
+                           uint32_t method_idx,
                            Handle<mirror::DexCache> dex_cache,
-                           Handle<mirror::ClassLoader> class_loader, ArtMethod* referrer,
+                           Handle<mirror::ClassLoader> class_loader,
+                           ArtMethod* referrer,
                            InvokeType type)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   ArtMethod* GetResolvedMethod(uint32_t method_idx, ArtMethod* referrer)
       SHARED_REQUIRES(Locks::mutator_lock_);
   ArtMethod* ResolveMethod(Thread* self, uint32_t method_idx, ArtMethod* referrer, InvokeType type)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   ArtField* GetResolvedField(uint32_t field_idx, mirror::Class* field_declaring_class)
       SHARED_REQUIRES(Locks::mutator_lock_);
   ArtField* GetResolvedField(uint32_t field_idx, mirror::DexCache* dex_cache)
       SHARED_REQUIRES(Locks::mutator_lock_);
   ArtField* ResolveField(uint32_t field_idx, ArtMethod* referrer, bool is_static)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   // Resolve a field with a given ID from the DexFile, storing the
   // result in DexCache. The ClassLinker and ClassLoader are used as
   // in ResolveType. What is unique is the is_static argument which is
   // used to determine if we are resolving a static or non-static
   // field.
-  ArtField* ResolveField(const DexFile& dex_file, uint32_t field_idx,
+  ArtField* ResolveField(const DexFile& dex_file,
+                         uint32_t field_idx,
                          Handle<mirror::DexCache> dex_cache,
-                         Handle<mirror::ClassLoader> class_loader, bool is_static)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+                         Handle<mirror::ClassLoader> class_loader,
+                         bool is_static)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   // Resolve a field with a given ID from the DexFile, storing the
   // result in DexCache. The ClassLinker and ClassLoader are used as
   // in ResolveType. No is_static argument is provided so that Java
   // field resolution semantics are followed.
-  ArtField* ResolveFieldJLS(const DexFile& dex_file, uint32_t field_idx,
+  ArtField* ResolveFieldJLS(const DexFile& dex_file,
+                            uint32_t field_idx,
                             Handle<mirror::DexCache> dex_cache,
                             Handle<mirror::ClassLoader> class_loader)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   // Get shorty from method index without resolution. Used to do handlerization.
   const char* MethodShorty(uint32_t method_idx, ArtMethod* referrer, uint32_t* length)
@@ -264,9 +303,12 @@
   // Returns true on success, false if there's an exception pending.
   // can_run_clinit=false allows the compiler to attempt to init a class,
   // given the restriction that no <clinit> execution is possible.
-  bool EnsureInitialized(Thread* self, Handle<mirror::Class> c, bool can_init_fields,
+  bool EnsureInitialized(Thread* self,
+                         Handle<mirror::Class> c,
+                         bool can_init_fields,
                          bool can_init_parents)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   // Initializes classes that have instances in the image but that have
   // <clinit> methods so they could not be initialized by the compiler.
@@ -289,27 +331,34 @@
   const OatFile* GetPrimaryOatFile()
       REQUIRES(!dex_lock_);
 
-  void VisitClasses(ClassVisitor* visitor, void* arg)
-      REQUIRES(!Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+  void VisitClasses(ClassVisitor* visitor)
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Less efficient variant of VisitClasses that copies the class_table_ into secondary storage
   // so that it can visit individual classes without holding the doesn't hold the
   // Locks::classlinker_classes_lock_. As the Locks::classlinker_classes_lock_ isn't held this code
   // can race with insertion and deletion of classes while the visitor is being called.
-  void VisitClassesWithoutClassesLock(ClassVisitor* visitor, void* arg)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+  void VisitClassesWithoutClassesLock(ClassVisitor* visitor)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
   void VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags)
-      REQUIRES(!Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
-      REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(!dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   mirror::DexCache* FindDexCache(const DexFile& dex_file)
-      REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(!dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   bool IsDexFileRegistered(const DexFile& dex_file)
-      REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(!dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupDexCaches(ArtMethod* resolution_method)
-      REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(!dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Finds or creates the oat file holding dex_location. Then loads and returns
   // all corresponding dex files (there may be more than one dex file loaded
@@ -325,58 +374,75 @@
   // This method should not be called with the mutator_lock_ held, because it
   // could end up starving GC if we need to generate or relocate any oat
   // files.
-  std::vector<std::unique_ptr<const DexFile>> OpenDexFilesFromOat(
-      const char* dex_location, const char* oat_location,
-      std::vector<std::string>* error_msgs)
+  std::vector<std::unique_ptr<const DexFile>> OpenDexFilesFromOat(const char* dex_location,
+                                                                  const char* oat_location,
+                                                                  out<std::vector<std::string>>
+                                                                      error_msgs)
       REQUIRES(!dex_lock_, !Locks::mutator_lock_);
 
   // Allocate an instance of a java.lang.Object.
-  mirror::Object* AllocObject(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_)
+  mirror::Object* AllocObject(Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
   // TODO: replace this with multiple methods that allocate the correct managed type.
   template <class T>
   mirror::ObjectArray<T>* AllocObjectArray(Thread* self, size_t length)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_);
 
   mirror::ObjectArray<mirror::Class>* AllocClassArray(Thread* self, size_t length)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_);
 
   mirror::ObjectArray<mirror::String>* AllocStringArray(Thread* self, size_t length)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_);
 
   ArtField* AllocArtFieldArray(Thread* self, size_t length);
 
   ArtMethod* AllocArtMethodArray(Thread* self, size_t length);
 
   mirror::PointerArray* AllocPointerArray(Thread* self, size_t length)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_);
 
   mirror::IfTable* AllocIfTable(Thread* self, size_t ifcount)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_);
 
-  mirror::ObjectArray<mirror::StackTraceElement>* AllocStackTraceElementArray(
-      Thread* self, size_t length) SHARED_REQUIRES(Locks::mutator_lock_)
-          REQUIRES(!Roles::uninterruptible_);
+  mirror::ObjectArray<mirror::StackTraceElement>* AllocStackTraceElementArray(Thread* self,
+                                                                              size_t length)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_);
 
   void VerifyClass(Thread* self, Handle<mirror::Class> klass)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
-  bool VerifyClassUsingOatFile(const DexFile& dex_file, mirror::Class* klass,
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
+  bool VerifyClassUsingOatFile(const DexFile& dex_file,
+                               mirror::Class* klass,
                                mirror::Class::Status& oat_file_class_status)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
   void ResolveClassExceptionHandlerTypes(const DexFile& dex_file,
                                          Handle<mirror::Class> klass)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
   void ResolveMethodExceptionHandlerTypes(const DexFile& dex_file, ArtMethod* klass)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
-  mirror::Class* CreateProxyClass(ScopedObjectAccessAlreadyRunnable& soa, jstring name,
-                                  jobjectArray interfaces, jobject loader, jobjectArray methods,
+  mirror::Class* CreateProxyClass(ScopedObjectAccessAlreadyRunnable& soa,
+                                  jstring name,
+                                  jobjectArray interfaces,
+                                  jobject loader,
+                                  jobjectArray methods,
                                   jobjectArray throws)
       SHARED_REQUIRES(Locks::mutator_lock_);
   std::string GetDescriptorForProxy(mirror::Class* proxy_class)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  ArtMethod* FindMethodForProxy(mirror::Class* proxy_class, ArtMethod* proxy_method)
+  ArtMethod* FindMethodForProxy(mirror::Class* proxy_class,
+                                ArtMethod* proxy_method)
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -385,7 +451,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get the oat code for a method from a method index.
-  const void* GetQuickOatCodeFor(const DexFile& dex_file, uint16_t class_def_idx,
+  const void* GetQuickOatCodeFor(const DexFile& dex_file,
+                                 uint16_t class_def_idx,
                                  uint32_t method_idx)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -395,7 +462,7 @@
   const void* GetOatMethodQuickCodeFor(ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  const OatFile::OatMethod FindOatMethodFor(ArtMethod* method, bool* found)
+  const OatFile::OatMethod FindOatMethodFor(ArtMethod* method, out<bool> found)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   pid_t GetClassesLockOwner();  // For SignalCatcher.
@@ -452,12 +519,14 @@
 
   // Returns true if the method can be called with its direct code pointer, false otherwise.
   bool MayBeCalledWithDirectCodePointer(ArtMethod* m)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
   // Creates a GlobalRef PathClassLoader that can be used to load classes from the given dex files.
   // Note: the objects are not completely set up. Do not use this outside of tests and the compiler.
-  jobject CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+  jobject CreatePathClassLoader(Thread* self, const std::vector<const DexFile*>& dex_files)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
   size_t GetImagePointerSize() const {
     DCHECK(ValidPointerSize(image_pointer_size_)) << image_pointer_size_;
@@ -476,38 +545,67 @@
   void DropFindArrayClassCache() SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
+  class CompareClassLoaderGcRoot {
+   public:
+    bool operator()(const GcRoot<mirror::ClassLoader>& lhs, const GcRoot<mirror::ClassLoader>& rhs)
+        const SHARED_REQUIRES(Locks::mutator_lock_) {
+      return lhs.Read() < rhs.Read();
+    }
+  };
+
+  typedef SafeMap<GcRoot<mirror::ClassLoader>, ClassTable*, CompareClassLoaderGcRoot>
+      ClassLoaderClassTable;
+
+  void VisitClassesInternal(ClassVisitor* visitor)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns the number of zygote and image classes.
+  size_t NumZygoteClasses() const REQUIRES(Locks::classlinker_classes_lock_);
+
+  // Returns the number of classes that are neither zygote nor image classes.
+  size_t NumNonZygoteClasses() const REQUIRES(Locks::classlinker_classes_lock_);
+
   OatFile& GetImageOatFile(gc::space::ImageSpace* space)
-      REQUIRES(!dex_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(!dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FinishInit(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   // For early bootstrapping by Init
   mirror::Class* AllocClass(Thread* self, mirror::Class* java_lang_Class, uint32_t class_size)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_);
 
   // Alloc* convenience functions to avoid needing to pass in mirror::Class*
   // values that are known to the ClassLinker such as
   // kObjectArrayClass and kJavaLangString etc.
   mirror::Class* AllocClass(Thread* self, uint32_t class_size)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_);
   mirror::DexCache* AllocDexCache(Thread* self, const DexFile& dex_file)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_);
 
   mirror::Class* CreatePrimitiveClass(Thread* self, Primitive::Type type)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_);
   mirror::Class* InitializePrimitiveClass(mirror::Class* primitive_class, Primitive::Type type)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_);
 
-  mirror::Class* CreateArrayClass(Thread* self, const char* descriptor, size_t hash,
+  mirror::Class* CreateArrayClass(Thread* self,
+                                  const char* descriptor,
+                                  size_t hash,
                                   Handle<mirror::ClassLoader> class_loader)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_, !Roles::uninterruptible_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   void AppendToBootClassPath(Thread* self, const DexFile& dex_file)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
   void AppendToBootClassPath(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
   // Precomputes size needed for Class, in the case of a non-temporary class this size must be
   // sufficient to hold all static fields.
@@ -516,86 +614,112 @@
 
   // Setup the classloader, class def index, type idx so that we can insert this class in the class
   // table.
-  void SetupClass(const DexFile& dex_file, const DexFile::ClassDef& dex_class_def,
-                  Handle<mirror::Class> klass, mirror::ClassLoader* class_loader)
+  void SetupClass(const DexFile& dex_file,
+                  const DexFile::ClassDef& dex_class_def,
+                  Handle<mirror::Class> klass,
+                  mirror::ClassLoader* class_loader)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void LoadClass(Thread* self, const DexFile& dex_file, const DexFile::ClassDef& dex_class_def,
+  void LoadClass(Thread* self,
+                 const DexFile& dex_file,
+                 const DexFile::ClassDef& dex_class_def,
                  Handle<mirror::Class> klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void LoadClassMembers(Thread* self, const DexFile& dex_file, const uint8_t* class_data,
-                        Handle<mirror::Class> klass, const OatFile::OatClass* oat_class)
+  void LoadClassMembers(Thread* self,
+                        const DexFile& dex_file,
+                        const uint8_t* class_data,
+                        Handle<mirror::Class> klass,
+                        const OatFile::OatClass* oat_class)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void LoadField(const ClassDataItemIterator& it, Handle<mirror::Class> klass,
+  void LoadField(const ClassDataItemIterator& it,
+                 Handle<mirror::Class> klass,
                  ArtField* dst)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void LoadMethod(Thread* self, const DexFile& dex_file, const ClassDataItemIterator& it,
-                  Handle<mirror::Class> klass, ArtMethod* dst)
+  void LoadMethod(Thread* self,
+                  const DexFile& dex_file,
+                  const ClassDataItemIterator& it,
+                  Handle<mirror::Class> klass,
+                  ArtMethod* dst)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FixupStaticTrampolines(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Finds the associated oat class for a dex_file and descriptor. Returns an invalid OatClass on
   // error and sets found to false.
-  OatFile::OatClass FindOatClass(const DexFile& dex_file, uint16_t class_def_idx, bool* found)
+  OatFile::OatClass FindOatClass(const DexFile& dex_file, uint16_t class_def_idx, out<bool> found)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void RegisterDexFileLocked(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
-      REQUIRES(dex_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+      REQUIRES(dex_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
   bool IsDexFileRegisteredLocked(const DexFile& dex_file)
       SHARED_REQUIRES(dex_lock_, Locks::mutator_lock_);
 
-  bool InitializeClass(Thread* self, Handle<mirror::Class> klass, bool can_run_clinit,
+  bool InitializeClass(Thread* self,
+                       Handle<mirror::Class> klass,
+                       bool can_run_clinit,
                        bool can_init_parents)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
-  bool WaitForInitializeClass(Handle<mirror::Class> klass, Thread* self,
-                              ObjectLock<mirror::Class>& lock);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
+  bool WaitForInitializeClass(Handle<mirror::Class> klass,
+                              Thread* self,
+                              ObjectLock<mirror::Class>* lock);
   bool ValidateSuperClassDescriptors(Handle<mirror::Class> klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool IsSameDescriptorInDifferentClassContexts(Thread* self, const char* descriptor,
+  bool IsSameDescriptorInDifferentClassContexts(Thread* self,
+                                                const char* descriptor,
                                                 Handle<mirror::ClassLoader> class_loader1,
                                                 Handle<mirror::ClassLoader> class_loader2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool IsSameMethodSignatureInDifferentClassContexts(Thread* self, ArtMethod* method,
-                                                     mirror::Class* klass1, mirror::Class* klass2)
+  bool IsSameMethodSignatureInDifferentClassContexts(Thread* self,
+                                                     ArtMethod* method,
+                                                     mirror::Class* klass1,
+                                                     mirror::Class* klass2)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool LinkClass(Thread* self, const char* descriptor, Handle<mirror::Class> klass,
+  bool LinkClass(Thread* self,
+                 const char* descriptor,
+                 Handle<mirror::Class> klass,
                  Handle<mirror::ObjectArray<mirror::Class>> interfaces,
-                 MutableHandle<mirror::Class>* h_new_class_out)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+                 out<MutableHandle<mirror::Class>> h_new_class_out)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!Locks::classlinker_classes_lock_);
 
   bool LinkSuperClass(Handle<mirror::Class> klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool LoadSuperAndInterfaces(Handle<mirror::Class> klass, const DexFile& dex_file)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
-  bool LinkMethods(Thread* self, Handle<mirror::Class> klass,
+  bool LinkMethods(Thread* self,
+                   Handle<mirror::Class> klass,
                    Handle<mirror::ObjectArray<mirror::Class>> interfaces,
-                   ArtMethod** out_imt)
+                   out<ArtMethod* [mirror::Class::kImtSize]> out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool LinkVirtualMethods(Thread* self, Handle<mirror::Class> klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool LinkInterfaceMethods(Thread* self, Handle<mirror::Class> klass,
+  bool LinkInterfaceMethods(Thread* self,
+                            Handle<mirror::Class> klass,
                             Handle<mirror::ObjectArray<mirror::Class>> interfaces,
-                            ArtMethod** out_imt)
+                            out<ArtMethod* [mirror::Class::kImtSize]> out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool LinkStaticFields(Thread* self, Handle<mirror::Class> klass, size_t* class_size)
+  bool LinkStaticFields(Thread* self,
+                        Handle<mirror::Class> klass,
+                        out<size_t> class_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
   bool LinkInstanceFields(Thread* self, Handle<mirror::Class> klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  bool LinkFields(Thread* self, Handle<mirror::Class> klass, bool is_static, size_t* class_size)
+  bool LinkFields(Thread* self, Handle<mirror::Class> klass, bool is_static, out<size_t> class_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void LinkCode(ArtMethod* method, const OatFile::OatClass* oat_class,
+  void LinkCode(ArtMethod* method,
+                const OatFile::OatClass* oat_class,
                 uint32_t class_def_method_index)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateReferenceInstanceOffsets(Handle<mirror::Class> klass)
@@ -632,18 +756,16 @@
   void EnsurePreverifiedMethods(Handle<mirror::Class> c)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  mirror::Class* LookupClassFromTableLocked(const char* descriptor,
-                                            mirror::ClassLoader* class_loader,
-                                            size_t hash)
-      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
-
-  mirror::Class* UpdateClass(const char* descriptor, mirror::Class* klass, size_t hash)
-      REQUIRES(!Locks::classlinker_classes_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   mirror::Class* LookupClassFromImage(const char* descriptor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns null if not found.
+  ClassTable* ClassTableForClassLoader(mirror::ClassLoader* class_loader)
+      SHARED_REQUIRES(Locks::mutator_lock_, Locks::classlinker_classes_lock_);
+  // Insert a new class table if not found.
+  ClassTable* InsertClassTableForClassLoader(mirror::ClassLoader* class_loader)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::classlinker_classes_lock_);
+
   // EnsureResolved is called to make sure that a class in the class_table_ has been resolved
   // before returning it to the caller. Its the responsibility of the thread that placed the class
   // in the table to make it resolved. The thread doing resolution must notify on the class' lock
@@ -670,7 +792,7 @@
       REQUIRES(!dex_lock_);
 
   // Check for duplicate class definitions of the given oat file against all open oat files.
-  bool HasCollisions(const OatFile* oat_file, std::string* error_msg) REQUIRES(!dex_lock_);
+  bool HasCollisions(const OatFile* oat_file, out<std::string> error_msg) REQUIRES(!dex_lock_);
 
   bool HasInitWithString(Thread* self, const char* descriptor)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
@@ -678,9 +800,11 @@
   bool CanWeInitializeClass(mirror::Class* klass, bool can_init_statics, bool can_init_parents)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void UpdateClassVirtualMethods(mirror::Class* klass, ArtMethod* new_methods,
+  void UpdateClassVirtualMethods(mirror::Class* klass,
+                                 ArtMethod* new_methods,
                                  size_t new_num_methods)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::classlinker_classes_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!Locks::classlinker_classes_lock_);
 
   std::vector<const DexFile*> boot_class_path_;
   std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
@@ -690,43 +814,11 @@
   std::vector<GcRoot<mirror::DexCache>> dex_caches_ GUARDED_BY(dex_lock_);
   std::vector<const OatFile*> oat_files_ GUARDED_BY(dex_lock_);
 
-  class ClassDescriptorHashEquals {
-   public:
-    // Same class loader and descriptor.
-    std::size_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
-    bool operator()(const GcRoot<mirror::Class>& a, const GcRoot<mirror::Class>& b) const
-        NO_THREAD_SAFETY_ANALYSIS;
-    // Same class loader and descriptor.
-    std::size_t operator()(const std::pair<const char*, mirror::ClassLoader*>& element) const
-        NO_THREAD_SAFETY_ANALYSIS;
-    bool operator()(const GcRoot<mirror::Class>& a,
-                    const std::pair<const char*, mirror::ClassLoader*>& b) const
-        NO_THREAD_SAFETY_ANALYSIS;
-    // Same descriptor.
-    bool operator()(const GcRoot<mirror::Class>& a, const char* descriptor) const
-        NO_THREAD_SAFETY_ANALYSIS;
-    std::size_t operator()(const char* descriptor) const NO_THREAD_SAFETY_ANALYSIS;
-  };
-  class GcRootEmptyFn {
-   public:
-    void MakeEmpty(GcRoot<mirror::Class>& item) const {
-      item = GcRoot<mirror::Class>();
-    }
-    bool IsEmpty(const GcRoot<mirror::Class>& item) const {
-      return item.IsNull();
-    }
-  };
+  // This contains strong roots. To enable concurrent root scanning of the class table.
+  ClassLoaderClassTable classes_ GUARDED_BY(Locks::classlinker_classes_lock_);
 
-  // hash set which hashes class descriptor, and compares descriptors nad class loaders. Results
-  // should be compared for a matching Class descriptor and class loader.
-  typedef HashSet<GcRoot<mirror::Class>, GcRootEmptyFn, ClassDescriptorHashEquals,
-      ClassDescriptorHashEquals, TrackingAllocator<GcRoot<mirror::Class>, kAllocatorTagClassTable>>
-      Table;
-  // This contains strong roots. To enable concurrent root scanning of
-  // the class table, be careful to use a read barrier when accessing this.
-  Table class_table_ GUARDED_BY(Locks::classlinker_classes_lock_);
-  Table pre_zygote_class_table_ GUARDED_BY(Locks::classlinker_classes_lock_);
-  std::vector<GcRoot<mirror::Class>> new_class_roots_;
+  // New class roots, only used by CMS since the GC needs to mark these in the pause.
+  std::vector<GcRoot<mirror::Class>> new_class_roots_ GUARDED_BY(Locks::classlinker_classes_lock_);
 
   // Do we need to search dex caches to find image classes?
   bool dex_cache_image_class_lookup_required_;
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
new file mode 100644
index 0000000..c245d4e
--- /dev/null
+++ b/runtime/class_table.cc
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "class_table.h"
+
+#include "mirror/class-inl.h"
+
+namespace art {
+
+ClassTable::ClassTable() {
+  classes_.push_back(ClassSet());
+}
+
+void ClassTable::FreezeSnapshot() {
+  classes_.push_back(ClassSet());
+}
+
+bool ClassTable::Contains(mirror::Class* klass) {
+  for (ClassSet& class_set : classes_) {
+    auto it = class_set.Find(GcRoot<mirror::Class>(klass));
+    if (it != class_set.end()) {
+      return it->Read() == klass;
+    }
+  }
+  return false;
+}
+
+mirror::Class* ClassTable::UpdateClass(const char* descriptor, mirror::Class* klass, size_t hash) {
+  // Only the latest table may be updated; a miss is fatal in all builds (never deref end()).
+  auto existing_it = classes_.back().FindWithHash(descriptor, hash);
+  if (existing_it == classes_.back().end()) {
+    for (const ClassSet& class_set : classes_) {
+      if (class_set.FindWithHash(descriptor, hash) != class_set.end()) {
+        LOG(FATAL) << "Updating class found in frozen table " << descriptor;
+      }
+    }
+    LOG(FATAL) << "Updating class not found " << descriptor;
+  }
+  mirror::Class* const existing = existing_it->Read();
+  CHECK_NE(existing, klass) << descriptor;
+  CHECK(!existing->IsResolved()) << descriptor;
+  CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusResolving) << descriptor;
+  CHECK(!klass->IsTemp()) << descriptor;
+  VerifyObject(klass);
+  // Update the element in the hash set with the new class. This is safe to do since the descriptor
+  // doesn't change.
+  *existing_it = GcRoot<mirror::Class>(klass);
+  return existing;
+}
+
+void ClassTable::VisitRoots(RootVisitor* visitor, VisitRootFlags flags ATTRIBUTE_UNUSED) {
+  BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(
+      visitor, RootInfo(kRootStickyClass));
+  for (ClassSet& class_set : classes_) {
+    for (GcRoot<mirror::Class>& root : class_set) {
+      buffered_visitor.VisitRoot(root);
+    }
+  }
+}
+
+bool ClassTable::Visit(ClassVisitor* visitor) {
+  for (ClassSet& class_set : classes_) {
+    for (GcRoot<mirror::Class>& root : class_set) {
+      if (!visitor->Visit(root.Read())) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+size_t ClassTable::NumZygoteClasses() const {
+  size_t sum = 0;
+  for (size_t i = 0; i < classes_.size() - 1; ++i) {
+    sum += classes_[i].Size();
+  }
+  return sum;
+}
+
+size_t ClassTable::NumNonZygoteClasses() const {
+  return classes_.back().Size();
+}
+
+mirror::Class* ClassTable::Lookup(const char* descriptor, size_t hash) {
+  for (ClassSet& class_set : classes_) {  // Search every snapshot, oldest first.
+    auto it = class_set.FindWithHash(descriptor, hash);
+    if (it != class_set.end()) {
+      return it->Read();
+    }
+  }
+  return nullptr;  // Not present in any snapshot.
+}
+
+void ClassTable::Insert(mirror::Class* klass) {
+  classes_.back().Insert(GcRoot<mirror::Class>(klass));
+}
+
+void ClassTable::InsertWithHash(mirror::Class* klass, size_t hash) {
+  classes_.back().InsertWithHash(GcRoot<mirror::Class>(klass), hash);
+}
+
+bool ClassTable::Remove(const char* descriptor) {
+  for (ClassSet& class_set : classes_) {
+    auto it = class_set.Find(descriptor);
+    if (it != class_set.end()) {
+      class_set.Erase(it);
+      return true;
+    }
+  }
+  return false;
+}
+
+std::size_t ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& root)
+    const {
+  std::string temp;
+  return ComputeModifiedUtf8Hash(root.Read()->GetDescriptor(&temp));
+}
+
+bool ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& a,
+                                                       const GcRoot<mirror::Class>& b) const {
+  DCHECK_EQ(a.Read()->GetClassLoader(), b.Read()->GetClassLoader());
+  std::string temp;
+  return a.Read()->DescriptorEquals(b.Read()->GetDescriptor(&temp));
+}
+
+bool ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& a,
+                                                       const char* descriptor) const {
+  return a.Read()->DescriptorEquals(descriptor);
+}
+
+std::size_t ClassTable::ClassDescriptorHashEquals::operator()(const char* descriptor) const {
+  return ComputeModifiedUtf8Hash(descriptor);
+}
+
+}  // namespace art
diff --git a/runtime/class_table.h b/runtime/class_table.h
new file mode 100644
index 0000000..252a47d
--- /dev/null
+++ b/runtime/class_table.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_CLASS_TABLE_H_
+#define ART_RUNTIME_CLASS_TABLE_H_
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "base/allocator.h"
+#include "base/hash_set.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "dex_file.h"
+#include "gc_root.h"
+#include "object_callbacks.h"
+#include "runtime.h"
+
+namespace art {
+
+namespace mirror {
+  class ClassLoader;
+}  // namespace mirror
+
+class ClassVisitor {
+ public:
+  virtual ~ClassVisitor() {}
+  // Return true to continue visiting.
+  virtual bool Visit(mirror::Class* klass) = 0;
+};
+
+// Each loader has a ClassTable
+class ClassTable {
+ public:
+  ClassTable();
+
+  // Used by image writer for checking.
+  bool Contains(mirror::Class* klass)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Freeze the current class tables by allocating a new table and never updating or modifying the
+  // existing table. This helps prevent dirty pages caused by inserting after the zygote fork.
+  void FreezeSnapshot()
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns the number of classes in previous snapshots.
+  size_t NumZygoteClasses() const REQUIRES(Locks::classlinker_classes_lock_);
+
+  // Returns the number of classes in the latest snapshot.
+  size_t NumNonZygoteClasses() const REQUIRES(Locks::classlinker_classes_lock_);
+
+  // Update a class in the table with the new class. Returns the existing class which was replaced.
+  mirror::Class* UpdateClass(const char* descriptor, mirror::Class* new_klass, size_t hash)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Return false if the callback told us to exit.
+  bool Visit(ClassVisitor* visitor)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  mirror::Class* Lookup(const char* descriptor, size_t hash)
+      SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_);
+
+  void Insert(mirror::Class* klass)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+  void InsertWithHash(mirror::Class* klass, size_t hash)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns true if the class was found and removed, false otherwise.
+  bool Remove(const char* descriptor)
+      REQUIRES(Locks::classlinker_classes_lock_) SHARED_REQUIRES(Locks::mutator_lock_);
+
+ private:
+  class ClassDescriptorHashEquals {
+   public:
+    // Root overloads: hash and compare by descriptor (class loaders are only DCHECKed equal).
+    std::size_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
+    bool operator()(const GcRoot<mirror::Class>& a, const GcRoot<mirror::Class>& b) const
+        NO_THREAD_SAFETY_ANALYSIS;
+    // Descriptor-string overloads: compare a root against, or hash, a raw descriptor.
+    bool operator()(const GcRoot<mirror::Class>& a, const char* descriptor) const
+        NO_THREAD_SAFETY_ANALYSIS;
+    std::size_t operator()(const char* descriptor) const NO_THREAD_SAFETY_ANALYSIS;
+  };
+  class GcRootEmptyFn {
+   public:
+    void MakeEmpty(GcRoot<mirror::Class>& item) const {
+      item = GcRoot<mirror::Class>();
+    }
+    bool IsEmpty(const GcRoot<mirror::Class>& item) const {
+      return item.IsNull();
+    }
+  };
+  // Hash set which hashes class descriptor, and compares descriptors and class loaders. Results
+  // should be compared for a matching Class descriptor and class loader.
+  typedef HashSet<GcRoot<mirror::Class>, GcRootEmptyFn, ClassDescriptorHashEquals,
+      ClassDescriptorHashEquals, TrackingAllocator<GcRoot<mirror::Class>, kAllocatorTagClassTable>>
+      ClassSet;
+
+  // TODO: shard lock to have one per class loader.
+  std::vector<ClassSet> classes_ GUARDED_BY(Locks::classlinker_classes_lock_);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_CLASS_TABLE_H_
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 287a50b..1865516 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -24,6 +24,7 @@
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/time_utils.h"
+#include "base/out.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
@@ -948,33 +949,27 @@
   return JDWP::ERR_NONE;
 }
 
+// Class visitor that collects the complete list of reference classes (i.e. all
+// classes except the primitive types), appending a RefTypeId for each one to
+// the caller-supplied vector.
+class ClassListCreator : public ClassVisitor {
+ public:
+  explicit ClassListCreator(std::vector<JDWP::RefTypeId>* classes) : classes_(classes) {}
+
+  bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (!c->IsPrimitive()) {
+      classes_->push_back(Dbg::GetObjectRegistry()->AddRefType(c));
+    }
+    return true;
+  }
+
+ private:
+  std::vector<JDWP::RefTypeId>* const classes_;
+};
+
 void Dbg::GetClassList(std::vector<JDWP::RefTypeId>* classes) {
-  // Get the complete list of reference classes (i.e. all classes except
-  // the primitive types).
-  // Returns a newly-allocated buffer full of RefTypeId values.
-  struct ClassListCreator {
-    explicit ClassListCreator(std::vector<JDWP::RefTypeId>* classes_in) : classes(classes_in) {
-    }
-
-    static bool Visit(mirror::Class* c, void* arg) {
-      return reinterpret_cast<ClassListCreator*>(arg)->Visit(c);
-    }
-
-    // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
-    // annotalysis.
-    bool Visit(mirror::Class* c) NO_THREAD_SAFETY_ANALYSIS {
-      if (!c->IsPrimitive()) {
-        classes->push_back(gRegistry->AddRefType(c));
-      }
-      return true;
-    }
-
-    std::vector<JDWP::RefTypeId>* const classes;
-  };
-
   ClassListCreator clc(classes);
-  Runtime::Current()->GetClassLinker()->VisitClassesWithoutClassesLock(ClassListCreator::Visit,
-                                                                       &clc);
+  Runtime::Current()->GetClassLinker()->VisitClassesWithoutClassesLock(&clc);
 }
 
 JDWP::JdwpError Dbg::GetClassInfo(JDWP::RefTypeId class_id, JDWP::JdwpTypeTag* pTypeTag,
@@ -1006,7 +1001,7 @@
 
 void Dbg::FindLoadedClassBySignature(const char* descriptor, std::vector<JDWP::RefTypeId>* ids) {
   std::vector<mirror::Class*> classes;
-  Runtime::Current()->GetClassLinker()->LookupClasses(descriptor, classes);
+  Runtime::Current()->GetClassLinker()->LookupClasses(descriptor, outof(classes));
   ids->clear();
   for (size_t i = 0; i < classes.size(); ++i) {
     ids->push_back(gRegistry->Add(classes[i]));
@@ -4632,7 +4627,7 @@
   // Send a series of heap segment chunks.
   HeapChunkContext context(what == HPSG_WHAT_MERGED_OBJECTS, native);
   if (native) {
-#if defined(HAVE_ANDROID_OS) && defined(USE_DLMALLOC)
+#if defined(__ANDROID__) && defined(USE_DLMALLOC)
     dlmalloc_inspect_all(HeapChunkContext::HeapChunkNativeCallback, &context);
     HeapChunkContext::HeapChunkNativeCallback(nullptr, nullptr, 0, &context);  // Indicate end of a space.
 #else
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index cef2510..3d3f7a1 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -20,6 +20,7 @@
 #include <jni.h>
 
 #include "base/macros.h"
+#include "base/mutex.h"
 #include "offsets.h"
 
 #define QUICK_ENTRYPOINT_OFFSET(ptr_size, x) \
@@ -71,6 +72,16 @@
                            Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 
+// Read barrier entrypoints.
+// Compilers for ARM, ARM64, MIPS, MIPS64 can insert a call to this function directly.
+// For x86 and x86_64, compilers need a wrapper assembly function, to handle mismatch in ABI.
+// This is the read barrier slow path for instance and static fields and reference-type arrays.
+// TODO: Currently the read barrier does not have a fast path for compilers to directly generate.
+// Ideally the slow path should only take one parameter "ref".
+extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref, mirror::Object* obj,
+                                              uint32_t offset)
+    SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR;
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_H_
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 60bbf4a..73d8ae7 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -145,7 +145,8 @@
   V(NewStringFromStringBuffer, void) \
   V(NewStringFromStringBuilder, void) \
 \
-  V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*)
+  V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \
+  V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t)
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_
 #undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_   // #define is only for lint.
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 25a943a..0a1d806 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -557,4 +557,16 @@
   return -1;  // failure
 }
 
+// TODO: Currently the read barrier does not have a fast path. Ideally the slow path should only
+// take one parameter "ref", which is generated by the fast path.
+extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUSED,
+                                              mirror::Object* obj, uint32_t offset) {
+  DCHECK(kUseReadBarrier);
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(obj) + offset;
+  mirror::HeapReference<mirror::Object>* ref_addr =
+      reinterpret_cast<mirror::HeapReference<mirror::Object>*>(raw_addr);
+  return ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, true>(obj, MemberOffset(offset),
+                                                                      ref_addr);
+}
+
 }  // namespace art
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index c05c935..f7a3cd5 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -311,8 +311,9 @@
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuilder, pReadBarrierJni,
                          sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierSlow, sizeof(void*));
 
-    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierJni)
+    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierSlow)
             + sizeof(void*) == sizeof(QuickEntryPoints), QuickEntryPoints_all);
   }
 };
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index 70704c1..b9f24f3 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -88,7 +88,7 @@
 
   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (kIsDebugBuild && !root->IsNull()) {
+    if (!root->IsNull()) {
       VisitRoot(root);
     }
   }
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index ec4d626..16c9354 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -20,7 +20,7 @@
 #include "base/stl_util.h"
 #include "stack.h"
 
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
 #include "cutils/properties.h"
 #endif
 
@@ -42,7 +42,7 @@
 }
 
 void AllocRecordObjectMap::SetProperties() {
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
   // Check whether there's a system property overriding the max number of records.
   const char* propertyName = "dalvik.vm.allocTrackerMax";
   char allocMaxString[PROPERTY_VALUE_MAX];
diff --git a/runtime/gc/allocator/dlmalloc.h b/runtime/gc/allocator/dlmalloc.h
index 0e91a43..0558921 100644
--- a/runtime/gc/allocator/dlmalloc.h
+++ b/runtime/gc/allocator/dlmalloc.h
@@ -35,7 +35,7 @@
 #include "../../bionic/libc/upstream-dlmalloc/malloc.h"
 #pragma GCC diagnostic pop
 
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
 // Define dlmalloc routines from bionic that cannot be included directly because of redefining
 // symbols from the include above.
 extern "C" void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*), void* arg);
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index abaa97f..470bc1c 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1170,7 +1170,7 @@
 
   // First mark slots to free in the bulk free bit map without locking the
   // size bracket locks. On host, unordered_set is faster than vector + flag.
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
   std::vector<Run*> runs;
 #else
   std::unordered_set<Run*, hash_run, eq_run> runs;
@@ -1237,7 +1237,7 @@
     DCHECK_EQ(run->magic_num_, kMagicNum);
     // Set the bit in the bulk free bit map.
     freed_bytes += run->MarkBulkFreeBitMap(ptr);
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
     if (!run->to_be_bulk_freed_) {
       run->to_be_bulk_freed_ = true;
       runs.push_back(run);
@@ -1252,7 +1252,7 @@
   // union the bulk free bit map into the thread-local free bit map
   // (for thread-local runs.)
   for (Run* run : runs) {
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
     DCHECK(run->to_be_bulk_freed_);
     run->to_be_bulk_freed_ = false;
 #endif
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 07309d8..5f617bd 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1277,7 +1277,7 @@
   FinishGC(self, collector::kGcTypeNone);
   size_t native_reclaimed = 0;
 
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
   // Only trim the native heap if we don't care about pauses.
   if (!CareAboutPauseTimes()) {
 #if defined(USE_DLMALLOC)
@@ -1290,7 +1290,7 @@
     UNIMPLEMENTED(WARNING) << "Add trimming support";
 #endif
   }
-#endif  // HAVE_ANDROID_OS
+#endif  // __ANDROID__
   uint64_t end_ns = NanoTime();
   VLOG(heap) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
       << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 1923d24..aba32a0 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -25,6 +25,7 @@
 
 #include "art_method.h"
 #include "base/macros.h"
+#include "base/out.h"
 #include "base/stl_util.h"
 #include "base/scoped_flock.h"
 #include "base/time_utils.h"
@@ -207,7 +208,7 @@
   // Note: we do not generate a fully debuggable boot image so we do not pass the
   // compiler flag --debuggable here.
 
-  Runtime::Current()->AddCurrentRuntimeFeaturesAsDex2OatArguments(&arg_vector);
+  Runtime::Current()->AddCurrentRuntimeFeaturesAsDex2OatArguments(outof(arg_vector));
   CHECK_EQ(image_isa, kRuntimeISA)
       << "We should always be generating an image for the current isa.";
 
@@ -789,10 +790,13 @@
 
   CHECK(image_header.GetOatDataBegin() != nullptr);
 
-  OatFile* oat_file = OatFile::Open(oat_filename, oat_filename, image_header.GetOatDataBegin(),
+  OatFile* oat_file = OatFile::Open(oat_filename,
+                                    oat_filename,
+                                    image_header.GetOatDataBegin(),
                                     image_header.GetOatFileBegin(),
                                     !Runtime::Current()->IsAotCompiler(),
-                                    nullptr, error_msg);
+                                    nullptr /* no abs dex location */,
+                                    outof_ptr(error_msg));
   if (oat_file == nullptr) {
     *error_msg = StringPrintf("Failed to open oat file '%s' referenced from image %s: %s",
                               oat_filename.c_str(), GetName(), error_msg->c_str());
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 75fc84b..c9ba6cf 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -28,6 +28,8 @@
 
 namespace art {
 
+static constexpr bool kDumpStackOnNonLocalReference = false;
+
 template<typename T>
 class MutatorLockedDumpable {
  public:
@@ -183,7 +185,9 @@
       if (env->check_jni) {
         ScopedObjectAccess soa(self);
         LOG(WARNING) << "Attempt to remove non-JNI local reference, dumping thread";
-        self->Dump(LOG(WARNING));
+        if (kDumpStackOnNonLocalReference) {
+          self->Dump(LOG(WARNING));
+        }
       }
       return true;
     }
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 9711cf2..e28d578 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -49,12 +49,20 @@
 static constexpr StackVisitor::StackWalkKind kInstrumentationStackWalk =
     StackVisitor::StackWalkKind::kSkipInlinedFrames;
 
-static bool InstallStubsClassVisitor(mirror::Class* klass, void* arg)
-    REQUIRES(Locks::mutator_lock_) {
-  Instrumentation* instrumentation = reinterpret_cast<Instrumentation*>(arg);
-  instrumentation->InstallStubsForClass(klass);
-  return true;  // we visit all classes.
-}
+// ClassVisitor that calls Instrumentation::InstallStubsForClass on each class it is given.
+class InstallStubsClassVisitor : public ClassVisitor {
+ public:
+  explicit InstallStubsClassVisitor(Instrumentation* instrumentation)
+      : instrumentation_(instrumentation) {}
+
+  bool Visit(mirror::Class* klass) OVERRIDE REQUIRES(Locks::mutator_lock_) {
+    instrumentation_->InstallStubsForClass(klass);
+    return true;  // we visit all classes.
+  }
+
+ private:
+  Instrumentation* const instrumentation_;
+};
 
 Instrumentation::Instrumentation()
     : instrumentation_stubs_installed_(false), entry_exit_stubs_installed_(false),
@@ -563,14 +571,16 @@
       entry_exit_stubs_installed_ = true;
       interpreter_stubs_installed_ = false;
     }
-    runtime->GetClassLinker()->VisitClasses(InstallStubsClassVisitor, this);
+    InstallStubsClassVisitor visitor(this);
+    runtime->GetClassLinker()->VisitClasses(&visitor);
     instrumentation_stubs_installed_ = true;
     MutexLock mu(self, *Locks::thread_list_lock_);
     runtime->GetThreadList()->ForEach(InstrumentationInstallStack, this);
   } else {
     interpreter_stubs_installed_ = false;
     entry_exit_stubs_installed_ = false;
-    runtime->GetClassLinker()->VisitClasses(InstallStubsClassVisitor, this);
+    InstallStubsClassVisitor visitor(this);
+    runtime->GetClassLinker()->VisitClasses(&visitor);
     // Restore stack only if there is no method currently deoptimized.
     bool empty;
     {
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 2486a98..a6cccef 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -553,7 +553,7 @@
   ArtMethod* unboxed_closure = nullptr;
   // Raise an exception if unboxing fails.
   if (!Runtime::Current()->GetLambdaBoxTable()->UnboxLambda(boxed_closure_object,
-                                                            &unboxed_closure)) {
+                                                            outof(unboxed_closure))) {
     CHECK(self->IsExceptionPending());
     return false;
   }
diff --git a/runtime/jdwp/jdwp_adb.cc b/runtime/jdwp/jdwp_adb.cc
index adc2912..51952c4 100644
--- a/runtime/jdwp/jdwp_adb.cc
+++ b/runtime/jdwp/jdwp_adb.cc
@@ -24,7 +24,7 @@
 #include "base/stringprintf.h"
 #include "jdwp/jdwp_priv.h"
 
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
 #include "cutils/sockets.h"
 #endif
 
@@ -224,7 +224,7 @@
        */
       int  ret = connect(control_sock_, &control_addr_.controlAddrPlain, control_addr_len_);
       if (!ret) {
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
         if (!socket_peer_is_trusted(control_sock_)) {
           if (shutdown(control_sock_, SHUT_RDWR)) {
             PLOG(ERROR) << "trouble shutting down socket";
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 260abe7..5a9a0f5 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -248,7 +248,7 @@
     case kJdwpTransportSocket:
       InitSocketTransport(state.get(), options);
       break;
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
     case kJdwpTransportAndroidAdb:
       InitAdbTransport(state.get(), options);
       break;
diff --git a/runtime/lambda/box_table.cc b/runtime/lambda/box_table.cc
index 64a6076..22cc820 100644
--- a/runtime/lambda/box_table.cc
+++ b/runtime/lambda/box_table.cc
@@ -94,8 +94,9 @@
   return method_as_object;
 }
 
-bool BoxTable::UnboxLambda(mirror::Object* object, ClosureType* out_closure) {
+bool BoxTable::UnboxLambda(mirror::Object* object, out<ClosureType> out_closure) {
+  // Only out_closure became an out<>; object is still a raw pointer, so keep its null check.
   DCHECK(object != nullptr);
   *out_closure = nullptr;
 
   // Note that we do not need to access lambda_table_lock_ here
diff --git a/runtime/lambda/box_table.h b/runtime/lambda/box_table.h
index 312d811..c6d3d0c 100644
--- a/runtime/lambda/box_table.h
+++ b/runtime/lambda/box_table.h
@@ -18,6 +18,7 @@
 
 #include "base/allocator.h"
 #include "base/hash_map.h"
+#include "base/out.h"
 #include "gc_root.h"
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -51,7 +52,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::lambda_table_lock_);
 
   // Unboxes an object back into the lambda. Returns false and throws an exception on failure.
-  bool UnboxLambda(mirror::Object* object, ClosureType* out_closure)
+  bool UnboxLambda(mirror::Object* object, out<ClosureType> out_closure)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Sweep weak references to lambda boxes. Update the addresses if the objects have been
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 8df8f96..d9ad7dc 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -280,7 +280,7 @@
   ScopedFd fd(-1);
 
 #ifdef USE_ASHMEM
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
   const bool use_ashmem = true;
 #else
   // When not on Android ashmem is faked using files in /tmp. Ensure that such files won't
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 069e346..6568487 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -913,6 +913,33 @@
   DCHECK_EQ(pointer_size, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
 }
 
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline Class* Class::GetComponentType() {
+  return GetFieldObject<Class, kVerifyFlags, kReadBarrierOption>(ComponentTypeOffset());
+}
+
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline bool Class::IsArrayClass() {
+  return GetComponentType<kVerifyFlags, kReadBarrierOption>() != nullptr;
+}
+
+inline bool Class::IsAssignableFrom(Class* src) {
+  DCHECK(src != nullptr);
+  if (this == src) {
+    // Can always assign to things of the same type.
+    return true;
+  } else if (IsObjectClass()) {
+    // Can assign any reference to java.lang.Object.
+    return !src->IsPrimitive();
+  } else if (IsInterface()) {
+    return src->Implements(this);
+  } else if (src->IsArrayClass()) {
+    return IsAssignableFromArray(src);
+  } else {
+    return !src->IsInterface() && src->IsSubClass(this);
+  }
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index c01a5e8..d95bcd8 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -404,9 +404,8 @@
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  bool IsArrayClass() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetComponentType<kVerifyFlags, kReadBarrierOption>() != nullptr;
-  }
+
+  bool IsArrayClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
@@ -423,9 +422,7 @@
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  Class* GetComponentType() SHARED_REQUIRES(Locks::mutator_lock_) {
-    return GetFieldObject<Class, kVerifyFlags, kReadBarrierOption>(ComponentTypeOffset());
-  }
+  Class* GetComponentType() SHARED_REQUIRES(Locks::mutator_lock_);
 
   void SetComponentType(Class* new_component_type) SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(GetComponentType() == nullptr);
@@ -617,22 +614,7 @@
   // downcast would be necessary. Similarly for interfaces, a class that implements (or an interface
   // that extends) another can be assigned to its parent, but not vice-versa. All Classes may assign
   // to themselves. Classes for primitive types may not assign to each other.
-  ALWAYS_INLINE bool IsAssignableFrom(Class* src) SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(src != nullptr);
-    if (this == src) {
-      // Can always assign to things of the same type.
-      return true;
-    } else if (IsObjectClass()) {
-      // Can assign any reference to java.lang.Object.
-      return !src->IsPrimitive();
-    } else if (IsInterface()) {
-      return src->Implements(this);
-    } else if (src->IsArrayClass()) {
-      return IsAssignableFromArray(src);
-    } else {
-      return !src->IsInterface() && src->IsSubClass(this);
-    }
-  }
+  ALWAYS_INLINE bool IsAssignableFrom(Class* src) SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE Class* GetSuperClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 4f97d20..1b210bb 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -17,6 +17,7 @@
 #include "dalvik_system_DexFile.h"
 
 #include "base/logging.h"
+#include "base/out.h"
 #include "base/stl_util.h"
 #include "base/stringprintf.h"
 #include "class_linker.h"
@@ -164,7 +165,8 @@
   std::vector<std::unique_ptr<const DexFile>> dex_files;
   std::vector<std::string> error_msgs;
 
-  dex_files = linker->OpenDexFilesFromOat(sourceName.c_str(), outputName.c_str(), &error_msgs);
+  dex_files =
+      linker->OpenDexFilesFromOat(sourceName.c_str(), outputName.c_str(), outof(error_msgs));
 
   if (!dex_files.empty()) {
     jlongArray array = ConvertNativeToJavaArray(env, dex_files);
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 7abc546..9ea339a 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -16,7 +16,7 @@
 
 #include "dalvik_system_VMRuntime.h"
 
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
 extern "C" void android_set_application_target_sdk_version(uint32_t version);
 #endif
 #include <limits.h>
@@ -196,7 +196,7 @@
   // Note that targetSdkVersion may be 0, meaning "current".
   Runtime::Current()->SetTargetSdkVersion(target_sdk_version);
 
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
   // This part is letting libc/dynamic linker know about current app's
   // target sdk version to enable compatibility workarounds.
   android_set_application_target_sdk_version(static_cast<uint32_t>(target_sdk_version));
diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc
index abac815..856a3e7 100644
--- a/runtime/native/java_lang_Runtime.cc
+++ b/runtime/native/java_lang_Runtime.cc
@@ -31,10 +31,10 @@
 #include "verify_object-inl.h"
 
 #include <sstream>
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
 // This function is provided by android linker.
 extern "C" void android_update_LD_LIBRARY_PATH(const char* ld_library_path);
-#endif  // HAVE_ANDROID_OS
+#endif  // __ANDROID__
 
 namespace art {
 
@@ -53,7 +53,7 @@
 }
 
 static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPathJstr) {
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
   if (javaLdLibraryPathJstr != nullptr) {
     ScopedUtfChars ldLibraryPath(env, javaLdLibraryPathJstr);
     if (ldLibraryPath.c_str() != nullptr) {
diff --git a/runtime/native/java_lang_VMClassLoader.cc b/runtime/native/java_lang_VMClassLoader.cc
index 1515630..62a0b76 100644
--- a/runtime/native/java_lang_VMClassLoader.cc
+++ b/runtime/native/java_lang_VMClassLoader.cc
@@ -16,6 +16,7 @@
 
 #include "java_lang_VMClassLoader.h"
 
+#include "base/out.h"
 #include "class_linker.h"
 #include "jni_internal.h"
 #include "mirror/class_loader.h"
@@ -45,7 +46,7 @@
     // Try the common case.
     StackHandleScope<1> hs(soa.Self());
     cl->FindClassInPathClassLoader(soa, soa.Self(), descriptor.c_str(), descriptor_hash,
-                                   hs.NewHandle(loader), &c);
+                                   hs.NewHandle(loader), outof(c));
     if (c != nullptr) {
       return soa.AddLocalReference<jclass>(c);
     }
diff --git a/runtime/oat.h b/runtime/oat.h
index ee2f3f6..29dd76c 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '6', '7', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '6', '8', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 098fe61..80fc7fa 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -27,11 +27,12 @@
 #include <sstream>
 
 // dlopen_ext support from bionic.
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
 #include "android/dlext.h"
 #endif
 
 #include "art_method-inl.h"
 #include "base/bit_vector.h"
+#include "base/out.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
@@ -88,16 +89,16 @@
 OatFile* OatFile::OpenWithElfFile(ElfFile* elf_file,
                                   const std::string& location,
                                   const char* abs_dex_location,
-                                  std::string* error_msg) {
+                                  out<std::string> error_msg) {
   std::unique_ptr<OatFile> oat_file(new OatFile(location, false));
   oat_file->elf_file_.reset(elf_file);
   uint64_t offset, size;
-  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", &offset, &size);
+  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", outof(offset), outof(size));
   CHECK(has_section);
   oat_file->begin_ = elf_file->Begin() + offset;
   oat_file->end_ = elf_file->Begin() + size + offset;
   // Ignore the optional .bss section when opening non-executable.
-  return oat_file->Setup(abs_dex_location, error_msg) ? oat_file.release() : nullptr;
+  return oat_file->Setup(abs_dex_location, outof_forward(error_msg)) ? oat_file.release() : nullptr;
 }
 
 OatFile* OatFile::Open(const std::string& filename,
@@ -106,7 +107,7 @@
                        uint8_t* oat_file_begin,
                        bool executable,
                        const char* abs_dex_location,
-                       std::string* error_msg) {
+                       out<std::string> error_msg) {
   CHECK(!filename.empty()) << location;
   CheckLocation(location);
   std::unique_ptr<OatFile> ret;
@@ -154,27 +155,34 @@
   return ret.release();
 }
 
-OatFile* OatFile::OpenWritable(File* file, const std::string& location,
+OatFile* OatFile::OpenWritable(File* file,
+                               const std::string& location,
                                const char* abs_dex_location,
-                               std::string* error_msg) {
+                               out<std::string> error_msg) {
   CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, true, false, abs_dex_location, error_msg);
+  return OpenElfFile(file, location, nullptr, nullptr, true, false, abs_dex_location,
+                     outof_forward(error_msg));
 }
 
-OatFile* OatFile::OpenReadable(File* file, const std::string& location,
+OatFile* OatFile::OpenReadable(File* file,
+                               const std::string& location,
                                const char* abs_dex_location,
-                               std::string* error_msg) {
+                               out<std::string> error_msg) {
   CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, false, false, abs_dex_location, error_msg);
+  return OpenElfFile(file, location, nullptr, nullptr, false, false, abs_dex_location,
+                     outof_forward(error_msg));
 }
 
 OatFile* OatFile::OpenDlopen(const std::string& elf_filename,
                              const std::string& location,
                              uint8_t* requested_base,
                              const char* abs_dex_location,
-                             std::string* error_msg) {
+                             out<std::string> error_msg) {
   std::unique_ptr<OatFile> oat_file(new OatFile(location, true));
-  bool success = oat_file->Dlopen(elf_filename, requested_base, abs_dex_location, error_msg);
+  bool success = oat_file->Dlopen(elf_filename,
+                                  requested_base,
+                                  abs_dex_location,
+                                  outof_forward(error_msg));
   if (!success) {
     return nullptr;
   }
@@ -188,10 +196,10 @@
                               bool writable,
                               bool executable,
                               const char* abs_dex_location,
-                              std::string* error_msg) {
+                              out<std::string> error_msg) {
   std::unique_ptr<OatFile> oat_file(new OatFile(location, executable));
   bool success = oat_file->ElfFileOpen(file, requested_base, oat_file_begin, writable, executable,
-                                       abs_dex_location, error_msg);
+                                       abs_dex_location, outof_forward(error_msg));
   if (!success) {
     CHECK(!error_msg->empty());
     return nullptr;
@@ -200,8 +208,13 @@
 }
 
 OatFile::OatFile(const std::string& location, bool is_executable)
-    : location_(location), begin_(nullptr), end_(nullptr), bss_begin_(nullptr), bss_end_(nullptr),
-      is_executable_(is_executable), dlopen_handle_(nullptr),
+    : location_(location),
+      begin_(nullptr),
+      end_(nullptr),
+      bss_begin_(nullptr),
+      bss_end_(nullptr),
+      is_executable_(is_executable),
+      dlopen_handle_(nullptr),
       secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
   CHECK(!location_.empty());
 }
@@ -213,8 +226,10 @@
   }
 }
 
-bool OatFile::Dlopen(const std::string& elf_filename, uint8_t* requested_base,
-                     const char* abs_dex_location, std::string* error_msg) {
+bool OatFile::Dlopen(const std::string& elf_filename,
+                     uint8_t* requested_base,
+                     const char* abs_dex_location,
+                     out<std::string> error_msg) {
 #ifdef __APPLE__
   // The dl_iterate_phdr syscall is missing.  There is similar API on OSX,
   // but let's fallback to the custom loading code for the time being.
@@ -229,7 +244,7 @@
     *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
     return false;
   }
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
   android_dlextinfo extinfo;
   extinfo.flags = ANDROID_DLEXT_FORCE_LOAD | ANDROID_DLEXT_FORCE_FIXED_VADDR;
   dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
@@ -319,22 +334,28 @@
     LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
   }
 
-  return Setup(abs_dex_location, error_msg);
+  return Setup(abs_dex_location, outof_forward(error_msg));
 #endif  // __APPLE__
 }
 
-bool OatFile::ElfFileOpen(File* file, uint8_t* requested_base, uint8_t* oat_file_begin,
-                          bool writable, bool executable,
+bool OatFile::ElfFileOpen(File* file,
+                          uint8_t* requested_base,
+                          uint8_t* oat_file_begin,
+                          bool writable,
+                          bool executable,
                           const char* abs_dex_location,
-                          std::string* error_msg) {
+                          out<std::string> error_msg) {
   // TODO: rename requested_base to oat_data_begin
-  elf_file_.reset(ElfFile::Open(file, writable, /*program_header_only*/true, error_msg,
+  elf_file_.reset(ElfFile::Open(file,
+                                writable,
+                                /*program_header_only*/true,
+                                outof_forward(error_msg),
                                 oat_file_begin));
   if (elf_file_ == nullptr) {
     DCHECK(!error_msg->empty());
     return false;
   }
-  bool loaded = elf_file_->Load(executable, error_msg);
+  bool loaded = elf_file_->Load(executable, outof_forward(error_msg));
   if (!loaded) {
     DCHECK(!error_msg->empty());
     return false;
@@ -375,10 +396,10 @@
     bss_end_ += sizeof(uint32_t);
   }
 
-  return Setup(abs_dex_location, error_msg);
+  return Setup(abs_dex_location, outof_forward(error_msg));
 }
 
-bool OatFile::Setup(const char* abs_dex_location, std::string* error_msg) {
+bool OatFile::Setup(const char* abs_dex_location, out<std::string> error_msg) {
   if (!GetOatHeader().IsValid()) {
     std::string cause = GetOatHeader().GetValidationErrorMessage();
     *error_msg = StringPrintf("Invalid oat header for '%s': %s", GetLocation().c_str(),
@@ -617,9 +638,9 @@
   return reinterpret_cast<const DexFile::Header*>(dex_file_pointer_)->file_size_;
 }
 
-std::unique_ptr<const DexFile> OatFile::OatDexFile::OpenDexFile(std::string* error_msg) const {
+std::unique_ptr<const DexFile> OatFile::OatDexFile::OpenDexFile(out<std::string> error_msg) const {
   return DexFile::Open(dex_file_pointer_, FileSize(), dex_file_location_,
-                       dex_file_location_checksum_, this, error_msg);
+                       dex_file_location_checksum_, this, outof_forward(error_msg));
 }
 
 uint32_t OatFile::OatDexFile::GetOatClassOffset(uint16_t class_def_index) const {
@@ -777,7 +798,7 @@
   return out.str();
 }
 
-bool OatFile::CheckStaticDexFileDependencies(const char* dex_dependencies, std::string* msg) {
+bool OatFile::CheckStaticDexFileDependencies(const char* dex_dependencies, out<std::string> msg) {
   if (dex_dependencies == nullptr || dex_dependencies[0] == 0) {
     // No dependencies.
     return true;
@@ -786,7 +807,7 @@
   // Assumption: this is not performance-critical. So it's OK to do this with a std::string and
   //             Split() instead of manual parsing of the combined char*.
   std::vector<std::string> split;
-  Split(dex_dependencies, kDexClassPathEncodingSeparator, &split);
+  Split(dex_dependencies, kDexClassPathEncodingSeparator, outof(split));
   if (split.size() % 2 != 0) {
     // Expected pairs of location and checksum.
     *msg = StringPrintf("Odd number of elements in dependency list %s", dex_dependencies);
@@ -806,8 +827,8 @@
     uint32_t dex_checksum;
     std::string error_msg;
     if (DexFile::GetChecksum(DexFile::GetDexCanonicalLocation(location.c_str()).c_str(),
-                             &dex_checksum,
-                             &error_msg)) {
+                             outof(dex_checksum),
+                             outof(error_msg))) {
       if (converted != dex_checksum) {
         *msg = StringPrintf("Checksums don't match for %s: %" PRId64 " vs %u",
                             location.c_str(), converted, dex_checksum);
@@ -826,8 +847,7 @@
 }
 
 bool OatFile::GetDexLocationsFromDependencies(const char* dex_dependencies,
-                                              std::vector<std::string>* locations) {
-  DCHECK(locations != nullptr);
+                                              out<std::vector<std::string>> locations) {
   if (dex_dependencies == nullptr || dex_dependencies[0] == 0) {
     return true;
   }
@@ -835,7 +855,7 @@
   // Assumption: this is not performance-critical. So it's OK to do this with a std::string and
   //             Split() instead of manual parsing of the combined char*.
   std::vector<std::string> split;
-  Split(dex_dependencies, kDexClassPathEncodingSeparator, &split);
+  Split(dex_dependencies, kDexClassPathEncodingSeparator, outof(split));
   if (split.size() % 2 != 0) {
     // Expected pairs of location and checksum.
     return false;
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 27f8677..6c40c68 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -22,6 +22,7 @@
 #include <vector>
 
 #include "base/mutex.h"
+#include "base/out_fwd.h"
 #include "base/stringpiece.h"
 #include "dex_file.h"
 #include "invoke_type.h"
@@ -45,9 +46,10 @@
 
   // Opens an oat file contained within the given elf file. This is always opened as
   // non-executable at the moment.
-  static OatFile* OpenWithElfFile(ElfFile* elf_file, const std::string& location,
+  static OatFile* OpenWithElfFile(ElfFile* elf_file,
+                                  const std::string& location,
                                   const char* abs_dex_location,
-                                  std::string* error_msg);
+                                  out<std::string> error_msg);
   // Open an oat file. Returns null on failure.  Requested base can
   // optionally be used to request where the file should be loaded.
   // See the ResolveRelativeEncodedDexLocation for a description of how the
@@ -58,20 +60,22 @@
                        uint8_t* oat_file_begin,
                        bool executable,
                        const char* abs_dex_location,
-                       std::string* error_msg);
+                       out<std::string> error_msg);
 
   // Open an oat file from an already opened File.
   // Does not use dlopen underneath so cannot be used for runtime use
   // where relocations may be required. Currently used from
   // ImageWriter which wants to open a writable version from an existing
   // file descriptor for patching.
-  static OatFile* OpenWritable(File* file, const std::string& location,
+  static OatFile* OpenWritable(File* file,
+                               const std::string& location,
                                const char* abs_dex_location,
-                               std::string* error_msg);
+                               out<std::string> error_msg);
   // Opens an oat file from an already opened File. Maps it PROT_READ, MAP_PRIVATE.
-  static OatFile* OpenReadable(File* file, const std::string& location,
+  static OatFile* OpenReadable(File* file,
+                               const std::string& location,
                                const char* abs_dex_location,
-                               std::string* error_msg);
+                               out<std::string> error_msg);
 
   ~OatFile();
 
@@ -252,12 +256,13 @@
 
   // Check the given dependency list against their dex files - thus the name "Static," this does
   // not check the class-loader environment, only whether there have been file updates.
-  static bool CheckStaticDexFileDependencies(const char* dex_dependencies, std::string* msg);
+  static bool CheckStaticDexFileDependencies(const char* dex_dependencies,
+                                             out<std::string> msg);
 
   // Get the dex locations of a dependency list. Note: this is *not* cleaned for synthetic
   // locations of multidex files.
   static bool GetDexLocationsFromDependencies(const char* dex_dependencies,
-                                              std::vector<std::string>* locations);
+                                              out<std::vector<std::string>> locations);
 
  private:
   static void CheckLocation(const std::string& location);
@@ -266,7 +271,7 @@
                              const std::string& location,
                              uint8_t* requested_base,
                              const char* abs_dex_location,
-                             std::string* error_msg);
+                             out<std::string> error_msg);
 
   static OatFile* OpenElfFile(File* file,
                               const std::string& location,
@@ -275,18 +280,22 @@
                               bool writable,
                               bool executable,
                               const char* abs_dex_location,
-                              std::string* error_msg);
+                              out<std::string> error_msg);
 
   explicit OatFile(const std::string& filename, bool executable);
-  bool Dlopen(const std::string& elf_filename, uint8_t* requested_base,
-              const char* abs_dex_location, std::string* error_msg);
-  bool ElfFileOpen(File* file, uint8_t* requested_base,
+  bool Dlopen(const std::string& elf_filename,
+              uint8_t* requested_base,
+              const char* abs_dex_location,
+              out<std::string> error_msg);
+  bool ElfFileOpen(File* file,
+                   uint8_t* requested_base,
                    uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
-                   bool writable, bool executable,
+                   bool writable,
+                   bool executable,
                    const char* abs_dex_location,
-                   std::string* error_msg);
+                   out<std::string> error_msg);
 
-  bool Setup(const char* abs_dex_location, std::string* error_msg);
+  bool Setup(const char* abs_dex_location, out<std::string> error_msg);
 
   // The oat file name.
   //
@@ -365,7 +374,7 @@
 class OatDexFile FINAL {
  public:
   // Opens the DexFile referred to by this OatDexFile from within the containing OatFile.
-  std::unique_ptr<const DexFile> OpenDexFile(std::string* error_msg) const;
+  std::unique_ptr<const DexFile> OpenDexFile(out<std::string> error_msg) const;
 
   const OatFile* GetOatFile() const {
     return oat_file_;
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 29b879e..e919b36 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -29,6 +29,7 @@
 #include <set>
 
 #include "base/logging.h"
+#include "base/out.h"
 #include "base/stringprintf.h"
 #include "class_linker.h"
 #include "gc/heap.h"
@@ -230,7 +231,7 @@
     return std::vector<std::unique_ptr<const DexFile>>();
   }
 
-  std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
+  std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(outof(error_msg));
   if (dex_file.get() == nullptr) {
     LOG(WARNING) << "Failed to open dex file from oat dex file: " << error_msg;
     return std::vector<std::unique_ptr<const DexFile>>();
@@ -246,7 +247,7 @@
       break;
     }
 
-    dex_file = oat_dex_file->OpenDexFile(&error_msg);
+    dex_file = oat_dex_file->OpenDexFile(outof(error_msg));
     if (dex_file.get() == nullptr) {
       LOG(WARNING) << "Failed to open dex file from oat dex file: " << error_msg;
       return std::vector<std::unique_ptr<const DexFile>>();
@@ -271,7 +272,7 @@
 
     std::string error_msg;
     cached_odex_file_name_found_ = DexFilenameToOdexFilename(
-        dex_location_, isa_, &cached_odex_file_name_, &error_msg);
+        dex_location_, isa_, &cached_odex_file_name_, outof(error_msg));
     if (!cached_odex_file_name_found_) {
       // If we can't figure out the odex file, we treat it as if the odex
       // file was inaccessible.
@@ -339,7 +340,7 @@
         DalvikCacheDirectory().c_str(), GetInstructionSetString(isa_));
     std::string error_msg;
     cached_oat_file_name_found_ = GetDalvikCacheFilename(dex_location_,
-        cache_dir.c_str(), &cached_oat_file_name_, &error_msg);
+        cache_dir.c_str(), &cached_oat_file_name_, outof(error_msg));
     if (!cached_oat_file_name_found_) {
       // If we can't determine the oat file name, we treat the oat file as
       // inaccessible.
@@ -432,7 +433,7 @@
     std::string error_msg;
     uint32_t expected_secondary_checksum = 0;
     if (DexFile::GetChecksum(secondary_dex_location.c_str(),
-          &expected_secondary_checksum, &error_msg)) {
+          &expected_secondary_checksum, outof(error_msg))) {
       uint32_t actual_secondary_checksum
         = secondary_oat_dex_file->GetDexFileLocationChecksum();
       if (expected_secondary_checksum != actual_secondary_checksum) {
@@ -722,7 +723,7 @@
   if (runtime->IsDebuggable()) {
     argv.push_back("--debuggable");
   }
-  runtime->AddCurrentRuntimeFeaturesAsDex2OatArguments(&argv);
+  runtime->AddCurrentRuntimeFeaturesAsDex2OatArguments(outof(argv));
 
   if (!runtime->IsVerificationEnabled()) {
     argv.push_back("--compiler-filter=verify-none");
@@ -873,7 +874,7 @@
       std::string error_msg;
       cached_odex_file_.reset(OatFile::Open(odex_file_name.c_str(),
             odex_file_name.c_str(), nullptr, nullptr, load_executable_,
-            dex_location_, &error_msg));
+            dex_location_, outof(error_msg)));
       if (cached_odex_file_.get() == nullptr) {
         VLOG(oat) << "OatFileAssistant test for existing pre-compiled oat file "
           << odex_file_name << ": " << error_msg;
@@ -904,7 +905,7 @@
       std::string error_msg;
       cached_oat_file_.reset(OatFile::Open(oat_file_name.c_str(),
             oat_file_name.c_str(), nullptr, nullptr, load_executable_,
-            dex_location_, &error_msg));
+            dex_location_, outof(error_msg)));
       if (cached_oat_file_.get() == nullptr) {
         VLOG(oat) << "OatFileAssistant test for existing oat file "
           << oat_file_name << ": " << error_msg;
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 03ad2d5..4a0de59 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -26,6 +26,7 @@
 #include <gtest/gtest.h>
 
 #include "art_field-inl.h"
+#include "base/out.h"
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "compiler_callbacks.h"
@@ -87,7 +88,7 @@
       << "Expected dex file to be at: " << GetDexSrc1();
     ASSERT_TRUE(OS::FileExists(GetStrippedDexSrc1().c_str()))
       << "Expected stripped dex file to be at: " << GetStrippedDexSrc1();
-    ASSERT_FALSE(DexFile::GetChecksum(GetStrippedDexSrc1().c_str(), &checksum, &error_msg))
+    ASSERT_FALSE(DexFile::GetChecksum(GetStrippedDexSrc1().c_str(), &checksum, outof(error_msg)))
       << "Expected stripped dex file to be stripped: " << GetStrippedDexSrc1();
     ASSERT_TRUE(OS::FileExists(GetDexSrc2().c_str()))
       << "Expected dex file to be at: " << GetDexSrc2();
@@ -96,12 +97,12 @@
     // GetMultiDexSrc1, but a different secondary dex checksum.
     std::vector<std::unique_ptr<const DexFile>> multi1;
     ASSERT_TRUE(DexFile::Open(GetMultiDexSrc1().c_str(),
-          GetMultiDexSrc1().c_str(), &error_msg, &multi1)) << error_msg;
+          GetMultiDexSrc1().c_str(), outof(error_msg), &multi1)) << error_msg;
     ASSERT_GT(multi1.size(), 1u);
 
     std::vector<std::unique_ptr<const DexFile>> multi2;
     ASSERT_TRUE(DexFile::Open(GetMultiDexSrc2().c_str(),
-          GetMultiDexSrc2().c_str(), &error_msg, &multi2)) << error_msg;
+          GetMultiDexSrc2().c_str(), outof(error_msg), &multi2)) << error_msg;
     ASSERT_GT(multi2.size(), 1u);
 
     ASSERT_EQ(multi1[0]->GetLocationChecksum(), multi2[0]->GetLocationChecksum());
@@ -231,13 +232,13 @@
     args.push_back("--runtime-arg");
     args.push_back("-Xnorelocate");
     std::string error_msg;
-    ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
+    ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, outof(error_msg))) << error_msg;
     setenv("ANDROID_DATA", android_data_.c_str(), 1);
 
     // Verify the odex file was generated as expected.
     std::unique_ptr<OatFile> odex_file(OatFile::Open(
         odex_location.c_str(), odex_location.c_str(), nullptr, nullptr,
-        false, dex_location.c_str(), &error_msg));
+        false, dex_location.c_str(), outof(error_msg)));
     ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
 
     if (!pic) {
@@ -283,7 +284,7 @@
       image_reservation_.push_back(std::unique_ptr<MemMap>(
           MemMap::MapAnonymous("image reservation",
               reinterpret_cast<uint8_t*>(start), end - start,
-              PROT_NONE, false, false, &error_msg)));
+              PROT_NONE, false, false, outof(error_msg))));
       ASSERT_TRUE(image_reservation_.back().get() != nullptr) << error_msg;
       LOG(INFO) << "Reserved space for image " <<
         reinterpret_cast<void*>(image_reservation_.back()->Begin()) << "-" <<
@@ -318,7 +319,7 @@
   OatFileAssistant oat_file_assistant(dex_location, kRuntimeISA, false);
 
   std::string error_msg;
-  ASSERT_TRUE(oat_file_assistant.GenerateOatFile(&error_msg)) << error_msg;
+  ASSERT_TRUE(oat_file_assistant.GenerateOatFile(outof(error_msg))) << error_msg;
 }
 
 // Case: We have a DEX file, but no OAT file for it.
@@ -357,7 +358,7 @@
 
   // Trying to make the oat file up to date should not fail or crash.
   std::string error_msg;
-  EXPECT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg));
+  EXPECT_TRUE(oat_file_assistant.MakeUpToDate(outof(error_msg)));
 
   // Trying to get the best oat file should fail, but not crash.
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
@@ -441,7 +442,7 @@
   args.push_back("--oat-file=" + oat_location);
 
   std::string error_msg;
-  ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
+  ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, outof(error_msg))) << error_msg;
 
   // Verify we can load both dex files.
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
@@ -540,7 +541,7 @@
 
   // Make the oat file up to date.
   std::string error_msg;
-  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(outof(error_msg))) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
 
@@ -596,7 +597,7 @@
 
   // Make the oat file up to date.
   std::string error_msg;
-  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(outof(error_msg))) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
 
@@ -644,7 +645,7 @@
 
   // Make the oat file up to date. This should have no effect.
   std::string error_msg;
-  EXPECT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+  EXPECT_TRUE(oat_file_assistant.MakeUpToDate(outof(error_msg))) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
 
@@ -688,7 +689,7 @@
 
   // Make the oat file up to date.
   std::string error_msg;
-  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(outof(error_msg))) << error_msg;
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
 
@@ -829,7 +830,7 @@
   OatFileAssistant oat_file_assistant(
       dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
   std::string error_msg;
-  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(outof(error_msg))) << error_msg;
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -919,7 +920,7 @@
 
   // Trying to make it up to date should have no effect.
   std::string error_msg;
-  EXPECT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg));
+  EXPECT_TRUE(oat_file_assistant.MakeUpToDate(outof(error_msg)));
   EXPECT_TRUE(error_msg.empty());
 }
 
@@ -958,7 +959,9 @@
     ClassLinker* linker = Runtime::Current()->GetClassLinker();
     std::vector<std::unique_ptr<const DexFile>> dex_files;
     std::vector<std::string> error_msgs;
-    dex_files = linker->OpenDexFilesFromOat(dex_location_.c_str(), oat_location_.c_str(), &error_msgs);
+    dex_files = linker->OpenDexFilesFromOat(dex_location_.c_str(),
+                                            oat_location_.c_str(),
+                                            outof(error_msgs));
     CHECK(!dex_files.empty()) << Join(error_msgs, '\n');
     CHECK(dex_files[0]->GetOatDexFile() != nullptr) << dex_files[0]->GetLocation();
     loaded_oat_file_ = dex_files[0]->GetOatDexFile()->GetOatFile();
@@ -1055,17 +1058,17 @@
   std::string odex_file;
 
   EXPECT_TRUE(OatFileAssistant::DexFilenameToOdexFilename(
-        "/foo/bar/baz.jar", kArm, &odex_file, &error_msg)) << error_msg;
+        "/foo/bar/baz.jar", kArm, &odex_file, outof(error_msg))) << error_msg;
   EXPECT_EQ("/foo/bar/oat/arm/baz.odex", odex_file);
 
   EXPECT_TRUE(OatFileAssistant::DexFilenameToOdexFilename(
-        "/foo/bar/baz.funnyext", kArm, &odex_file, &error_msg)) << error_msg;
+        "/foo/bar/baz.funnyext", kArm, &odex_file, outof(error_msg))) << error_msg;
   EXPECT_EQ("/foo/bar/oat/arm/baz.odex", odex_file);
 
   EXPECT_FALSE(OatFileAssistant::DexFilenameToOdexFilename(
-        "nopath.jar", kArm, &odex_file, &error_msg));
+        "nopath.jar", kArm, &odex_file, outof(error_msg)));
   EXPECT_FALSE(OatFileAssistant::DexFilenameToOdexFilename(
-        "/foo/bar/baz_noext", kArm, &odex_file, &error_msg));
+        "/foo/bar/baz_noext", kArm, &odex_file, outof(error_msg)));
 }
 
 // Verify the dexopt status values from dalvik.system.DexFile
diff --git a/runtime/oat_file_test.cc b/runtime/oat_file_test.cc
index a88553c..3bd6df2 100644
--- a/runtime/oat_file_test.cc
+++ b/runtime/oat_file_test.cc
@@ -20,6 +20,7 @@
 
 #include <gtest/gtest.h>
 
+#include "base/out.h"
 #include "common_runtime_test.h"
 #include "scoped_thread_state_change.h"
 
@@ -75,16 +76,16 @@
   std::string error_msg;
 
   // No dependencies.
-  EXPECT_TRUE(OatFile::CheckStaticDexFileDependencies(nullptr, &error_msg)) << error_msg;
-  EXPECT_TRUE(OatFile::CheckStaticDexFileDependencies("", &error_msg)) << error_msg;
+  EXPECT_TRUE(OatFile::CheckStaticDexFileDependencies(nullptr, outof(error_msg))) << error_msg;
+  EXPECT_TRUE(OatFile::CheckStaticDexFileDependencies("", outof(error_msg))) << error_msg;
 
   // Ill-formed dependencies.
-  EXPECT_FALSE(OatFile::CheckStaticDexFileDependencies("abc", &error_msg));
-  EXPECT_FALSE(OatFile::CheckStaticDexFileDependencies("abc*123*def", &error_msg));
-  EXPECT_FALSE(OatFile::CheckStaticDexFileDependencies("abc*def*", &error_msg));
+  EXPECT_FALSE(OatFile::CheckStaticDexFileDependencies("abc", outof(error_msg)));
+  EXPECT_FALSE(OatFile::CheckStaticDexFileDependencies("abc*123*def", outof(error_msg)));
+  EXPECT_FALSE(OatFile::CheckStaticDexFileDependencies("abc*def*", outof(error_msg)));
 
   // Unsatisfiable dependency.
-  EXPECT_FALSE(OatFile::CheckStaticDexFileDependencies("abc*123*", &error_msg));
+  EXPECT_FALSE(OatFile::CheckStaticDexFileDependencies("abc*123*", outof(error_msg)));
 
   // Load some dex files to be able to do a real test.
   ScopedObjectAccess soa(Thread::Current());
@@ -92,10 +93,10 @@
   std::vector<std::unique_ptr<const DexFile>> dex_files1 = OpenTestDexFiles("Main");
   std::vector<const DexFile*> dex_files_const1 = ToConstDexFiles(dex_files1);
   std::string encoding1 = OatFile::EncodeDexFileDependencies(dex_files_const1);
-  EXPECT_TRUE(OatFile::CheckStaticDexFileDependencies(encoding1.c_str(), &error_msg))
+  EXPECT_TRUE(OatFile::CheckStaticDexFileDependencies(encoding1.c_str(), outof(error_msg)))
       << error_msg << " " << encoding1;
   std::vector<std::string> split1;
-  EXPECT_TRUE(OatFile::GetDexLocationsFromDependencies(encoding1.c_str(), &split1));
+  EXPECT_TRUE(OatFile::GetDexLocationsFromDependencies(encoding1.c_str(), outof(split1)));
   ASSERT_EQ(split1.size(), 1U);
   EXPECT_EQ(split1[0], dex_files_const1[0]->GetLocation());
 
@@ -103,10 +104,10 @@
   EXPECT_GT(dex_files2.size(), 1U);
   std::vector<const DexFile*> dex_files_const2 = ToConstDexFiles(dex_files2);
   std::string encoding2 = OatFile::EncodeDexFileDependencies(dex_files_const2);
-  EXPECT_TRUE(OatFile::CheckStaticDexFileDependencies(encoding2.c_str(), &error_msg))
+  EXPECT_TRUE(OatFile::CheckStaticDexFileDependencies(encoding2.c_str(), outof(error_msg)))
       << error_msg << " " << encoding2;
   std::vector<std::string> split2;
-  EXPECT_TRUE(OatFile::GetDexLocationsFromDependencies(encoding2.c_str(), &split2));
+  EXPECT_TRUE(OatFile::GetDexLocationsFromDependencies(encoding2.c_str(), outof(split2)));
   ASSERT_EQ(split2.size(), 2U);
   EXPECT_EQ(split2[0], dex_files_const2[0]->GetLocation());
   EXPECT_EQ(split2[1], dex_files_const2[1]->GetLocation());
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 25b5e49..7f82497 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -18,6 +18,7 @@
 
 #include <sstream>
 
+#include "base/out.h"
 #include "base/stringpiece.h"
 #include "debugger.h"
 #include "gc/heap.h"
@@ -41,12 +42,11 @@
                                                     // Runtime::Abort
 }
 
-ParsedOptions* ParsedOptions::Create(const RuntimeOptions& options, bool ignore_unrecognized,
-                                     RuntimeArgumentMap* runtime_options) {
-  CHECK(runtime_options != nullptr);
-
+ParsedOptions* ParsedOptions::Create(const RuntimeOptions& options,
+                                     bool ignore_unrecognized,
+                                     out<RuntimeArgumentMap> runtime_options) {
   std::unique_ptr<ParsedOptions> parsed(new ParsedOptions());
-  if (parsed->Parse(options, ignore_unrecognized, runtime_options)) {
+  if (parsed->Parse(options, ignore_unrecognized, outof_forward(runtime_options))) {
     return parsed.release();
   }
   return nullptr;
@@ -293,6 +293,7 @@
 // As a side-effect, populate the hooks from options.
 bool ParsedOptions::ProcessSpecialOptions(const RuntimeOptions& options,
                                           RuntimeArgumentMap* runtime_options,
+                                          // TODO: should be an optional_out here.
                                           std::vector<std::string>* out_options) {
   using M = RuntimeArgumentMap;
 
@@ -399,7 +400,7 @@
 }
 
 bool ParsedOptions::Parse(const RuntimeOptions& options, bool ignore_unrecognized,
-                          RuntimeArgumentMap* runtime_options) {
+                          out<RuntimeArgumentMap> runtime_options) {
   for (size_t i = 0; i < options.size(); ++i) {
     if (true && options[0].first == "-Xzygote") {
       LOG(INFO) << "option[" << i << "]=" << options[i].first;
@@ -410,7 +411,9 @@
 
   // Convert to a simple string list (without the magic pointer options)
   std::vector<std::string> argv_list;
-  if (!ProcessSpecialOptions(options, nullptr, &argv_list)) {
+  if (!ProcessSpecialOptions(options,
+                             nullptr,  // No runtime argument map
+                             outof(argv_list))) {
     return false;
   }
 
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index 529dd5c..bcd6228 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -22,6 +22,7 @@
 
 #include <jni.h>
 
+#include "base/out_fwd.h"
 #include "globals.h"
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
@@ -50,8 +51,9 @@
   static std::unique_ptr<RuntimeParser> MakeParser(bool ignore_unrecognized);
 
   // returns true if parsing succeeds, and stores the resulting options into runtime_options
-  static ParsedOptions* Create(const RuntimeOptions& options, bool ignore_unrecognized,
-                               RuntimeArgumentMap* runtime_options);
+  static ParsedOptions* Create(const RuntimeOptions& options,
+                               bool ignore_unrecognized,
+                               out<RuntimeArgumentMap> runtime_options);
 
   bool (*hook_is_sensitive_thread_)();
   jint (*hook_vfprintf_)(FILE* stream, const char* format, va_list ap);
@@ -63,6 +65,7 @@
 
   bool ProcessSpecialOptions(const RuntimeOptions& options,
                              RuntimeArgumentMap* runtime_options,
+                             // Optional out-parameter; TODO: should become an optional_out.
                              std::vector<std::string>* out_options);
 
   void Usage(const char* fmt, ...);
@@ -72,8 +75,9 @@
   void Exit(int status);
   void Abort();
 
-  bool Parse(const RuntimeOptions& options,  bool ignore_unrecognized,
-             RuntimeArgumentMap* runtime_options);
+  bool Parse(const RuntimeOptions& options,
+             bool ignore_unrecognized,
+             out<RuntimeArgumentMap> runtime_options);
 };
 
 }  // namespace art
diff --git a/runtime/parsed_options_test.cc b/runtime/parsed_options_test.cc
index a8575de..81a48a6 100644
--- a/runtime/parsed_options_test.cc
+++ b/runtime/parsed_options_test.cc
@@ -18,6 +18,7 @@
 
 #include <memory>
 
+#include "base/out.h"
 #include "common_runtime_test.h"
 
 namespace art {
@@ -60,7 +61,7 @@
   options.push_back(std::make_pair("exit", test_exit));
 
   RuntimeArgumentMap map;
-  std::unique_ptr<ParsedOptions> parsed(ParsedOptions::Create(options, false, &map));
+  std::unique_ptr<ParsedOptions> parsed(ParsedOptions::Create(options, false, outof(map)));
   ASSERT_TRUE(parsed.get() != nullptr);
   ASSERT_NE(0u, map.Size());
 
@@ -102,7 +103,7 @@
   options.push_back(std::make_pair("-Xgc:MC", nullptr));
 
   RuntimeArgumentMap map;
-  std::unique_ptr<ParsedOptions> parsed(ParsedOptions::Create(options, false, &map));
+  std::unique_ptr<ParsedOptions> parsed(ParsedOptions::Create(options, false, outof(map)));
   ASSERT_TRUE(parsed.get() != nullptr);
   ASSERT_NE(0u, map.Size());
 
diff --git a/runtime/prebuilt_tools_test.cc b/runtime/prebuilt_tools_test.cc
index 53bc876..a7f7bcd 100644
--- a/runtime/prebuilt_tools_test.cc
+++ b/runtime/prebuilt_tools_test.cc
@@ -23,7 +23,7 @@
 namespace art {
 
 // Run the tests only on host.
-#ifndef HAVE_ANDROID_OS
+#ifndef __ANDROID__
 
 class PrebuiltToolsTest : public CommonRuntimeTest {
 };
@@ -61,6 +61,6 @@
   }
 }
 
-#endif  // HAVE_ANDROID_OS
+#endif  // __ANDROID__
 
 }  // namespace art
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 65bbcbe..75ff27f 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -39,6 +39,7 @@
   kIntrinsicFloatCvt,
   kIntrinsicReverseBits,
   kIntrinsicReverseBytes,
+  kIntrinsicNumberOfLeadingZeros,
   kIntrinsicAbsInt,
   kIntrinsicAbsLong,
   kIntrinsicAbsFloat,
diff --git a/runtime/read_barrier_c.h b/runtime/read_barrier_c.h
index 4f408dd..710c21f 100644
--- a/runtime/read_barrier_c.h
+++ b/runtime/read_barrier_c.h
@@ -47,9 +47,4 @@
 #error "Only one of Baker or Brooks can be enabled at a time."
 #endif
 
-// A placeholder marker to indicate places to add read barriers in the
-// assembly code. This is a development time aid and to be removed
-// after read barriers are added.
-#define THIS_LOAD_REQUIRES_READ_BARRIER
-
 #endif  // ART_RUNTIME_READ_BARRIER_C_H_
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 1914124..a27acb2 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -56,6 +56,7 @@
 #include "atomic.h"
 #include "base/arena_allocator.h"
 #include "base/dumpable.h"
+#include "base/out.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "compiler_callbacks.h"
@@ -307,8 +308,8 @@
     Thread* self = Thread::Current();
     if (self == nullptr) {
       os << "(Aborting thread was not attached to runtime!)\n";
-      DumpKernelStack(os, GetTid(), "  kernel: ", false);
-      DumpNativeStack(os, GetTid(), "  native: ", nullptr);
+      DumpKernelStack(os, GetTid(), "  kernel: ", false /* don't include count */);
+      DumpNativeStack(os, GetTid(), "  native: ", nullptr /* no ucontext ptr */);
     } else {
       os << "Aborting thread:\n";
       if (Locks::mutator_lock_->IsExclusiveHeld(self) || Locks::mutator_lock_->IsSharedHeld(self)) {
@@ -417,7 +418,7 @@
   if (Runtime::instance_ != nullptr) {
     return false;
   }
-  InitLogging(nullptr);  // Calls Locks::Init() as a side effect.
+  InitLogging(nullptr /* no argv */);  // Calls Locks::Init() as a side effect.
   instance_ = new Runtime;
   if (!instance_->Init(options, ignore_unrecognized)) {
     // TODO: Currently deleting the instance will abort the runtime on destruction. Now This will
@@ -659,7 +660,7 @@
   // before fork aren't attributed to an app.
   heap_->ResetGcPerformanceInfo();
 
-  if (jit_.get() == nullptr && jit_options_->UseJIT()) {
+  if (jit_ == nullptr && jit_options_->UseJIT()) {
     // Create the JIT if the flag is set and we haven't already create it (happens for run-tests).
     CreateJit();
   }
@@ -707,9 +708,8 @@
 }
 
 static bool OpenDexFilesFromImage(const std::string& image_location,
-                                  std::vector<std::unique_ptr<const DexFile>>* dex_files,
-                                  size_t* failures) {
-  DCHECK(dex_files != nullptr) << "OpenDexFilesFromImage: out-param is nullptr";
+                                  out<std::vector<std::unique_ptr<const DexFile>>> dex_files,
+                                  out<size_t> failures) {
   std::string system_filename;
   bool has_system = false;
   std::string cache_filename_unused;
@@ -718,12 +718,12 @@
   bool is_global_cache_unused;
   bool found_image = gc::space::ImageSpace::FindImageFilename(image_location.c_str(),
                                                               kRuntimeISA,
-                                                              &system_filename,
-                                                              &has_system,
-                                                              &cache_filename_unused,
-                                                              &dalvik_cache_exists_unused,
-                                                              &has_cache_unused,
-                                                              &is_global_cache_unused);
+                                                              outof(system_filename),
+                                                              outof(has_system),
+                                                              outof(cache_filename_unused),
+                                                              outof(dalvik_cache_exists_unused),
+                                                              outof(has_cache_unused),
+                                                              outof(is_global_cache_unused));
   *failures = 0;
   if (!found_image || !has_system) {
     return false;
@@ -737,12 +737,12 @@
   if (file.get() == nullptr) {
     return false;
   }
-  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file.release(), false, false, &error_msg));
+  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file.release(), false, false, outof(error_msg)));
   if (elf_file.get() == nullptr) {
     return false;
   }
   std::unique_ptr<OatFile> oat_file(OatFile::OpenWithElfFile(elf_file.release(), oat_location,
-                                                             nullptr, &error_msg));
+                                                             nullptr, outof(error_msg)));
   if (oat_file.get() == nullptr) {
     LOG(INFO) << "Unable to use '" << oat_filename << "' because " << error_msg;
     return false;
@@ -753,7 +753,7 @@
       *failures += 1;
       continue;
     }
-    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
+    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(outof(error_msg));
     if (dex_file.get() == nullptr) {
       *failures += 1;
     } else {
@@ -768,10 +768,11 @@
 static size_t OpenDexFiles(const std::vector<std::string>& dex_filenames,
                            const std::vector<std::string>& dex_locations,
                            const std::string& image_location,
-                           std::vector<std::unique_ptr<const DexFile>>* dex_files) {
-  DCHECK(dex_files != nullptr) << "OpenDexFiles: out-param is nullptr";
+                           out<std::vector<std::unique_ptr<const DexFile>>> dex_files) {
   size_t failure_count = 0;
-  if (!image_location.empty() && OpenDexFilesFromImage(image_location, dex_files, &failure_count)) {
+  if (!image_location.empty() && OpenDexFilesFromImage(image_location,
+                                                       outof_forward(dex_files),
+                                                       outof(failure_count))) {
     return failure_count;
   }
   failure_count = 0;
@@ -783,7 +784,7 @@
       LOG(WARNING) << "Skipping non-existent dex file '" << dex_filename << "'";
       continue;
     }
-    if (!DexFile::Open(dex_filename, dex_location, &error_msg, dex_files)) {
+    if (!DexFile::Open(dex_filename, dex_location, outof(error_msg), outof_forward(dex_files))) {
       LOG(WARNING) << "Failed to open .dex from file '" << dex_filename << "': " << error_msg;
       ++failure_count;
     }
@@ -800,7 +801,7 @@
   using Opt = RuntimeArgumentMap;
   RuntimeArgumentMap runtime_options;
   std::unique_ptr<ParsedOptions> parsed_options(
-      ParsedOptions::Create(raw_options, ignore_unrecognized, &runtime_options));
+      ParsedOptions::Create(raw_options, ignore_unrecognized, outof(runtime_options)));
   if (parsed_options.get() == nullptr) {
     LOG(ERROR) << "Failed to parse options";
     ATRACE_END();
@@ -1038,7 +1039,7 @@
     OpenDexFiles(dex_filenames,
                  dex_locations,
                  runtime_options.GetOrDefault(Opt::Image),
-                 &boot_class_path);
+                 outof(boot_class_path));
     instruction_set_ = runtime_options.GetOrDefault(Opt::ImageInstructionSet);
     class_linker_->InitWithoutImage(std::move(boot_class_path));
 
@@ -1167,7 +1168,7 @@
   // the library that implements System.loadLibrary!
   {
     std::string reason;
-    if (!java_vm_->LoadNativeLibrary(env, "libjavacore.so", nullptr, &reason)) {
+    if (!java_vm_->LoadNativeLibrary(env, "libjavacore.so", nullptr, outof(reason))) {
       LOG(FATAL) << "LoadNativeLibrary failed for \"libjavacore.so\": " << reason;
     }
   }
@@ -1330,7 +1331,9 @@
   signals.Block();
 }
 
-bool Runtime::AttachCurrentThread(const char* thread_name, bool as_daemon, jobject thread_group,
+bool Runtime::AttachCurrentThread(const char* thread_name,
+                                  bool as_daemon,
+                                  jobject thread_group,
                                   bool create_peer) {
   return Thread::Attach(thread_name, as_daemon, thread_group, create_peer) != nullptr;
 }
@@ -1436,7 +1439,8 @@
   thread_list_->VisitRoots(visitor);
 }
 
-size_t Runtime::FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
+size_t Runtime::FlipThreadRoots(Closure* thread_flip_visitor,
+                                Closure* flip_callback,
                                 gc::collector::GarbageCollector* collector) {
   return thread_list_->FlipThreadRoots(thread_flip_visitor, flip_callback, collector);
 }
@@ -1623,50 +1627,64 @@
   preinitialization_transaction_->ThrowAbortError(self, nullptr);
 }
 
-void Runtime::RecordWriteFieldBoolean(mirror::Object* obj, MemberOffset field_offset,
-                                      uint8_t value, bool is_volatile) const {
+void Runtime::RecordWriteFieldBoolean(mirror::Object* obj,
+                                      MemberOffset field_offset,
+                                      uint8_t value,
+                                      bool is_volatile) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordWriteFieldBoolean(obj, field_offset, value, is_volatile);
 }
 
-void Runtime::RecordWriteFieldByte(mirror::Object* obj, MemberOffset field_offset,
-                                   int8_t value, bool is_volatile) const {
+void Runtime::RecordWriteFieldByte(mirror::Object* obj,
+                                   MemberOffset field_offset,
+                                   int8_t value,
+                                   bool is_volatile) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordWriteFieldByte(obj, field_offset, value, is_volatile);
 }
 
-void Runtime::RecordWriteFieldChar(mirror::Object* obj, MemberOffset field_offset,
-                                   uint16_t value, bool is_volatile) const {
+void Runtime::RecordWriteFieldChar(mirror::Object* obj,
+                                   MemberOffset field_offset,
+                                   uint16_t value,
+                                   bool is_volatile) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordWriteFieldChar(obj, field_offset, value, is_volatile);
 }
 
-void Runtime::RecordWriteFieldShort(mirror::Object* obj, MemberOffset field_offset,
-                                    int16_t value, bool is_volatile) const {
+void Runtime::RecordWriteFieldShort(mirror::Object* obj,
+                                    MemberOffset field_offset,
+                                    int16_t value,
+                                    bool is_volatile) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordWriteFieldShort(obj, field_offset, value, is_volatile);
 }
 
-void Runtime::RecordWriteField32(mirror::Object* obj, MemberOffset field_offset,
-                                 uint32_t value, bool is_volatile) const {
+void Runtime::RecordWriteField32(mirror::Object* obj,
+                                 MemberOffset field_offset,
+                                 uint32_t value,
+                                 bool is_volatile) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordWriteField32(obj, field_offset, value, is_volatile);
 }
 
-void Runtime::RecordWriteField64(mirror::Object* obj, MemberOffset field_offset,
-                                 uint64_t value, bool is_volatile) const {
+void Runtime::RecordWriteField64(mirror::Object* obj,
+                                 MemberOffset field_offset,
+                                 uint64_t value,
+                                 bool is_volatile) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordWriteField64(obj, field_offset, value, is_volatile);
 }
 
-void Runtime::RecordWriteFieldReference(mirror::Object* obj, MemberOffset field_offset,
-                                        mirror::Object* value, bool is_volatile) const {
+void Runtime::RecordWriteFieldReference(mirror::Object* obj,
+                                        MemberOffset field_offset,
+                                        mirror::Object* value,
+                                        bool is_volatile) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordWriteFieldReference(obj, field_offset, value, is_volatile);
@@ -1707,7 +1725,7 @@
   fault_message_ = message;
 }
 
-void Runtime::AddCurrentRuntimeFeaturesAsDex2OatArguments(std::vector<std::string>* argv)
+void Runtime::AddCurrentRuntimeFeaturesAsDex2OatArguments(out<std::vector<std::string>> argv)
     const {
   if (GetInstrumentation()->InterpretOnly() || UseJit()) {
     argv->push_back("--compiler-filter=interpret-only");
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 4577b75..206623e 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -28,6 +28,7 @@
 
 #include "arch/instruction_set.h"
 #include "base/macros.h"
+#include "base/out.h"
 #include "gc_root.h"
 #include "instrumentation.h"
 #include "jobject_comparator.h"
@@ -224,7 +225,9 @@
   jobject GetSystemClassLoader() const;
 
   // Attaches the calling native thread to the runtime.
-  bool AttachCurrentThread(const char* thread_name, bool as_daemon, jobject thread_group,
+  bool AttachCurrentThread(const char* thread_name,
+                           bool as_daemon,
+                           jobject thread_group,
                            bool create_peer);
 
   void CallExitHook(jint status);
@@ -286,8 +289,7 @@
 
   mirror::Throwable* GetPreAllocatedOutOfMemoryError() SHARED_REQUIRES(Locks::mutator_lock_);
 
-  mirror::Throwable* GetPreAllocatedNoClassDefFoundError()
-      SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::Throwable* GetPreAllocatedNoClassDefFoundError() SHARED_REQUIRES(Locks::mutator_lock_);
 
   const std::vector<std::string>& GetProperties() const {
     return properties_;
@@ -316,8 +318,7 @@
   void VisitImageRoots(RootVisitor* visitor) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Visit all of the roots we can do safely do concurrently.
-  void VisitConcurrentRoots(RootVisitor* visitor,
-                            VisitRootFlags flags = kVisitRootFlagAllRoots)
+  void VisitConcurrentRoots(RootVisitor* visitor, VisitRootFlags flags = kVisitRootFlagAllRoots)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Visit all of the non thread roots, we can do this with mutators unpaused.
@@ -331,7 +332,8 @@
   void VisitThreadRoots(RootVisitor* visitor) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Flip thread roots from from-space refs to to-space refs.
-  size_t FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
+  size_t FlipThreadRoots(Closure* thread_flip_visitor,
+                         Closure* flip_callback,
                          gc::collector::GarbageCollector* collector)
       REQUIRES(!Locks::mutator_lock_);
 
@@ -467,20 +469,34 @@
   void ThrowTransactionAbortError(Thread* self)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void RecordWriteFieldBoolean(mirror::Object* obj, MemberOffset field_offset, uint8_t value,
+  void RecordWriteFieldBoolean(mirror::Object* obj,
+                               MemberOffset field_offset,
+                               uint8_t value,
                                bool is_volatile) const;
-  void RecordWriteFieldByte(mirror::Object* obj, MemberOffset field_offset, int8_t value,
+  void RecordWriteFieldByte(mirror::Object* obj,
+                            MemberOffset field_offset,
+                            int8_t value,
                             bool is_volatile) const;
-  void RecordWriteFieldChar(mirror::Object* obj, MemberOffset field_offset, uint16_t value,
+  void RecordWriteFieldChar(mirror::Object* obj,
+                            MemberOffset field_offset,
+                            uint16_t value,
                             bool is_volatile) const;
-  void RecordWriteFieldShort(mirror::Object* obj, MemberOffset field_offset, int16_t value,
+  void RecordWriteFieldShort(mirror::Object* obj,
+                             MemberOffset field_offset,
+                             int16_t value,
                           bool is_volatile) const;
-  void RecordWriteField32(mirror::Object* obj, MemberOffset field_offset, uint32_t value,
+  void RecordWriteField32(mirror::Object* obj,
+                          MemberOffset field_offset,
+                          uint32_t value,
                           bool is_volatile) const;
-  void RecordWriteField64(mirror::Object* obj, MemberOffset field_offset, uint64_t value,
+  void RecordWriteField64(mirror::Object* obj,
+                          MemberOffset field_offset,
+                          uint64_t value,
                           bool is_volatile) const;
-  void RecordWriteFieldReference(mirror::Object* obj, MemberOffset field_offset,
-                                 mirror::Object* value, bool is_volatile) const;
+  void RecordWriteFieldReference(mirror::Object* obj,
+                                 MemberOffset field_offset,
+                                 mirror::Object* value,
+                                 bool is_volatile) const;
   void RecordWriteArray(mirror::Array* array, size_t index, uint64_t value) const
       SHARED_REQUIRES(Locks::mutator_lock_);
   void RecordStrongStringInsertion(mirror::String* s) const
@@ -499,7 +515,7 @@
     return fault_message_;
   }
 
-  void AddCurrentRuntimeFeaturesAsDex2OatArguments(std::vector<std::string>* arg_vector) const;
+  void AddCurrentRuntimeFeaturesAsDex2OatArguments(out<std::vector<std::string>> arg_vector) const;
 
   bool ExplicitStackOverflowChecks() const {
     return !implicit_so_checks_;
diff --git a/runtime/stack.cc b/runtime/stack.cc
index b07b244..2916eaa 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -19,6 +19,7 @@
 #include "arch/context.h"
 #include "art_method-inl.h"
 #include "base/hex_dump.h"
+#include "base/out.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc_map.h"
@@ -180,7 +181,7 @@
     } else {
       uint16_t reg = code_item->registers_size_ - code_item->ins_size_;
       uint32_t value = 0;
-      bool success = GetVReg(m, reg, kReferenceVReg, &value);
+      bool success = GetVReg(m, reg, kReferenceVReg, outof(value));
       // We currently always guarantee the `this` object is live throughout the method.
       CHECK(success) << "Failed to read the this object in " << PrettyMethod(m);
       return reinterpret_cast<mirror::Object*>(value);
@@ -375,8 +376,8 @@
   QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
   uint32_t vmap_offset_lo, vmap_offset_hi;
   // TODO: IsInContext stops before spotting floating point registers.
-  if (vmap_table.IsInContext(vreg, kind_lo, &vmap_offset_lo) &&
-      vmap_table.IsInContext(vreg + 1, kind_hi, &vmap_offset_hi)) {
+  if (vmap_table.IsInContext(vreg, kind_lo, outof(vmap_offset_lo)) &&
+      vmap_table.IsInContext(vreg + 1, kind_hi, outof(vmap_offset_hi))) {
     bool is_float = (kind_lo == kDoubleLoVReg);
     uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
     uint32_t reg_lo = vmap_table.ComputeRegister(spill_mask, vmap_offset_lo, kind_lo);
@@ -399,8 +400,8 @@
                                                 uint64_t* val) const {
   uint32_t low_32bits;
   uint32_t high_32bits;
-  bool success = GetVRegFromOptimizedCode(m, vreg, kind_lo, &low_32bits);
-  success &= GetVRegFromOptimizedCode(m, vreg + 1, kind_hi, &high_32bits);
+  bool success = GetVRegFromOptimizedCode(m, vreg, kind_lo, outof(low_32bits));
+  success &= GetVRegFromOptimizedCode(m, vreg + 1, kind_hi, outof(high_32bits));
   if (success) {
     *val = (static_cast<uint64_t>(high_32bits) << 32) | static_cast<uint64_t>(low_32bits);
   }
@@ -452,7 +453,7 @@
   QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
   uint32_t vmap_offset;
   // TODO: IsInContext stops before spotting floating point registers.
-  if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
+  if (vmap_table.IsInContext(vreg, kind, outof(vmap_offset))) {
     bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
     uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
     uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
@@ -532,8 +533,8 @@
   QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
   uint32_t vmap_offset_lo, vmap_offset_hi;
   // TODO: IsInContext stops before spotting floating point registers.
-  if (vmap_table.IsInContext(vreg, kind_lo, &vmap_offset_lo) &&
-      vmap_table.IsInContext(vreg + 1, kind_hi, &vmap_offset_hi)) {
+  if (vmap_table.IsInContext(vreg, kind_lo, outof(vmap_offset_lo)) &&
+      vmap_table.IsInContext(vreg + 1, kind_hi, outof(vmap_offset_hi))) {
     bool is_float = (kind_lo == kDoubleLoVReg);
     uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
     uint32_t reg_lo = vmap_table.ComputeRegister(spill_mask, vmap_offset_lo, kind_lo);
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6949b0b..b3efad0 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2139,7 +2139,7 @@
   std::string str(ss.str());
   // log to stderr for debugging command line processes
   std::cerr << str;
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
   // log to logcat for debugging frameworks processes
   LOG(INFO) << str;
 #endif
@@ -2304,6 +2304,7 @@
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuffer)
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuilder)
   QUICK_ENTRY_POINT_INFO(pReadBarrierJni)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierSlow)
 #undef QUICK_ENTRY_POINT_INFO
 
   os << offset;
diff --git a/runtime/thread_linux.cc b/runtime/thread_linux.cc
index 9d54eba..9563b99 100644
--- a/runtime/thread_linux.cc
+++ b/runtime/thread_linux.cc
@@ -44,7 +44,7 @@
 
 void Thread::SetUpAlternateSignalStack() {
   // Create and set an alternate signal stack.
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
   LOG(FATAL) << "Invalid use of alternate signal stack on Android";
 #endif
   stack_t ss;
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index d63b455..0181e5b 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -2874,6 +2874,13 @@
           }
         }
       }
+      // Handle this like a RETURN_VOID now. Code is duplicated to separate standard from
+      // quickened opcodes (otherwise this could be a fall-through).
+      if (!IsConstructor()) {
+        if (!GetMethodReturnType().IsConflict()) {
+          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "return-void not expected";
+        }
+      }
       break;
     // Note: the following instructions encode offsets derived from class linking.
     // As such they use Class*/Field*/AbstractMethod* as these offsets only have
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 9319cc2..7fe8bb9 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -302,7 +302,9 @@
 PreciseReferenceType::PreciseReferenceType(mirror::Class* klass, const std::string& descriptor,
                                            uint16_t cache_id)
     : RegType(klass, descriptor, cache_id) {
-  DCHECK(klass->IsInstantiable());
+  // Note: no check for IsInstantiable() here. We may produce this in case an InstantiationError
+  //       would be thrown at runtime, but we need to continue verification and *not* create a
+  //       hard failure or abort.
 }
 
 std::string UnresolvedMergedType::Dump() const {
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index d656500..4469e64 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -427,9 +427,18 @@
         }
       }
       entry = new ReferenceType(klass, "", entries_.size());
-    } else if (klass->IsInstantiable()) {
+    } else if (!klass->IsPrimitive()) {
       // We're uninitialized because of allocation, look or create a precise type as allocations
       // may only create objects of that type.
+      // Note: we do not check whether the given klass is actually instantiable beyond rejecting
+      //       primitive types, that is, we allow interfaces and abstract classes here. The
+      //       reasoning is twofold:
+      //       1) The "new-instance" instruction to generate the uninitialized type will already
+      //          queue an instantiation error. This is a soft error that must be thrown at runtime,
+      //          and could potentially change if the class is resolved differently at runtime.
+      //       2) Checking whether the klass is instantiable and using conflict may produce a hard
+      //          error when the value is used, which leads to a VerifyError, which is not the
+      //          correct semantics.
       for (size_t i = primitive_count_; i < entries_.size(); i++) {
         const RegType* cur_entry = entries_[i];
         if (cur_entry->IsPreciseReference() && cur_entry->GetClass() == klass) {
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index 1391d14..c984b17 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
 #include <android/log.h>
 #else
 #include <stdarg.h>
@@ -103,7 +103,7 @@
   va_list ap;
   va_start(ap, format);
   vsnprintf(buf, sizeof(buf), format, ap);
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
   __android_log_write(ANDROID_LOG_ERROR, "libsigchain", buf);
 #else
   std::cout << buf << "\n";
@@ -337,14 +337,16 @@
   // In case the chain isn't claimed, claim it for ourself so we can ensure the managed handler
   // goes first.
   if (!user_sigactions[signal].IsClaimed()) {
-    struct sigaction tmp;
-    tmp.sa_sigaction = sigchainlib_managed_handler_sigaction;
-    sigemptyset(&tmp.sa_mask);
-    tmp.sa_flags = SA_SIGINFO | SA_ONSTACK;
+    struct sigaction act, old_act;
+    act.sa_sigaction = sigchainlib_managed_handler_sigaction;
+    sigemptyset(&act.sa_mask);
+    act.sa_flags = SA_SIGINFO | SA_ONSTACK;
 #if !defined(__APPLE__) && !defined(__mips__)
-    tmp.sa_restorer = nullptr;
+    act.sa_restorer = nullptr;
 #endif
-    user_sigactions[signal].Claim(tmp);
+    if (sigaction(signal, &act, &old_act) != -1) {
+      user_sigactions[signal].Claim(old_act);
+    }
   }
 }
 
diff --git a/sigchainlib/sigchain_dummy.cc b/sigchainlib/sigchain_dummy.cc
index 8495a54..dfe0c6f 100644
--- a/sigchainlib/sigchain_dummy.cc
+++ b/sigchainlib/sigchain_dummy.cc
@@ -17,7 +17,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
 #include <android/log.h>
 #else
 #include <stdarg.h>
@@ -38,7 +38,7 @@
   va_list ap;
   va_start(ap, format);
   vsnprintf(buf, sizeof(buf), format, ap);
-#ifdef HAVE_ANDROID_OS
+#ifdef __ANDROID__
   __android_log_write(ANDROID_LOG_ERROR, "libsigchain", buf);
 #else
   std::cout << buf << "\n";
diff --git a/test/051-thread/thread_test.cc b/test/051-thread/thread_test.cc
index 2b8e675..4215207 100644
--- a/test/051-thread/thread_test.cc
+++ b/test/051-thread/thread_test.cc
@@ -28,7 +28,7 @@
 extern "C" JNIEXPORT jboolean JNICALL Java_Main_supportsThreadPriorities(
     JNIEnv* env ATTRIBUTE_UNUSED,
     jclass clazz ATTRIBUTE_UNUSED) {
-#if defined(HAVE_ANDROID_OS)
+#if defined(__ANDROID__)
   return JNI_TRUE;
 #else
   return JNI_FALSE;
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 177c5a4..77c1a99 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -45,6 +45,8 @@
     test_Long_reverseBytes();
     test_Integer_reverse();
     test_Long_reverse();
+    test_Integer_numberOfLeadingZeros();
+    test_Long_numberOfLeadingZeros();
     test_StrictMath_abs_I();
     test_StrictMath_abs_J();
     test_StrictMath_min_I();
@@ -1041,6 +1043,24 @@
     return (r1 / i1) + (r2 / i2) + i3 + i4 + i5 + i6 + i7 + i8;
   }
 
+  public static void test_Integer_numberOfLeadingZeros() {
+    Assert.assertEquals(Integer.numberOfLeadingZeros(0), Integer.SIZE);
+    for (int i = 0; i < Integer.SIZE; i++) {
+        Assert.assertEquals(Integer.numberOfLeadingZeros(1 << i), Integer.SIZE - 1 - i);
+        Assert.assertEquals(Integer.numberOfLeadingZeros((1 << i) | 1), Integer.SIZE - 1 - i);
+        Assert.assertEquals(Integer.numberOfLeadingZeros(0xFFFFFFFF >>> i), i);
+    }
+  }
+
+  public static void test_Long_numberOfLeadingZeros() {
+    Assert.assertEquals(Long.numberOfLeadingZeros(0L), Long.SIZE);
+    for (int i = 0; i < Long.SIZE; i++) {
+        Assert.assertEquals(Long.numberOfLeadingZeros(1L << i), Long.SIZE - 1 - i);
+        Assert.assertEquals(Long.numberOfLeadingZeros((1L << i) | 1L), Long.SIZE - 1 - i);
+        Assert.assertEquals(Long.numberOfLeadingZeros(0xFFFFFFFFFFFFFFFFL >>> i), i);
+    }
+  }
+
   static Object runtime;
   static Method address_of;
   static Method new_non_movable_array;
diff --git a/test/115-native-bridge/expected.txt b/test/115-native-bridge/expected.txt
index 464d2c8..372ecd0 100644
--- a/test/115-native-bridge/expected.txt
+++ b/test/115-native-bridge/expected.txt
@@ -61,3 +61,4 @@
 trampoline_Java_Main_testNewStringObject called!
 Getting trampoline for Java_Main_testSignal with shorty I.
 NB signal handler with signal 11.
+NB signal handler with signal 4.
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index c8141a7..a6a6e08 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -200,8 +200,9 @@
 #if !defined(__APPLE__) && !defined(__mips__)
   tmp.sa_restorer = nullptr;
 #endif
-  sigaction(SIGSEGV, &tmp, nullptr);
 
+  // Test segv
+  sigaction(SIGSEGV, &tmp, nullptr);
 #if defined(__arm__) || defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
   // On supported architectures we cause a real SEGV.
   *go_away_compiler = 'a';
@@ -209,6 +210,11 @@
   // On other architectures we simulate SEGV.
   kill(getpid(), SIGSEGV);
 #endif
+
+  // Test sigill
+  sigaction(SIGILL, &tmp, nullptr);
+  kill(getpid(), SIGILL);
+
   return 1234;
 }
 
@@ -385,27 +391,29 @@
 // 004-SignalTest.
 static bool nb_signalhandler(int sig, siginfo_t* info ATTRIBUTE_UNUSED, void* context) {
   printf("NB signal handler with signal %d.\n", sig);
+  if (sig == SIGSEGV) {
 #if defined(__arm__)
-  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
-  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  sc->arm_pc += 2;          // Skip instruction causing segv.
+    struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+    struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+    sc->arm_pc += 2;          // Skip instruction causing segv.
 #elif defined(__aarch64__)
-  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
-  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  sc->pc += 4;          // Skip instruction causing segv.
+    struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+    struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+    sc->pc += 4;          // Skip instruction causing segv.
 #elif defined(__i386__) || defined(__x86_64__)
-  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
-  uc->CTX_EIP += 3;
+    struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+    uc->CTX_EIP += 3;
 #else
-  UNUSED(context);
+    UNUSED(context);
 #endif
+  }
   // We handled this...
   return true;
 }
 
 static ::android::NativeBridgeSignalHandlerFn native_bridge_get_signal_handler(int signal) {
-  // Only test segfault handler.
-  if (signal == SIGSEGV) {
+  // Test segv for an already claimed signal, and sigill for an unclaimed signal.
+  if ((signal == SIGSEGV) || (signal == SIGILL)) {
     return &nb_signalhandler;
   }
   return nullptr;
diff --git a/test/401-optimizing-compiler/src/Main.java b/test/401-optimizing-compiler/src/Main.java
index a1e62b3..f2e4518 100644
--- a/test/401-optimizing-compiler/src/Main.java
+++ b/test/401-optimizing-compiler/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Note that $opt$ is a marker for the optimizing compiler to ensure
+// Note that $opt$ is a marker for the optimizing compiler to test
 // it does compile the method.
 
 public class Main {
diff --git a/test/402-optimizing-control-flow/src/Main.java b/test/402-optimizing-control-flow/src/Main.java
index c9c24dd..4c93d26 100644
--- a/test/402-optimizing-control-flow/src/Main.java
+++ b/test/402-optimizing-control-flow/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Note that $opt$ is a marker for the optimizing compiler to ensure
+// Note that $opt$ is a marker for the optimizing compiler to test
 // it does compile the method.
 
 public class Main {
diff --git a/test/403-optimizing-long/src/Main.java b/test/403-optimizing-long/src/Main.java
index 21af4e1..5927d1c 100644
--- a/test/403-optimizing-long/src/Main.java
+++ b/test/403-optimizing-long/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Note that $opt$ is a marker for the optimizing compiler to ensure
+// Note that $opt$ is a marker for the optimizing compiler to test
 // it does compile the method.
 
 public class Main {
diff --git a/test/404-optimizing-allocator/src/Main.java b/test/404-optimizing-allocator/src/Main.java
index 7b31820..1ff5475 100644
--- a/test/404-optimizing-allocator/src/Main.java
+++ b/test/404-optimizing-allocator/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Note that $opt$reg$ is a marker for the optimizing compiler to ensure
+// Note that $opt$reg$ is a marker for the optimizing compiler to test
 // it does use its register allocator.
 
 public class Main {
diff --git a/test/405-optimizing-long-allocator/src/Main.java b/test/405-optimizing-long-allocator/src/Main.java
index 9fd840b..a0e0bb5 100644
--- a/test/405-optimizing-long-allocator/src/Main.java
+++ b/test/405-optimizing-long-allocator/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Note that $opt$ is a marker for the optimizing compiler to ensure
+// Note that $opt$ is a marker for the optimizing compiler to test
 // it compiles these methods.
 
 public class Main {
diff --git a/test/411-optimizing-arith-mul/src/Main.java b/test/411-optimizing-arith-mul/src/Main.java
index 3a5d7c0..60e418e 100644
--- a/test/411-optimizing-arith-mul/src/Main.java
+++ b/test/411-optimizing-arith-mul/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Note that $opt$ is a marker for the optimizing compiler to ensure
+// Note that $opt$ is a marker for the optimizing compiler to test
 // it does compile the method.
 public class Main {
 
diff --git a/test/412-new-array/src/Main.java b/test/412-new-array/src/Main.java
index e4669b8..b9c2a05 100644
--- a/test/412-new-array/src/Main.java
+++ b/test/412-new-array/src/Main.java
@@ -17,7 +17,7 @@
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 
-// Note that $opt$ is a marker for the optimizing compiler to ensure
+// Note that $opt$ is a marker for the optimizing compiler to test
 // it does compile the method.
 
 public class Main extends TestCase {
diff --git a/test/414-optimizing-arith-sub/src/Main.java b/test/414-optimizing-arith-sub/src/Main.java
index 30e8436..b4531cd 100644
--- a/test/414-optimizing-arith-sub/src/Main.java
+++ b/test/414-optimizing-arith-sub/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Note that $opt$ is a marker for the optimizing compiler to ensure
+// Note that $opt$ is a marker for the optimizing compiler to test
 // it does compile the method.
 public class Main {
 
diff --git a/test/415-optimizing-arith-neg/src/Main.java b/test/415-optimizing-arith-neg/src/Main.java
index bd8a158..cabf635 100644
--- a/test/415-optimizing-arith-neg/src/Main.java
+++ b/test/415-optimizing-arith-neg/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Note that $opt$ is a marker for the optimizing compiler to ensure
+// Note that $opt$ is a marker for the optimizing compiler to test
 // it does compile the method.
 public class Main {
 
diff --git a/test/417-optimizing-arith-div/src/Main.java b/test/417-optimizing-arith-div/src/Main.java
index 909ceb4..68e89b3 100644
--- a/test/417-optimizing-arith-div/src/Main.java
+++ b/test/417-optimizing-arith-div/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Note that $opt$ is a marker for the optimizing compiler to ensure
+// Note that $opt$ is a marker for the optimizing compiler to test
 // it does compile the method.
 public class Main {
 
diff --git a/test/421-large-frame/src/Main.java b/test/421-large-frame/src/Main.java
index 81896ab..6717ba0 100644
--- a/test/421-large-frame/src/Main.java
+++ b/test/421-large-frame/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Note that $opt$ is a marker for the optimizing compiler to ensure
+// Note that $opt$ is a marker for the optimizing compiler to test
 // it does compile the method.
 public class Main {
 
diff --git a/test/422-type-conversion/src/Main.java b/test/422-type-conversion/src/Main.java
index 9f8f417..146f309 100644
--- a/test/422-type-conversion/src/Main.java
+++ b/test/422-type-conversion/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Note that $opt$ is a marker for the optimizing compiler to ensure
+// Note that $opt$ is a marker for the optimizing compiler to test
 // it does compile the method.
 public class Main {
 
diff --git a/test/427-bitwise/src/Main.java b/test/427-bitwise/src/Main.java
index e984066..aa69554 100644
--- a/test/427-bitwise/src/Main.java
+++ b/test/427-bitwise/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Note that $opt$ is a marker for the optimizing compiler to ensure
+// Note that $opt$ is a marker for the optimizing compiler to test
 // it does compile the method.
 public class Main {
 
diff --git a/test/441-checker-inliner/src/Main.java b/test/441-checker-inliner/src/Main.java
index 4db116a..c108a90 100644
--- a/test/441-checker-inliner/src/Main.java
+++ b/test/441-checker-inliner/src/Main.java
@@ -157,6 +157,31 @@
     return x;
   }
 
+  /// CHECK-START: int Main.returnAbs(int) intrinsics_recognition (before)
+  /// CHECK-DAG:     <<Result:i\d+>>      InvokeStaticOrDirect
+  /// CHECK-DAG:                          Return [<<Result>>]
+
+  /// CHECK-START: int Main.returnAbs(int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>      InvokeStaticOrDirect intrinsic:MathAbsInt
+  /// CHECK-DAG:                          Return [<<Result>>]
+
+  private static int returnAbs(int i) {
+    return Math.abs(i);
+  }
+
+  /// CHECK-START: int Main.InlinedIntrinsicsAreStillIntrinsic() inliner (before)
+  /// CHECK-DAG:     <<ConstMinus1:i\d+>> IntConstant -1
+  /// CHECK-DAG:     <<Result:i\d+>>      InvokeStaticOrDirect
+  /// CHECK-DAG:                          Return [<<Result>>]
+
+  /// CHECK-START: int Main.InlinedIntrinsicsAreStillIntrinsic() inliner (after)
+  /// CHECK-DAG:     <<ConstMinus1:i\d+>> IntConstant -1
+  /// CHECK-DAG:     <<Result:i\d+>>      InvokeStaticOrDirect intrinsic:MathAbsInt
+  /// CHECK-DAG:                          Return [<<Result>>]
+
+  public static int InlinedIntrinsicsAreStillIntrinsic() {
+    return returnAbs(-1);
+  }
 
   private static void returnVoid() {
     return;
@@ -238,5 +263,13 @@
     if (InlineWithControlFlow(false) != 2) {
       throw new Error();
     }
+
+    if (InlinedIntrinsicsAreStillIntrinsic() != 1) {
+      throw new Error();
+    }
+
+    if (returnAbs(-1) != 1) {
+      throw new Error();
+    }
   }
 }
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index 014f59a..251a53e 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-
 interface Interface {
   void $noinline$f();
 }
@@ -52,6 +51,15 @@
   }
 }
 
+class Generic<A> {
+  private A a = null;
+  public A get() {
+    return a;
+  }
+}
+
+final class Final {}
+
 public class Main {
 
   /// CHECK-START: void Main.testSimpleRemove() instruction_simplifier_after_types (before)
@@ -395,6 +403,104 @@
     ((SubclassA)a[0]).$noinline$g();
   }
 
+  private Generic<SubclassC> genericC = new Generic<SubclassC>();
+  private Generic<Final> genericFinal = new Generic<Final>();
+
+  private SubclassC get() {
+    return genericC.get();
+  }
+
+  private Final getFinal() {
+    return genericFinal.get();
+  }
+
+  /// CHECK-START: SubclassC Main.inlineGenerics() reference_type_propagation (after)
+  /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:SubclassC exact:false
+  /// CHECK-NEXT:                    Return [<<Invoke>>]
+
+  /// CHECK-START: SubclassC Main.inlineGenerics() reference_type_propagation_after_inlining (after)
+  /// CHECK:      <<BoundType:l\d+>> BoundType klass:SubclassC exact:false
+  /// CHECK:                         Return [<<BoundType>>]
+  private SubclassC inlineGenerics() {
+    SubclassC c = get();
+    return c;
+  }
+
+  /// CHECK-START: Final Main.inlineGenericsFinal() reference_type_propagation (after)
+  /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:Final exact:true
+  /// CHECK-NEXT:                    Return [<<Invoke>>]
+
+  /// CHECK-START: Final Main.inlineGenericsFinal() reference_type_propagation_after_inlining (after)
+  /// CHECK:      <<BoundType:l\d+>> BoundType klass:Final exact:true
+  /// CHECK:                         Return [<<BoundType>>]
+  private Final inlineGenericsFinal() {
+    Final f = getFinal();
+    return f;
+  }
+
+  /// CHECK-START: void Main.boundOnlyOnceIfNotNull(java.lang.Object) reference_type_propagation_after_inlining (after)
+  /// CHECK:      BoundType
+  /// CHECK-NOT:  BoundType
+  private void boundOnlyOnceIfNotNull(Object o) {
+    if (o != null) {
+      o.toString();
+    }
+  }
+
+  /// CHECK-START: void Main.boundOnlyOnceIfInstanceOf(java.lang.Object) reference_type_propagation_after_inlining (after)
+  /// CHECK:      BoundType
+  /// CHECK-NOT:  BoundType
+  private void boundOnlyOnceIfInstanceOf(Object o) {
+    if (o instanceof Main) {
+      o.toString();
+    }
+  }
+
+  /// CHECK-START: Final Main.boundOnlyOnceCheckCast(Generic) reference_type_propagation_after_inlining (after)
+  /// CHECK:      BoundType
+  /// CHECK-NOT:  BoundType
+  private Final boundOnlyOnceCheckCast(Generic<Final> o) {
+    Final f = o.get();
+    return f;
+  }
+
+  private Super getSuper() {
+    return new SubclassA();
+  }
+
+  /// CHECK-START: void Main.updateNodesInTheSameBlockAsPhi(boolean) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>> Phi klass:Super
+  /// CHECK:                   NullCheck [<<Phi>>] klass:Super
+
+  /// CHECK-START: void Main.updateNodesInTheSameBlockAsPhi(boolean) reference_type_propagation_after_inlining (after)
+  /// CHECK:      <<Phi:l\d+>> Phi klass:SubclassA
+  /// CHECK:                   NullCheck [<<Phi>>] klass:SubclassA
+  private void updateNodesInTheSameBlockAsPhi(boolean cond) {
+    Super s = getSuper();
+    if (cond) {
+      s = new SubclassA();
+    }
+    s.$noinline$f();
+  }
+
+  /// CHECK-START: java.lang.String Main.checkcastPreserveNullCheck(java.lang.Object) reference_type_propagation_after_inlining (after)
+  /// CHECK:      <<This:l\d+>>     ParameterValue
+  /// CHECK:      <<Param:l\d+>>    ParameterValue
+  /// CHECK:      <<Clazz:l\d+>>    LoadClass
+  /// CHECK:                        CheckCast [<<Param>>,<<Clazz>>]
+  /// CHECK:                        BoundType [<<Param>>] can_be_null:true
+
+  /// CHECK-START: java.lang.String Main.checkcastPreserveNullCheck(java.lang.Object) instruction_simplifier_after_types (after)
+  /// CHECK:      <<This:l\d+>>     ParameterValue
+  /// CHECK:      <<Param:l\d+>>    ParameterValue
+  /// CHECK:      <<Clazz:l\d+>>    LoadClass
+  /// CHECK:                        CheckCast [<<Param>>,<<Clazz>>]
+  /// CHECK:      <<Bound:l\d+>>    BoundType [<<Param>>]
+  /// CHECK:                        NullCheck [<<Bound>>]
+  public String checkcastPreserveNullCheck(Object a) {
+    return ((SubclassA)a).toString();
+  }
+
   public static void main(String[] args) {
   }
 }
diff --git a/test/477-long-to-float-conversion-precision/src/Main.java b/test/477-long-to-float-conversion-precision/src/Main.java
index cd97039..568bc04 100644
--- a/test/477-long-to-float-conversion-precision/src/Main.java
+++ b/test/477-long-to-float-conversion-precision/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Note that $opt$ is a marker for the optimizing compiler to ensure
+// Note that $opt$ is a marker for the optimizing compiler to test
 // it does compile the method.
 public class Main {
 
diff --git a/test/705-register-conflict/src/Main.java b/test/705-register-conflict/src/Main.java
index 42c79fb..9ae10ec 100644
--- a/test/705-register-conflict/src/Main.java
+++ b/test/705-register-conflict/src/Main.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Note that $opt$ is a marker for the optimizing compiler to ensure
+// Note that $opt$ is a marker for the optimizing compiler to test
 // it does compile the method.
 
 public class Main {
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index fd9fcaf..728ccea 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -36,4 +36,5 @@
 b/22411633 (3)
 b/22411633 (4)
 b/22411633 (5)
+b/22777307
 Done!
diff --git a/test/800-smali/smali/b_22777307.smali b/test/800-smali/smali/b_22777307.smali
new file mode 100644
index 0000000..6de3c70
--- /dev/null
+++ b/test/800-smali/smali/b_22777307.smali
@@ -0,0 +1,18 @@
+.class public LB22777307;
+.super Ljava/lang/Object;
+
+# A static field. That way we can use the reference.
+.field private static sTest:Ljava/lang/Object;
+
+.method public static run()V
+.registers 2
+       # This is a broken new-instance. It needs to throw at runtime, though. This test is here to
+       # ensure we won't produce a VerifyError.
+       # Cloneable was chosen because it's an already existing interface.
+       new-instance v0, Ljava/lang/Cloneable;  # Broken on purpose: the type is an interface.
+       invoke-direct {v0}, Ljava/lang/Cloneable;-><init>()V
+       sput-object v0, LB22777307;->sTest:Ljava/lang/Object;  # Use the reference.
+
+       return-void
+
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index 8da2af4..438e214 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -119,6 +119,8 @@
                 new VerifyError(), null));
         testCases.add(new TestCase("b/22411633 (5)", "B22411633_5", "run", new Object[] { false },
                 null, null));
+        testCases.add(new TestCase("b/22777307", "B22777307", "run", null, new InstantiationError(),
+                null));
     }
 
     public void runTests() {
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 992a8a6..7ada189 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -150,5 +150,12 @@
   result: EXEC_FAILED,
   modes: [device],
   names: ["org.apache.harmony.tests.java.lang.ClassTest#test_forNameLjava_lang_String"]
+},
+{
+  description: "TimeZoneTest.testAllDisplayNames times out, needs investigation",
+  result: EXEC_TIMEOUT,
+  modes: [device],
+  names: ["libcore.java.util.TimeZoneTest#testAllDisplayNames"],
+  bug: 22786792
 }
 ]