Merge "Long support in optimizing compiler."
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 1f8f799..24acc56 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -60,6 +60,7 @@
 	runtime/verifier/method_verifier_test.cc \
 	runtime/verifier/reg_type_test.cc \
 	runtime/zip_archive_test.cc \
+	runtime/stack_indirect_reference_table_test.cc \
 
 COMPILER_GTEST_COMMON_SRC_FILES := \
 	runtime/jni_internal_test.cc \
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 9a21da0..fdf09a5 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -299,7 +299,7 @@
 
       // for ARM, do a runtime check to make sure that the features we are passed from
       // the build are supported by the features we actually determine at runtime.
-      ASSERT_EQ(instruction_set_features, runtime_features);
+      ASSERT_LE(instruction_set_features, runtime_features);
 #elif defined(__aarch64__)
       instruction_set = kArm64;
       // TODO: arm64 compilation support.
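
Note: the switch from ASSERT_EQ to ASSERT_LE loosens the check from "build features equal
runtime features" to "build features are contained in runtime features": a device may
legitimately report more capabilities at runtime than the build was configured for. A minimal
sketch of the ordering this relies on, assuming a bitmask-style features type (the real
InstructionSetFeatures comparison lives elsewhere in ART):

    #include <cstdint>

    // Hypothetical stand-in for illustration only.
    struct Features {
      uint32_t mask;
      // "a <= b" reads as subset inclusion: every feature enabled at
      // build time is also available at runtime.
      bool operator<=(const Features& other) const {
        return (mask & ~other.mask) == 0;
      }
    };
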
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index a241d51..2b20c6f 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -19,10 +19,8 @@
 #define ATRACE_TAG ATRACE_TAG_DALVIK
 #include <utils/Trace.h>
 
-#include <fstream>
 #include <vector>
 #include <unistd.h>
-#include <utility>
 
 #include "base/stl_util.h"
 #include "base/timing_logger.h"
@@ -372,7 +370,7 @@
 
   // Read the profile file if one is provided.
   if (profile_file != "") {
-    profile_ok_ = ReadProfile(profile_file);
+    profile_ok_ = ProfileHelper::LoadProfileMap(profile_map_, profile_file);
   }
 
   dex_to_dex_compiler_ = reinterpret_cast<DexToDexCompilerFn>(ArtCompileDEX);
@@ -2036,86 +2034,9 @@
     }
   }
 
-bool CompilerDriver::ReadProfile(const std::string& filename) {
-  VLOG(compiler) << "reading profile file " << filename;
-  struct stat st;
-  int err = stat(filename.c_str(), &st);
-  if (err == -1) {
-    VLOG(compiler) << "not found";
-    return false;
-  }
-  std::ifstream in(filename.c_str());
-  if (!in) {
-    VLOG(compiler) << "profile file " << filename << " exists but can't be opened";
-    VLOG(compiler) << "file owner: " << st.st_uid << ":" << st.st_gid;
-    VLOG(compiler) << "me: " << getuid() << ":" << getgid();
-    VLOG(compiler) << "file permissions: " << std::oct << st.st_mode;
-    VLOG(compiler) << "errno: " << errno;
-    return false;
-  }
-  // The first line contains summary information.
-  std::string line;
-  std::getline(in, line);
-  if (in.eof()) {
-    return false;
-  }
-  std::vector<std::string> summary_info;
-  Split(line, '/', summary_info);
-  if (summary_info.size() != 3) {
-    // Bad summary info.  It should be count/total/bootpath.
-    return false;
-  }
-  // This is the number of hits in all methods.
-  uint32_t total_count = 0;
-  for (int i = 0 ; i < 3; ++i) {
-    total_count += atoi(summary_info[i].c_str());
-  }
-
-  // Now read each line until the end of file.  Each line consists of 3 fields separated by '/'.
-  // Store the info in descending order given by the most used methods.
-  typedef std::set<std::pair<int, std::vector<std::string>>> ProfileSet;
-  ProfileSet countSet;
-  while (!in.eof()) {
-    std::getline(in, line);
-    if (in.eof()) {
-      break;
-    }
-    std::vector<std::string> info;
-    Split(line, '/', info);
-    if (info.size() != 3) {
-      // Malformed.
-      break;
-    }
-    int count = atoi(info[1].c_str());
-    countSet.insert(std::make_pair(-count, info));
-  }
-
-  uint32_t curTotalCount = 0;
-  ProfileSet::iterator end = countSet.end();
-  const ProfileData* prevData = nullptr;
-  for (ProfileSet::iterator it = countSet.begin(); it != end ; it++) {
-    const std::string& methodname = it->second[0];
-    uint32_t count = -it->first;
-    uint32_t size = atoi(it->second[2].c_str());
-    double usedPercent = (count * 100.0) / total_count;
-
-    curTotalCount += count;
-    // Methods with the same count should be part of the same top K percentage bucket.
-    double topKPercentage = (prevData != nullptr) && (prevData->GetCount() == count)
-      ? prevData->GetTopKUsedPercentage()
-      : 100 * static_cast<double>(curTotalCount) / static_cast<double>(total_count);
-
-    // Add it to the profile map.
-    ProfileData curData = ProfileData(methodname, count, size, usedPercent, topKPercentage);
-    profile_map_[methodname] = curData;
-    prevData = &curData;
-  }
-  return true;
-}
-
 bool CompilerDriver::SkipCompilation(const std::string& method_name) {
   if (!profile_ok_) {
-    return true;
+    return false;
   }
   // Methods that comprise topKPercentThreshold % of the total samples will be compiled.
   double topKPercentThreshold = 90.0;
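
Note: the parsing body deleted above now lives behind ProfileHelper::LoadProfileMap (hence the
new profiler.h include in the header below), and a missing or malformed profile now makes
SkipCompilation() return false, i.e. everything is compiled rather than everything being
skipped. A hedged sketch of how the rest of SkipCompilation() plausibly reads, assuming
ProfileData still exposes GetTopKUsedPercentage() as in the deleted class:

    // Sketch only; the real method continues past the lines shown in this hunk.
    bool CompilerDriver::SkipCompilation(const std::string& method_name) {
      if (!profile_ok_) {
        return false;  // No usable profile: compile everything.
      }
      // Methods that comprise the top 90% of samples get compiled.
      double topKPercentThreshold = 90.0;
      ProfileMap::iterator it = profile_map_.find(method_name);
      if (it == profile_map_.end()) {
        return true;  // Never sampled: assume cold and skip it.
      }
      return it->second.GetTopKUsedPercentage() > topKPercentThreshold;
    }
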
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 802f859..d49523a 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -32,6 +32,7 @@
 #include "invoke_type.h"
 #include "method_reference.h"
 #include "os.h"
+#include "profiler.h"
 #include "runtime.h"
 #include "safe_map.h"
 #include "thread_pool.h"
@@ -594,43 +595,9 @@
     return cfi_info_.get();
   }
 
-  // Profile data.  This is generated from previous runs of the program and stored
-  // in a file.  It is used to determine whether to compile a particular method or not.
-  class ProfileData {
-   public:
-    ProfileData() : count_(0), method_size_(0), usedPercent_(0) {}
-    ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size,
-      double usedPercent, double topKUsedPercentage) :
-      method_name_(method_name), count_(count), method_size_(method_size),
-      usedPercent_(usedPercent), topKUsedPercentage_(topKUsedPercentage) {
-      // TODO: currently method_size_ and count_ are unused.
-      UNUSED(method_size_);
-      UNUSED(count_);
-    }
-
-    bool IsAbove(double v) const { return usedPercent_ >= v; }
-    double GetUsedPercent() const { return usedPercent_; }
-    uint32_t GetCount() const { return count_; }
-    double GetTopKUsedPercentage() const { return topKUsedPercentage_; }
-
-   private:
-    std::string method_name_;    // Method name.
-    uint32_t count_;             // Number of times it has been called.
-    uint32_t method_size_;       // Size of the method on dex instructions.
-    double usedPercent_;         // Percentage of how many times this method was called.
-    double topKUsedPercentage_;  // The percentage of the group that comprise K% of the total used
-                                 // methods this methods belongs to.
-  };
-
-  // Profile data is stored in a map, indexed by the full method name.
-  typedef std::map<const std::string, ProfileData> ProfileMap;
   ProfileMap profile_map_;
   bool profile_ok_;
 
-  // Read the profile data from the given file.  Calculates the percentage for each method.
-  // Returns false if there was no profile file or it was malformed.
-  bool ReadProfile(const std::string& filename);
-
   // Should the compiler run on this method given profile information?
   bool SkipCompilation(const std::string& method_name);
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 0405198..f76587a 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -235,8 +235,8 @@
   }
 
   // Create the image bitmap.
-  image_bitmap_.reset(gc::accounting::SpaceBitmap::Create("image bitmap", image_->Begin(),
-                                                          length));
+  image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create("image bitmap", image_->Begin(),
+                                                                    length));
   if (image_bitmap_.get() == nullptr) {
     LOG(ERROR) << "Failed to allocate memory for image bitmap";
     return false;
@@ -525,7 +525,7 @@
 
   // Return to write header at start of image with future location of image_roots. At this point,
   // image_end_ is the size of the image (excluding bitmaps).
-  const size_t heap_bytes_per_bitmap_byte = kBitsPerByte * gc::accounting::SpaceBitmap::kAlignment;
+  const size_t heap_bytes_per_bitmap_byte = kBitsPerByte * kObjectAlignment;
   const size_t bitmap_bytes = RoundUp(image_end_, heap_bytes_per_bitmap_byte) /
       heap_bytes_per_bitmap_byte;
   ImageHeader image_header(PointerToLowMemUInt32(image_begin_),
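
Note: the bitmap sizing above is easy to sanity-check. ContinuousSpaceBitmap keeps one bit per
kObjectAlignment-sized chunk of heap, so each bitmap byte covers kBitsPerByte * kObjectAlignment
heap bytes. With the usual values (8-bit bytes, 8-byte object alignment) that is 64 heap bytes
per bitmap byte:

    #include <cstddef>

    // Worked example with assumed constants (kBitsPerByte = 8, kObjectAlignment = 8).
    constexpr size_t kBitsPerByte = 8;
    constexpr size_t kObjectAlignment = 8;
    constexpr size_t heap_bytes_per_bitmap_byte = kBitsPerByte * kObjectAlignment;  // 64
    // A 1 MiB image therefore needs 1 MiB / 64 = 16 KiB of bitmap.
    static_assert(heap_bytes_per_bitmap_byte == 64, "one bitmap byte covers 64 heap bytes");
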
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 92b24f6..ee241cb 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -173,7 +173,7 @@
   const byte* oat_data_begin_;
 
   // Image bitmap which lets us know where the objects inside of the image reside.
-  UniquePtr<gc::accounting::SpaceBitmap> image_bitmap_;
+  UniquePtr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_;
 
   // Offset from oat_data_begin_ to the stubs.
   uint32_t interpreter_to_interpreter_bridge_offset_;
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index a0797f3..6b5e55e 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -1278,4 +1278,217 @@
   EXPECT_TRUE(env_->ExceptionCheck() == JNI_TRUE);
 }
 
+template <typename U, typename V> V convert(U in) {
+  DCHECK_LE(sizeof(U), sizeof(V));
+  union { U u; V v; } tmp;
+  tmp.u = in;
+  return tmp.v;
+}
+
+void Java_MyClassNatives_stackArgsIntsFirst(JNIEnv* env, jclass klass, jint i1, jint i2, jint i3,
+                                            jint i4, jint i5, jint i6, jint i7, jint i8, jint i9,
+                                            jint i10, jfloat f1, jfloat f2, jfloat f3, jfloat f4,
+                                            jfloat f5, jfloat f6, jfloat f7, jfloat f8, jfloat f9,
+                                            jfloat f10) {
+  EXPECT_EQ(i1, 1);
+  EXPECT_EQ(i2, 2);
+  EXPECT_EQ(i3, 3);
+  EXPECT_EQ(i4, 4);
+  EXPECT_EQ(i5, 5);
+  EXPECT_EQ(i6, 6);
+  EXPECT_EQ(i7, 7);
+  EXPECT_EQ(i8, 8);
+  EXPECT_EQ(i9, 9);
+  EXPECT_EQ(i10, 10);
+
+  jint i11 = convert<jfloat, jint>(f1);
+  EXPECT_EQ(i11, 11);
+  jint i12 = convert<jfloat, jint>(f2);
+  EXPECT_EQ(i12, 12);
+  jint i13 = convert<jfloat, jint>(f3);
+  EXPECT_EQ(i13, 13);
+  jint i14 = convert<jfloat, jint>(f4);
+  EXPECT_EQ(i14, 14);
+  jint i15 = convert<jfloat, jint>(f5);
+  EXPECT_EQ(i15, 15);
+  jint i16 = convert<jfloat, jint>(f6);
+  EXPECT_EQ(i16, 16);
+  jint i17 = convert<jfloat, jint>(f7);
+  EXPECT_EQ(i17, 17);
+  jint i18 = convert<jfloat, jint>(f8);
+  EXPECT_EQ(i18, 18);
+  jint i19 = convert<jfloat, jint>(f9);
+  EXPECT_EQ(i19, 19);
+  jint i20 = convert<jfloat, jint>(f10);
+  EXPECT_EQ(i20, 20);
+}
+
+TEST_F(JniCompilerTest, StackArgsIntsFirst) {
+  TEST_DISABLED_FOR_PORTABLE();
+  SetUpForTest(true, "stackArgsIntsFirst", "(IIIIIIIIIIFFFFFFFFFF)V",
+               reinterpret_cast<void*>(&Java_MyClassNatives_stackArgsIntsFirst));
+
+  jint i1 = 1;
+  jint i2 = 2;
+  jint i3 = 3;
+  jint i4 = 4;
+  jint i5 = 5;
+  jint i6 = 6;
+  jint i7 = 7;
+  jint i8 = 8;
+  jint i9 = 9;
+  jint i10 = 10;
+
+  jfloat f1 = convert<jint, jfloat>(11);
+  jfloat f2 = convert<jint, jfloat>(12);
+  jfloat f3 = convert<jint, jfloat>(13);
+  jfloat f4 = convert<jint, jfloat>(14);
+  jfloat f5 = convert<jint, jfloat>(15);
+  jfloat f6 = convert<jint, jfloat>(16);
+  jfloat f7 = convert<jint, jfloat>(17);
+  jfloat f8 = convert<jint, jfloat>(18);
+  jfloat f9 = convert<jint, jfloat>(19);
+  jfloat f10 = convert<jint, jfloat>(20);
+
+  env_->CallStaticVoidMethod(jklass_, jmethod_, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, f1, f2,
+                             f3, f4, f5, f6, f7, f8, f9, f10);
+}
+
+void Java_MyClassNatives_stackArgsFloatsFirst(JNIEnv* env, jclass klass, jfloat f1, jfloat f2,
+                                              jfloat f3, jfloat f4, jfloat f5, jfloat f6, jfloat f7,
+                                              jfloat f8, jfloat f9, jfloat f10, jint i1, jint i2,
+                                              jint i3, jint i4, jint i5, jint i6, jint i7, jint i8,
+                                              jint i9, jint i10) {
+  EXPECT_EQ(i1, 1);
+  EXPECT_EQ(i2, 2);
+  EXPECT_EQ(i3, 3);
+  EXPECT_EQ(i4, 4);
+  EXPECT_EQ(i5, 5);
+  EXPECT_EQ(i6, 6);
+  EXPECT_EQ(i7, 7);
+  EXPECT_EQ(i8, 8);
+  EXPECT_EQ(i9, 9);
+  EXPECT_EQ(i10, 10);
+
+  jint i11 = convert<jfloat, jint>(f1);
+  EXPECT_EQ(i11, 11);
+  jint i12 = convert<jfloat, jint>(f2);
+  EXPECT_EQ(i12, 12);
+  jint i13 = convert<jfloat, jint>(f3);
+  EXPECT_EQ(i13, 13);
+  jint i14 = convert<jfloat, jint>(f4);
+  EXPECT_EQ(i14, 14);
+  jint i15 = convert<jfloat, jint>(f5);
+  EXPECT_EQ(i15, 15);
+  jint i16 = convert<jfloat, jint>(f6);
+  EXPECT_EQ(i16, 16);
+  jint i17 = convert<jfloat, jint>(f7);
+  EXPECT_EQ(i17, 17);
+  jint i18 = convert<jfloat, jint>(f8);
+  EXPECT_EQ(i18, 18);
+  jint i19 = convert<jfloat, jint>(f9);
+  EXPECT_EQ(i19, 19);
+  jint i20 = convert<jfloat, jint>(f10);
+  EXPECT_EQ(i20, 20);
+}
+
+TEST_F(JniCompilerTest, StackArgsFloatsFirst) {
+  TEST_DISABLED_FOR_PORTABLE();
+  SetUpForTest(true, "stackArgsFloatsFirst", "(FFFFFFFFFFIIIIIIIIII)V",
+               reinterpret_cast<void*>(&Java_MyClassNatives_stackArgsFloatsFirst));
+
+  jint i1 = 1;
+  jint i2 = 2;
+  jint i3 = 3;
+  jint i4 = 4;
+  jint i5 = 5;
+  jint i6 = 6;
+  jint i7 = 7;
+  jint i8 = 8;
+  jint i9 = 9;
+  jint i10 = 10;
+
+  jfloat f1 = convert<jint, jfloat>(11);
+  jfloat f2 = convert<jint, jfloat>(12);
+  jfloat f3 = convert<jint, jfloat>(13);
+  jfloat f4 = convert<jint, jfloat>(14);
+  jfloat f5 = convert<jint, jfloat>(15);
+  jfloat f6 = convert<jint, jfloat>(16);
+  jfloat f7 = convert<jint, jfloat>(17);
+  jfloat f8 = convert<jint, jfloat>(18);
+  jfloat f9 = convert<jint, jfloat>(19);
+  jfloat f10 = convert<jint, jfloat>(20);
+
+  env_->CallStaticVoidMethod(jklass_, jmethod_, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, i1, i2, i3,
+                             i4, i5, i6, i7, i8, i9, i10);
+}
+
+void Java_MyClassNatives_stackArgsMixed(JNIEnv* env, jclass klass, jint i1, jfloat f1, jint i2,
+                                        jfloat f2, jint i3, jfloat f3, jint i4, jfloat f4, jint i5,
+                                        jfloat f5, jint i6, jfloat f6, jint i7, jfloat f7, jint i8,
+                                        jfloat f8, jint i9, jfloat f9, jint i10, jfloat f10) {
+  EXPECT_EQ(i1, 1);
+  EXPECT_EQ(i2, 2);
+  EXPECT_EQ(i3, 3);
+  EXPECT_EQ(i4, 4);
+  EXPECT_EQ(i5, 5);
+  EXPECT_EQ(i6, 6);
+  EXPECT_EQ(i7, 7);
+  EXPECT_EQ(i8, 8);
+  EXPECT_EQ(i9, 9);
+  EXPECT_EQ(i10, 10);
+
+  jint i11 = convert<jfloat, jint>(f1);
+  EXPECT_EQ(i11, 11);
+  jint i12 = convert<jfloat, jint>(f2);
+  EXPECT_EQ(i12, 12);
+  jint i13 = convert<jfloat, jint>(f3);
+  EXPECT_EQ(i13, 13);
+  jint i14 = convert<jfloat, jint>(f4);
+  EXPECT_EQ(i14, 14);
+  jint i15 = convert<jfloat, jint>(f5);
+  EXPECT_EQ(i15, 15);
+  jint i16 = convert<jfloat, jint>(f6);
+  EXPECT_EQ(i16, 16);
+  jint i17 = convert<jfloat, jint>(f7);
+  EXPECT_EQ(i17, 17);
+  jint i18 = convert<jfloat, jint>(f8);
+  EXPECT_EQ(i18, 18);
+  jint i19 = convert<jfloat, jint>(f9);
+  EXPECT_EQ(i19, 19);
+  jint i20 = convert<jfloat, jint>(f10);
+  EXPECT_EQ(i20, 20);
+}
+
+TEST_F(JniCompilerTest, StackArgsMixed) {
+  TEST_DISABLED_FOR_PORTABLE();
+  SetUpForTest(true, "stackArgsMixed", "(IFIFIFIFIFIFIFIFIFIF)V",
+               reinterpret_cast<void*>(&Java_MyClassNatives_stackArgsMixed));
+
+  jint i1 = 1;
+  jint i2 = 2;
+  jint i3 = 3;
+  jint i4 = 4;
+  jint i5 = 5;
+  jint i6 = 6;
+  jint i7 = 7;
+  jint i8 = 8;
+  jint i9 = 9;
+  jint i10 = 10;
+
+  jfloat f1 = convert<jint, jfloat>(11);
+  jfloat f2 = convert<jint, jfloat>(12);
+  jfloat f3 = convert<jint, jfloat>(13);
+  jfloat f4 = convert<jint, jfloat>(14);
+  jfloat f5 = convert<jint, jfloat>(15);
+  jfloat f6 = convert<jint, jfloat>(16);
+  jfloat f7 = convert<jint, jfloat>(17);
+  jfloat f8 = convert<jint, jfloat>(18);
+  jfloat f9 = convert<jint, jfloat>(19);
+  jfloat f10 = convert<jint, jfloat>(20);
+
+  env_->CallStaticVoidMethod(jklass_, jmethod_, i1, f1, i2, f2, i3, f3, i4, f4, i5, f5, i6, f6, i7,
+                             f7, i8, f8, i9, f9, i10, f10);
+}
+
 }  // namespace art
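
Note: convert<U, V> reinterprets bits through a union so the jfloat arguments carry
recognizable integer payloads (11..20) through the compiled stub; reading the inactive union
member is implementation-defined in standard C++, though GCC and Clang define it. A
memcpy-based equivalent avoids even that caveat (BitCast here is illustrative, not a helper
this test defines):

    #include <cstring>

    // Bit-preserving cast via memcpy; same effect as the union above for
    // equal-width types such as jint and jfloat.
    template <typename V, typename U>
    V BitCast(U in) {
      static_assert(sizeof(U) == sizeof(V), "exact-width punning only");
      V out;
      std::memcpy(&out, &in, sizeof(V));
      return out;
    }
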
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index ab39d6b..ae18d2e 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -145,7 +145,7 @@
   // Method*, LR and callee save area size, local reference segment state
   size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus 2 words for SIRT header
-  size_t sirt_size = (ReferenceCount() + 2) * sirt_pointer_size_;
+  size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSizeTarget(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(), kStackAlignment);
 }
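
Note: the hand-computed "(ReferenceCount() + 2) * sirt_pointer_size_" is replaced by a single
shared helper in all four calling conventions touched by this patch, so the SIRT header (a
link pointer plus a uint32_t reference count, whose combined width differs between 32- and
64-bit targets) is sized in one place. The helper is defined in the runtime, outside this
diff; a plausible shape, stated as an assumption:

    #include <cstddef>
    #include <cstdint>

    // Assumed sketch; reference slots stay 4 bytes wide on all targets.
    size_t GetAlignedSirtSizeTarget(size_t pointer_size, uint32_t num_refs) {
      // link_ (one target pointer) + number_of_references_ (uint32_t) + slots,
      // rounded up so the frame keeps target pointer alignment.
      size_t size = pointer_size + sizeof(uint32_t) + num_refs * sizeof(uint32_t);
      return (size + pointer_size - 1) & ~(pointer_size - 1);  // RoundUp
    }
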
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index c408fa9..6212a23 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -21,14 +21,29 @@
 namespace art {
 namespace arm64 {
 
-// Calling convention
+static const Register kCoreArgumentRegisters[] = {
+  X0, X1, X2, X3, X4, X5, X6, X7
+};
 
+static const WRegister kWArgumentRegisters[] = {
+  W0, W1, W2, W3, W4, W5, W6, W7
+};
+
+static const DRegister kDArgumentRegisters[] = {
+  D0, D1, D2, D3, D4, D5, D6, D7
+};
+
+static const SRegister kSArgumentRegisters[] = {
+  S0, S1, S2, S3, S4, S5, S6, S7
+};
+
+// Calling convention
 ManagedRegister Arm64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
-  return Arm64ManagedRegister::FromCoreRegister(IP0);  // X16
+  return Arm64ManagedRegister::FromCoreRegister(X20);  // saved on entry, restored on exit
 }
 
 ManagedRegister Arm64JniCallingConvention::InterproceduralScratchRegister() {
-  return Arm64ManagedRegister::FromCoreRegister(IP0);  // X16
+  return Arm64ManagedRegister::FromCoreRegister(X20);  // saved on entry, restored on exit
 }
 
 static ManagedRegister ReturnRegisterForShorty(const char* shorty) {
@@ -79,64 +94,64 @@
 FrameOffset Arm64ManagedRuntimeCallingConvention::CurrentParamStackOffset() {
   CHECK(IsCurrentParamOnStack());
   FrameOffset result =
-      FrameOffset(displacement_.Int32Value() +         // displacement
+      FrameOffset(displacement_.Int32Value() +   // displacement
                   kFramePointerSize +                 // Method*
-                  (itr_slots_ * kFramePointerSize));  // offset into in args
+                  (itr_slots_ * sizeof(uint32_t)));  // offset into in args
   return result;
 }
 
 const ManagedRegisterEntrySpills& Arm64ManagedRuntimeCallingConvention::EntrySpills() {
   // We spill the argument registers on ARM64 to free them up for scratch use, we then assume
   // all arguments are on the stack.
-  if (entry_spills_.size() == 0) {
-    // TODO Need fp regs spilled too.
-    //
-    size_t num_spills = NumArgs();
+  if ((entry_spills_.size() == 0) && (NumArgs() > 0)) {
+    int gp_reg_index = 1;   // we start from X1/W1, X0 holds ArtMethod*.
+    int fp_reg_index = 0;   // D0/S0.
 
-    // TODO Floating point need spilling too.
-    if (num_spills > 0) {
-      entry_spills_.push_back(Arm64ManagedRegister::FromCoreRegister(X1));
-      if (num_spills > 1) {
-        entry_spills_.push_back(Arm64ManagedRegister::FromCoreRegister(X2));
-        if (num_spills > 2) {
-          entry_spills_.push_back(Arm64ManagedRegister::FromCoreRegister(X3));
-          if (num_spills > 3) {
-            entry_spills_.push_back(Arm64ManagedRegister::FromCoreRegister(X5));
-            if (num_spills > 4) {
-              entry_spills_.push_back(Arm64ManagedRegister::FromCoreRegister(X6));
-              if (num_spills > 5) {
-                entry_spills_.push_back(Arm64ManagedRegister::FromCoreRegister(X7));
-              }
+    // We need to choose the correct register (D/S or X/W) since the managed
+    // stack uses 32bit stack slots.
+    ResetIterator(FrameOffset(0));
+    while (HasNext()) {
+      if (IsCurrentParamAFloatOrDouble()) {  // FP regs.
+          if (fp_reg_index < 8) {
+            if (!IsCurrentParamADouble()) {
+              entry_spills_.push_back(Arm64ManagedRegister::FromSRegister(kSArgumentRegisters[fp_reg_index]));
+            } else {
+              entry_spills_.push_back(Arm64ManagedRegister::FromDRegister(kDArgumentRegisters[fp_reg_index]));
             }
+            fp_reg_index++;
+          } else {  // just increase the stack offset.
+            if (!IsCurrentParamADouble()) {
+              entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
+            } else {
+              entry_spills_.push_back(ManagedRegister::NoRegister(), 8);
+            }
+          }
+      } else {  // GP regs.
+        if (gp_reg_index < 8) {
+          if (IsCurrentParamALong() && (!IsCurrentParamAReference())) {
+            entry_spills_.push_back(Arm64ManagedRegister::FromCoreRegister(kCoreArgumentRegisters[gp_reg_index]));
+          } else {
+            entry_spills_.push_back(Arm64ManagedRegister::FromWRegister(kWArgumentRegisters[gp_reg_index]));
+          }
+          gp_reg_index++;
+        } else {  // just increase the stack offset.
+          if (IsCurrentParamALong() && (!IsCurrentParamAReference())) {
+              entry_spills_.push_back(ManagedRegister::NoRegister(), 8);
+          } else {
+              entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
           }
         }
       }
+      Next();
     }
   }
-
   return entry_spills_;
 }
-// JNI calling convention
 
+// JNI calling convention
 Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_synchronized,
                                                      const char* shorty)
     : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
-  // TODO This needs to be converted to 64bit.
-  // Compute padding to ensure longs and doubles are not split in AAPCS. Ignore the 'this' jobject
-  // or jclass for static methods and the JNIEnv. We start at the aligned register r2.
-//  size_t padding = 0;
-//  for (size_t cur_arg = IsStatic() ? 0 : 1, cur_reg = 2; cur_arg < NumArgs(); cur_arg++) {
-//    if (IsParamALongOrDouble(cur_arg)) {
-//      if ((cur_reg & 1) != 0) {
-//        padding += 4;
-//        cur_reg++;  // additional bump to ensure alignment
-//      }
-//      cur_reg++;  // additional bump to skip extra long word
-//    }
-//    cur_reg++;  // bump the iterator for every argument
-//  }
-  padding_ =0;
-
   callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X19));
   callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X20));
   callee_save_regs_.push_back(Arm64ManagedRegister::FromCoreRegister(X21));
@@ -162,83 +177,87 @@
 uint32_t Arm64JniCallingConvention::CoreSpillMask() const {
   // Compute spill mask to agree with callee saves initialized in the constructor
   uint32_t result = 0;
-  result =  1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 | 1 << X25
-      | 1 << X26 | 1 << X27 | 1 << X28 | 1<< X29 | 1 << LR;
+  result =  1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 |
+            1 << X25 | 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR;
+  return result;
+}
+
+uint32_t Arm64JniCallingConvention::FpSpillMask() const {
+  // Compute spill mask to agree with callee saves initialized in the constructor
+  uint32_t result = 0;
+  result = 1 << D8 | 1 << D9 | 1 << D10 | 1 << D11 | 1 << D12 | 1 << D13 |
+           1 << D14 | 1 << D15;
   return result;
 }
 
 ManagedRegister Arm64JniCallingConvention::ReturnScratchRegister() const {
-  return Arm64ManagedRegister::FromCoreRegister(X9);
+  return ManagedRegister::NoRegister();
 }
 
 size_t Arm64JniCallingConvention::FrameSize() {
-  // Method*, LR and callee save area size, local reference segment state
-  size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize;
+  // Method*, callee save area size, local reference segment state
+  size_t frame_data_size = ((1 + CalleeSaveRegisters().size()) * kFramePointerSize) + sizeof(uint32_t);
   // References plus 2 words for SIRT header
-  size_t sirt_size = (ReferenceCount() + 2) * sirt_pointer_size_;
+  size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSizeTarget(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(), kStackAlignment);
 }
 
 size_t Arm64JniCallingConvention::OutArgSize() {
-  return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize + padding_,
-                 kStackAlignment);
-}
-
-// JniCallingConvention ABI follows AAPCS where longs and doubles must occur
-// in even register numbers and stack slots
-void Arm64JniCallingConvention::Next() {
-  JniCallingConvention::Next();
-  size_t arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
-  if ((itr_args_ >= 2) &&
-      (arg_pos < NumArgs()) &&
-      IsParamALongOrDouble(arg_pos)) {
-    // itr_slots_ needs to be an even number, according to AAPCS.
-    if ((itr_slots_ & 0x1u) != 0) {
-      itr_slots_++;
-    }
-  }
+  return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
 bool Arm64JniCallingConvention::IsCurrentParamInRegister() {
-  return itr_slots_ < 4;
+  if (IsCurrentParamAFloatOrDouble()) {
+    return (itr_float_and_doubles_ < 8);
+  } else {
+    return ((itr_args_ - itr_float_and_doubles_) < 8);
+  }
 }
 
 bool Arm64JniCallingConvention::IsCurrentParamOnStack() {
   return !IsCurrentParamInRegister();
 }
 
-// TODO and floating point?
-
-static const Register kJniArgumentRegisters[] = {
-  X0, X1, X2, X3, X4, X5, X6, X7
-};
 ManagedRegister Arm64JniCallingConvention::CurrentParamRegister() {
-  CHECK_LT(itr_slots_, 4u);
-  int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
-  // TODO Floating point & 64bit registers.
-  if ((itr_args_ >= 2) && IsParamALongOrDouble(arg_pos)) {
-    CHECK_EQ(itr_slots_, 2u);
-    return Arm64ManagedRegister::FromCoreRegister(X1);
+  CHECK(IsCurrentParamInRegister());
+  if (IsCurrentParamAFloatOrDouble()) {
+    CHECK_LT(itr_float_and_doubles_, 8u);
+    if (IsCurrentParamADouble()) {
+      return Arm64ManagedRegister::FromDRegister(kDArgumentRegisters[itr_float_and_doubles_]);
+    } else {
+      return Arm64ManagedRegister::FromSRegister(kSArgumentRegisters[itr_float_and_doubles_]);
+    }
   } else {
-    return
-      Arm64ManagedRegister::FromCoreRegister(kJniArgumentRegisters[itr_slots_]);
+    int gp_reg = itr_args_ - itr_float_and_doubles_;
+    CHECK_LT(static_cast<unsigned int>(gp_reg), 8u);
+    if (IsCurrentParamALong() || IsCurrentParamAReference() || IsCurrentParamJniEnv())  {
+      return Arm64ManagedRegister::FromCoreRegister(kCoreArgumentRegisters[gp_reg]);
+    } else {
+      return Arm64ManagedRegister::FromWRegister(kWArgumentRegisters[gp_reg]);
+    }
   }
 }
 
 FrameOffset Arm64JniCallingConvention::CurrentParamStackOffset() {
-  CHECK_GE(itr_slots_, 4u);
-  size_t offset = displacement_.Int32Value() - OutArgSize() + ((itr_slots_ - 4) * kFramePointerSize);
+  CHECK(IsCurrentParamOnStack());
+  size_t args_on_stack = itr_args_
+                  - std::min(8u, itr_float_and_doubles_)
+                  - std::min(8u, (itr_args_ - itr_float_and_doubles_));
+  size_t offset = displacement_.Int32Value() - OutArgSize() + (args_on_stack * kFramePointerSize);
   CHECK_LT(offset, OutArgSize());
   return FrameOffset(offset);
 }
 
 size_t Arm64JniCallingConvention::NumberOfOutgoingStackArgs() {
-  size_t static_args = IsStatic() ? 1 : 0;  // count jclass
-  // regular argument parameters and this
-  size_t param_args = NumArgs() + NumLongOrDoubleArgs();
-  // count JNIEnv* less arguments in registers
-  return static_args + param_args + 1 - 4;
+  // all arguments including JNI args
+  size_t all_args = NumArgs() + NumberOfExtraArgumentsForJni();
+
+  size_t all_stack_args = all_args -
+            std::min(8u, static_cast<unsigned int>(NumFloatOrDoubleArgs())) -
+            std::min(8u, static_cast<unsigned int>((all_args - NumFloatOrDoubleArgs())));
+
+  return all_stack_args;
 }
 
 }  // namespace arm64
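
Note: argument assignment now follows the AAPCS64 split: the first eight FP arguments go in
D0-D7 (S0-S7 for floats), the first eight integer/pointer arguments in X0-X7 (W0-W7 for
32-bit values), the two pools are counted independently, and only the overflow lands on the
stack. For the stackArgsIntsFirst test above (a static native method, so JNIEnv* and jclass
are prepended): all_args = 20 + 2 = 22, of which 10 are FP and 12 are GP, giving
22 - min(8, 10) - min(8, 12) = 6 stack arguments. As a standalone check:

    #include <algorithm>
    #include <cassert>
    #include <cstddef>

    // Mirrors NumberOfOutgoingStackArgs() above for a concrete signature.
    size_t StackArgs(size_t num_args, size_t num_fp_args) {
      size_t gp_args = num_args - num_fp_args;
      return num_args - std::min<size_t>(8, num_fp_args) - std::min<size_t>(8, gp_args);
    }

    int main() {
      // stackArgsIntsFirst: 10 ints + 10 floats, plus JNIEnv* and jclass.
      assert(StackArgs(22, 10) == 6);
      return 0;
    }
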
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index c18cd2b..92f547c 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -55,7 +55,6 @@
   ManagedRegister IntReturnRegister() OVERRIDE;
   ManagedRegister InterproceduralScratchRegister() OVERRIDE;
   // JNI calling convention
-  void Next() OVERRIDE;  // Override default behavior for AAPCS
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
   const std::vector<ManagedRegister>& CalleeSaveRegisters() const OVERRIDE {
@@ -63,9 +62,7 @@
   }
   ManagedRegister ReturnScratchRegister() const OVERRIDE;
   uint32_t CoreSpillMask() const OVERRIDE;
-  uint32_t FpSpillMask() const OVERRIDE {
-    return 0;  // Floats aren't spilled in JNI down call
-  }
+  uint32_t FpSpillMask() const OVERRIDE;
   bool IsCurrentParamInRegister() OVERRIDE;
   bool IsCurrentParamOnStack() OVERRIDE;
   ManagedRegister CurrentParamRegister() OVERRIDE;
@@ -78,9 +75,6 @@
   // TODO: these values aren't unique and can be shared amongst instances
   std::vector<ManagedRegister> callee_save_regs_;
 
-  // Padding to ensure longs and doubles are not split in AAPCS
-  size_t padding_;
-
   DISALLOW_COPY_AND_ASSIGN(Arm64JniCallingConvention);
 };
 
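
Note: the Next() override and the padding_ field are gone because they existed to satisfy the
32-bit AAPCS rule that longs and doubles start on an even register or stack slot. AAPCS64
hands out whole 64-bit Xn/Dn registers and 8-byte stack slots, so nothing can be split and no
padding is needed. The deleted rule, restated in miniature for illustration:

    // 32-bit AAPCS only: a J/D argument about to use registers must start on an
    // even register number, costing one padding slot when the next register is odd.
    int PaddingSlotsArm32(int next_reg) {
      return (next_reg & 1) != 0 ? 1 : 0;
    }
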
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index 8efdcda..a99a4c2 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -90,6 +90,14 @@
   return IsParamAFloatOrDouble(itr_args_);
 }
 
+bool ManagedRuntimeCallingConvention::IsCurrentParamADouble() {
+  return IsParamADouble(itr_args_);
+}
+
+bool ManagedRuntimeCallingConvention::IsCurrentParamALong() {
+  return IsParamALong(itr_args_);
+}
+
 // JNI calling convention
 
 JniCallingConvention* JniCallingConvention::Create(bool is_static, bool is_synchronized,
@@ -168,6 +176,10 @@
   }
 }
 
+bool JniCallingConvention::IsCurrentParamJniEnv() {
+  return (itr_args_ == kJniEnv);
+}
+
 bool JniCallingConvention::IsCurrentParamAFloatOrDouble() {
   switch (itr_args_) {
     case kJniEnv:
@@ -181,6 +193,32 @@
   }
 }
 
+bool JniCallingConvention::IsCurrentParamADouble() {
+  switch (itr_args_) {
+    case kJniEnv:
+      return false;  // JNIEnv*
+    case kObjectOrClass:
+      return false;   // jobject or jclass
+    default: {
+      int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
+      return IsParamADouble(arg_pos);
+    }
+  }
+}
+
+bool JniCallingConvention::IsCurrentParamALong() {
+  switch (itr_args_) {
+    case kJniEnv:
+      return false;  // JNIEnv*
+    case kObjectOrClass:
+      return false;   // jobject or jclass
+    default: {
+      int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
+      return IsParamALong(arg_pos);
+    }
+  }
+}
+
 // Return position of SIRT entry holding reference at the current iterator
 // position
 FrameOffset JniCallingConvention::CurrentParamSirtEntryOffset() {
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 7e1cf63..4d25d1c 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -126,6 +126,24 @@
     char ch = shorty_[param];
     return (ch == 'F' || ch == 'D');
   }
+  bool IsParamADouble(unsigned int param) const {
+    DCHECK_LT(param, NumArgs());
+    if (IsStatic()) {
+      param++;  // 0th argument must skip return value at start of the shorty
+    } else if (param == 0) {
+      return false;  // this argument
+    }
+    return shorty_[param] == 'D';
+  }
+  bool IsParamALong(unsigned int param) const {
+    DCHECK_LT(param, NumArgs());
+    if (IsStatic()) {
+      param++;  // 0th argument must skip return value at start of the shorty
+    } else if (param == 0) {
+      return true;  // this argument
+    }
+    return shorty_[param] == 'J';
+  }
   bool IsParamAReference(unsigned int param) const {
     DCHECK_LT(param, NumArgs());
     if (IsStatic()) {
@@ -214,6 +232,8 @@
   void Next();
   bool IsCurrentParamAReference();
   bool IsCurrentParamAFloatOrDouble();
+  bool IsCurrentParamADouble();
+  bool IsCurrentParamALong();
   bool IsCurrentArgExplicit();  // i.e. a non-implicit argument such as this
   bool IsCurrentArgPossiblyNull();
   size_t CurrentParamSize();
@@ -283,6 +303,9 @@
   virtual void Next();
   bool IsCurrentParamAReference();
   bool IsCurrentParamAFloatOrDouble();
+  bool IsCurrentParamADouble();
+  bool IsCurrentParamALong();
+  bool IsCurrentParamJniEnv();
   size_t CurrentParamSize();
   virtual bool IsCurrentParamInRegister() = 0;
   virtual bool IsCurrentParamOnStack() = 0;
@@ -299,17 +322,17 @@
 
   FrameOffset SirtLinkOffset() const {
     return FrameOffset(SirtOffset().Int32Value() +
-                       StackIndirectReferenceTable::LinkOffset());
+                       StackIndirectReferenceTable::LinkOffset(frame_pointer_size_));
   }
 
   FrameOffset SirtNumRefsOffset() const {
     return FrameOffset(SirtOffset().Int32Value() +
-                       StackIndirectReferenceTable::NumberOfReferencesOffset());
+                       StackIndirectReferenceTable::NumberOfReferencesOffset(frame_pointer_size_));
   }
 
   FrameOffset SirtReferencesOffset() const {
     return FrameOffset(SirtOffset().Int32Value() +
-                       StackIndirectReferenceTable::ReferencesOffset());
+                       StackIndirectReferenceTable::ReferencesOffset(frame_pointer_size_));
   }
 
   virtual ~JniCallingConvention() {}
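
Note: LinkOffset(), NumberOfReferencesOffset() and ReferencesOffset() now take the frame
pointer size because the SIRT header starts with a target-width link pointer, so every field
after it shifts on 64-bit targets. Under the same layout assumed in the sketch after the ARM
calling convention above (link first, then the uint32_t count, then the reference slots), the
offsets would be:

    #include <cstddef>
    #include <cstdint>

    // Assumed layout; the real accessors live in runtime/stack_indirect_reference_table.h.
    size_t LinkOffset(size_t /* pointer_size */) { return 0; }
    size_t NumberOfReferencesOffset(size_t pointer_size) { return pointer_size; }
    size_t ReferencesOffset(size_t pointer_size) {
      return pointer_size + sizeof(uint32_t);  // 8 on 32-bit, 12 on 64-bit targets
    }
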
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index dcdcdd1..64508d1 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -29,6 +29,7 @@
 #include "utils/assembler.h"
 #include "utils/managed_register.h"
 #include "utils/arm/managed_register_arm.h"
+#include "utils/arm64/managed_register_arm64.h"
 #include "utils/mips/managed_register_mips.h"
 #include "utils/x86/managed_register_x86.h"
 #include "thread.h"
@@ -73,11 +74,17 @@
 
   // Calling conventions to call into JNI method "end" possibly passing a returned reference, the
   //     method and the current thread.
-  size_t jni_end_arg_count = 0;
-  if (reference_return) { jni_end_arg_count++; }
-  if (is_synchronized) { jni_end_arg_count++; }
-  const char* jni_end_shorty = jni_end_arg_count == 0 ? "I"
-                                                        : (jni_end_arg_count == 1 ? "II" : "III");
+  const char* jni_end_shorty;
+  if (reference_return && is_synchronized) {
+    jni_end_shorty = "ILL";
+  } else if (reference_return) {
+    jni_end_shorty = "IL";
+  } else if (is_synchronized) {
+    jni_end_shorty = "VL";
+  } else {
+    jni_end_shorty = "V";
+  }
+
   UniquePtr<JniCallingConvention> end_jni_conv(
       JniCallingConvention::Create(is_static, is_synchronized, jni_end_shorty, instruction_set));
 
@@ -101,12 +108,22 @@
   __ StoreImmediateToFrame(main_jni_conv->SirtNumRefsOffset(),
                            main_jni_conv->ReferenceCount(),
                            mr_conv->InterproceduralScratchRegister());
-  __ CopyRawPtrFromThread32(main_jni_conv->SirtLinkOffset(),
-                          Thread::TopSirtOffset<4>(),
-                          mr_conv->InterproceduralScratchRegister());
-  __ StoreStackOffsetToThread32(Thread::TopSirtOffset<4>(),
-                              main_jni_conv->SirtOffset(),
-                              mr_conv->InterproceduralScratchRegister());
+
+  if (instruction_set == kArm64 || instruction_set == kX86_64) {
+    __ CopyRawPtrFromThread64(main_jni_conv->SirtLinkOffset(),
+                            Thread::TopSirtOffset<8>(),
+                            mr_conv->InterproceduralScratchRegister());
+    __ StoreStackOffsetToThread64(Thread::TopSirtOffset<8>(),
+                                main_jni_conv->SirtOffset(),
+                                mr_conv->InterproceduralScratchRegister());
+  } else {
+    __ CopyRawPtrFromThread32(main_jni_conv->SirtLinkOffset(),
+                            Thread::TopSirtOffset<4>(),
+                            mr_conv->InterproceduralScratchRegister());
+    __ StoreStackOffsetToThread32(Thread::TopSirtOffset<4>(),
+                                main_jni_conv->SirtOffset(),
+                                mr_conv->InterproceduralScratchRegister());
+  }
 
   // 3. Place incoming reference arguments into SIRT
   main_jni_conv->Next();  // Skip JNIEnv*
@@ -154,9 +171,15 @@
   }
 
   // 4. Write out the end of the quick frames.
-  __ StoreStackPointerToThread32(Thread::TopOfManagedStackOffset<4>());
-  __ StoreImmediateToThread32(Thread::TopOfManagedStackPcOffset<4>(), 0,
-                            mr_conv->InterproceduralScratchRegister());
+  if (instruction_set == kArm64 || instruction_set == kX86_64) {
+    __ StoreStackPointerToThread64(Thread::TopOfManagedStackOffset<8>());
+    __ StoreImmediateToThread64(Thread::TopOfManagedStackPcOffset<8>(), 0,
+                              mr_conv->InterproceduralScratchRegister());
+  } else {
+    __ StoreStackPointerToThread32(Thread::TopOfManagedStackOffset<4>());
+    __ StoreImmediateToThread32(Thread::TopOfManagedStackPcOffset<4>(), 0,
+                              mr_conv->InterproceduralScratchRegister());
+  }
 
   // 5. Move frame down to allow space for out going args.
   const size_t main_out_arg_size = main_jni_conv->OutArgSize();
@@ -164,13 +187,14 @@
   const size_t max_out_arg_size = std::max(main_out_arg_size, end_out_arg_size);
   __ IncreaseFrameSize(max_out_arg_size);
 
-
   // 6. Call into appropriate JniMethodStart passing Thread* so that transition out of Runnable
   //    can occur. The result is the saved JNI local state that is restored by the exit call. We
   //    abuse the JNI calling convention here, that is guaranteed to support passing 2 pointer
   //    arguments.
-  ThreadOffset<4> jni_start = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(4, pJniMethodStartSynchronized)
-                                              : QUICK_ENTRYPOINT_OFFSET(4, pJniMethodStart);
+  ThreadOffset<4> jni_start32 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(4, pJniMethodStartSynchronized)
+                                                : QUICK_ENTRYPOINT_OFFSET(4, pJniMethodStart);
+  ThreadOffset<8> jni_start64 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(8, pJniMethodStartSynchronized)
+                                                : QUICK_ENTRYPOINT_OFFSET(8, pJniMethodStart);
   main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
   FrameOffset locked_object_sirt_offset(0);
   if (is_synchronized) {
@@ -192,12 +216,21 @@
   }
   if (main_jni_conv->IsCurrentParamInRegister()) {
     __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
-    __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start),
-            main_jni_conv->InterproceduralScratchRegister());
+    if (instruction_set == kArm64 || instruction_set == kX86_64) {
+      __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start64),
+             main_jni_conv->InterproceduralScratchRegister());
+    } else {
+      __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start32),
+             main_jni_conv->InterproceduralScratchRegister());
+    }
   } else {
     __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
                         main_jni_conv->InterproceduralScratchRegister());
-    __ CallFromThread32(jni_start, main_jni_conv->InterproceduralScratchRegister());
+    if (instruction_set == kArm64 || instruction_set == kX86_64) {
+      __ CallFromThread64(jni_start64, main_jni_conv->InterproceduralScratchRegister());
+    } else {
+      __ CallFromThread32(jni_start32, main_jni_conv->InterproceduralScratchRegister());
+    }
   }
   if (is_synchronized) {  // Check for exceptions from monitor enter.
     __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), main_out_arg_size);
@@ -259,11 +292,20 @@
   if (main_jni_conv->IsCurrentParamInRegister()) {
     ManagedRegister jni_env = main_jni_conv->CurrentParamRegister();
     DCHECK(!jni_env.Equals(main_jni_conv->InterproceduralScratchRegister()));
-    __ LoadRawPtrFromThread32(jni_env, Thread::JniEnvOffset<4>());
+    if (instruction_set == kArm64 || instruction_set == kX86_64) {
+      __ LoadRawPtrFromThread64(jni_env, Thread::JniEnvOffset<8>());
+    } else {
+      __ LoadRawPtrFromThread32(jni_env, Thread::JniEnvOffset<4>());
+    }
   } else {
     FrameOffset jni_env = main_jni_conv->CurrentParamStackOffset();
-    __ CopyRawPtrFromThread32(jni_env, Thread::JniEnvOffset<4>(),
+    if (instruction_set == kArm64 || instruction_set == kX86_64) {
+      __ CopyRawPtrFromThread64(jni_env, Thread::JniEnvOffset<8>(),
                             main_jni_conv->InterproceduralScratchRegister());
+    } else {
+      __ CopyRawPtrFromThread32(jni_env, Thread::JniEnvOffset<4>(),
+                            main_jni_conv->InterproceduralScratchRegister());
+    }
   }
 
   // 9. Plant call to native code associated with method.
@@ -295,19 +337,24 @@
     __ Store(return_save_location, main_jni_conv->ReturnRegister(), main_jni_conv->SizeOfReturnValue());
   }
 
   // 12. Call into JNI method end possibly passing a returned reference, the method and the current
   //     thread.
   end_jni_conv->ResetIterator(FrameOffset(end_out_arg_size));
-  ThreadOffset<4> jni_end(-1);
+  ThreadOffset<4> jni_end32(-1);
+  ThreadOffset<8> jni_end64(-1);
   if (reference_return) {
     // Pass result.
-    jni_end = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEndWithReferenceSynchronized)
-                              : QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEndWithReference);
+    jni_end32 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEndWithReferenceSynchronized)
+                                : QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEndWithReference);
+    jni_end64 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(8, pJniMethodEndWithReferenceSynchronized)
+                                : QUICK_ENTRYPOINT_OFFSET(8, pJniMethodEndWithReference);
     SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister());
     end_jni_conv->Next();
   } else {
-    jni_end = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEndSynchronized)
-                              : QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEnd);
+    jni_end32 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEndSynchronized)
+                                : QUICK_ENTRYPOINT_OFFSET(4, pJniMethodEnd);
+    jni_end64 = is_synchronized ? QUICK_ENTRYPOINT_OFFSET(8, pJniMethodEndSynchronized)
+                                : QUICK_ENTRYPOINT_OFFSET(8, pJniMethodEnd);
   }
   // Pass saved local reference state.
   if (end_jni_conv->IsCurrentParamOnStack()) {
@@ -334,12 +380,21 @@
   }
   if (end_jni_conv->IsCurrentParamInRegister()) {
     __ GetCurrentThread(end_jni_conv->CurrentParamRegister());
-    __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end),
-            end_jni_conv->InterproceduralScratchRegister());
+    if (instruction_set == kArm64 || instruction_set == kX86_64) {
+      __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end64),
+              end_jni_conv->InterproceduralScratchRegister());
+    } else {
+      __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end32),
+              end_jni_conv->InterproceduralScratchRegister());
+    }
   } else {
     __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset(),
                         end_jni_conv->InterproceduralScratchRegister());
-    __ CallFromThread32(ThreadOffset<4>(jni_end), end_jni_conv->InterproceduralScratchRegister());
+    if (instruction_set == kArm64 || instruction_set == kX86_64) {
+      __ CallFromThread64(ThreadOffset<8>(jni_end64), end_jni_conv->InterproceduralScratchRegister());
+    } else {
+      __ CallFromThread32(ThreadOffset<4>(jni_end32), end_jni_conv->InterproceduralScratchRegister());
+    }
   }
 
   // 13. Reload return value
@@ -360,6 +415,10 @@
   // 17. Finalize code generation
   __ EmitSlowPaths();
   size_t cs = __ CodeSize();
+  if (instruction_set == kArm64) {
+    // Test that we do not exceed the buffer size.
+    CHECK(cs < arm64::kBufferSizeArm64);
+  }
   std::vector<uint8_t> managed_code(cs);
   MemoryRegion code(&managed_code[0], managed_code.size());
   __ FinalizeInstructions(code);
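
Note: in the jni_end_shorty strings above, the first character is the entrypoint's return kind
and each following character describes one extra argument. The old all-'I' spelling was
harmless on 32-bit targets, but on a 64-bit convention it would steer pointer arguments into
32-bit W registers; spelling them 'L' marks the returned reference and the lock object as
pointer-width, so CurrentParamRegister() in calling_convention_arm64.cc picks X registers.
The selection, restated as a helper (same strings, no new behavior):

    // "I"/"V" = return kind; each 'L' = one reference argument (result and/or lock).
    const char* JniEndShorty(bool reference_return, bool is_synchronized) {
      if (reference_return && is_synchronized) return "ILL";
      if (reference_return) return "IL";
      if (is_synchronized) return "VL";
      return "V";
    }
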
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index 51a3f54..8e1c0c7 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -149,7 +149,7 @@
   // Method*, LR and callee save area size, local reference segment state
   size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus 2 words for SIRT header
-  size_t sirt_size = (ReferenceCount() + 2) * sirt_pointer_size_;
+  size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSizeTarget(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(), kStackAlignment);
 }
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 8b440ed..153f953 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -126,7 +126,7 @@
   // Method*, return address and callee save area size, local reference segment state
   size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus 2 words for SIRT header
-  size_t sirt_size = (ReferenceCount() + 2) * sirt_pointer_size_;
+  size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSizeTarget(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(), kStackAlignment);
 }
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index 21e0bd7..4dfa29a 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -141,7 +141,7 @@
   // Method*, return address and callee save area size, local reference segment state
   size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus link_ (pointer) and number_of_references_ (uint32_t) for SIRT header
-  size_t sirt_size = kFramePointerSize + sizeof(uint32_t) + (ReferenceCount() * sirt_pointer_size_);
+  size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSizeTarget(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(), kStackAlignment);
 }
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 2d45a2f..eff2425 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -345,6 +345,36 @@
   return offset;
 }
 
+static void DCheckCodeAlignment(size_t offset, InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      // Fall-through.
+    case kThumb2:
+      DCHECK_ALIGNED(offset, kArmAlignment);
+      break;
+
+    case kArm64:
+      DCHECK_ALIGNED(offset, kArm64Alignment);
+      break;
+
+    case kMips:
+      DCHECK_ALIGNED(offset, kMipsAlignment);
+      break;
+
+    case kX86_64:
+      // Fall-through.
+    case kX86:
+      DCHECK_ALIGNED(offset, kX86Alignment);
+      break;
+
+    case kNone:
+      // Use a DCHECK instead of FATAL so that in the non-debug case the whole switch can
+      // be optimized away.
+      DCHECK(false);
+      break;
+  }
+}
+
 size_t OatWriter::InitOatCodeMethod(size_t offset, size_t oat_class_index,
                                     size_t __attribute__((unused)) class_def_index,
                                     size_t class_def_method_index,
@@ -376,7 +406,8 @@
     } else {
       CHECK(quick_code != nullptr);
       offset = compiled_method->AlignCode(offset);
-      DCHECK_ALIGNED(offset, kArmAlignment);
+      DCheckCodeAlignment(offset, compiled_method->GetInstructionSet());
+
       uint32_t code_size = quick_code->size() * sizeof(uint8_t);
       CHECK_NE(code_size, 0U);
       uint32_t thumb_offset = compiled_method->CodeDelta();
@@ -826,7 +857,8 @@
         relative_offset += aligned_code_delta;
         DCHECK_OFFSET();
       }
-      DCHECK_ALIGNED(relative_offset, kArmAlignment);
+      DCheckCodeAlignment(relative_offset, compiled_method->GetInstructionSet());
+
       uint32_t code_size = quick_code->size() * sizeof(uint8_t);
       CHECK_NE(code_size, 0U);
 
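
Note: DCheckCodeAlignment replaces two hard-coded kArmAlignment checks, so each target is now
verified against its own alignment constant. AlignCode() has already rounded the offset up
before the check runs; the pairing in miniature (the alignment value is an assumption here,
the real constants are defined per ISA elsewhere in ART):

    #include <cassert>
    #include <cstddef>

    // RoundUp for power-of-two alignments, as ART defines it.
    size_t RoundUp(size_t x, size_t n) { return (x + n - 1) & ~(n - 1); }

    int main() {
      const size_t kArm64Alignment = 16;          // assumed value for illustration
      size_t offset = 0x1001;                     // arbitrary unaligned code offset
      offset = RoundUp(offset, kArm64Alignment);  // what AlignCode() does
      assert(offset % kArm64Alignment == 0);      // what DCheckCodeAlignment verifies
      return 0;
    }
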
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index a11c2da..1d87eaa 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -50,11 +50,11 @@
 }
 
 void Arm64Assembler::GetCurrentThread(ManagedRegister tr) {
-  ___ Mov(reg_x(tr.AsArm64().AsCoreRegister()), reg_x(TR));
+  ___ Mov(reg_x(tr.AsArm64().AsCoreRegister()), reg_x(TR1));
 }
 
 void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) {
-  StoreToOffset(TR, SP, offset.Int32Value());
+  StoreToOffset(TR1, SP, offset.Int32Value());
 }
 
 // See Arm64 PCS Section 5.2.2.1.
@@ -138,7 +138,8 @@
 void Arm64Assembler::StoreRef(FrameOffset offs, ManagedRegister m_src) {
   Arm64ManagedRegister src = m_src.AsArm64();
   CHECK(src.IsCoreRegister()) << src;
-  StoreToOffset(src.AsCoreRegister(), SP, offs.Int32Value());
+  StoreWToOffset(kStoreWord, src.AsOverlappingCoreRegisterLow(), SP,
+                 offs.Int32Value());
 }
 
 void Arm64Assembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) {
@@ -152,30 +153,31 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadImmediate(scratch.AsCoreRegister(), imm);
-  StoreToOffset(scratch.AsCoreRegister(), SP, offs.Int32Value());
+  StoreWToOffset(kStoreWord, scratch.AsOverlappingCoreRegisterLow(), SP,
+                 offs.Int32Value());
 }
 
-void Arm64Assembler::StoreImmediateToThread32(ThreadOffset<4> offs, uint32_t imm,
+void Arm64Assembler::StoreImmediateToThread64(ThreadOffset<8> offs, uint32_t imm,
                                             ManagedRegister m_scratch) {
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadImmediate(scratch.AsCoreRegister(), imm);
-  StoreToOffset(scratch.AsCoreRegister(), TR, offs.Int32Value());
+  StoreToOffset(scratch.AsCoreRegister(), TR1, offs.Int32Value());
 }
 
-void Arm64Assembler::StoreStackOffsetToThread32(ThreadOffset<4> tr_offs,
+void Arm64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> tr_offs,
                                               FrameOffset fr_offs,
                                               ManagedRegister m_scratch) {
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
   AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsCoreRegister(), TR, tr_offs.Int32Value());
+  StoreToOffset(scratch.AsCoreRegister(), TR1, tr_offs.Int32Value());
 }
 
-void Arm64Assembler::StoreStackPointerToThread32(ThreadOffset<4> tr_offs) {
+void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset<8> tr_offs) {
   // Arm64 does not support: "str sp, [dest]" therefore we use IP1 as a temp reg.
   ___ Mov(reg_x(IP1), reg_x(SP));
-  StoreToOffset(IP1, TR, tr_offs.Int32Value());
+  StoreToOffset(IP1, TR1, tr_offs.Int32Value());
 }
 
 void Arm64Assembler::StoreSpanning(FrameOffset dest_off, ManagedRegister m_source,
@@ -254,9 +256,13 @@
     CHECK_EQ(4u, size) << dest;
     ___ Ldr(reg_w(dest.AsWRegister()), MEM_OP(reg_x(base), offset));
   } else if (dest.IsCoreRegister()) {
-    CHECK_EQ(8u, size) << dest;
     CHECK_NE(dest.AsCoreRegister(), SP) << dest;
-    ___ Ldr(reg_x(dest.AsCoreRegister()), MEM_OP(reg_x(base), offset));
+    if (size == 4u) {
+      ___ Ldr(reg_w(dest.AsOverlappingCoreRegisterLow()), MEM_OP(reg_x(base), offset));
+    } else {
+      CHECK_EQ(8u, size) << dest;
+      ___ Ldr(reg_x(dest.AsCoreRegister()), MEM_OP(reg_x(base), offset));
+    }
   } else if (dest.IsSRegister()) {
     ___ Ldr(reg_s(dest.AsSRegister()), MEM_OP(reg_x(base), offset));
   } else {
@@ -269,14 +275,14 @@
   return Load(m_dst.AsArm64(), SP, src.Int32Value(), size);
 }
 
-void Arm64Assembler::LoadFromThread32(ManagedRegister m_dst, ThreadOffset<4> src, size_t size) {
-  return Load(m_dst.AsArm64(), TR, src.Int32Value(), size);
+void Arm64Assembler::LoadFromThread64(ManagedRegister m_dst, ThreadOffset<8> src, size_t size) {
+  return Load(m_dst.AsArm64(), TR1, src.Int32Value(), size);
 }
 
 void Arm64Assembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) {
   Arm64ManagedRegister dst = m_dst.AsArm64();
   CHECK(dst.IsCoreRegister()) << dst;
-  LoadFromOffset(dst.AsCoreRegister(), SP, offs.Int32Value());
+  LoadWFromOffset(kLoadWord, dst.AsOverlappingCoreRegisterLow(), SP, offs.Int32Value());
 }
 
 void Arm64Assembler::LoadRef(ManagedRegister m_dst, ManagedRegister m_base,
@@ -284,7 +290,8 @@
   Arm64ManagedRegister dst = m_dst.AsArm64();
   Arm64ManagedRegister base = m_base.AsArm64();
   CHECK(dst.IsCoreRegister() && base.IsCoreRegister());
-  LoadFromOffset(dst.AsCoreRegister(), base.AsCoreRegister(), offs.Int32Value());
+  LoadWFromOffset(kLoadWord, dst.AsOverlappingCoreRegisterLow(), base.AsCoreRegister(),
+                  offs.Int32Value());
 }
 
 void Arm64Assembler::LoadRawPtr(ManagedRegister m_dst, ManagedRegister m_base, Offset offs) {
@@ -294,10 +301,10 @@
   LoadFromOffset(dst.AsCoreRegister(), base.AsCoreRegister(), offs.Int32Value());
 }
 
-void Arm64Assembler::LoadRawPtrFromThread32(ManagedRegister m_dst, ThreadOffset<4> offs) {
+void Arm64Assembler::LoadRawPtrFromThread64(ManagedRegister m_dst, ThreadOffset<8> offs) {
   Arm64ManagedRegister dst = m_dst.AsArm64();
   CHECK(dst.IsCoreRegister()) << dst;
-  LoadFromOffset(dst.AsCoreRegister(), TR, offs.Int32Value());
+  LoadFromOffset(dst.AsCoreRegister(), TR1, offs.Int32Value());
 }
 
 // Copying routines.
@@ -306,8 +313,16 @@
   Arm64ManagedRegister src = m_src.AsArm64();
   if (!dst.Equals(src)) {
     if (dst.IsCoreRegister()) {
-      CHECK(src.IsCoreRegister()) << src;
-      ___ Mov(reg_x(dst.AsCoreRegister()), reg_x(src.AsCoreRegister()));
+      if (size == 4) {
+        CHECK(src.IsWRegister());
+        ___ Mov(reg_x(dst.AsCoreRegister()), reg_w(src.AsWRegister()));
+      } else {
+        if (src.IsCoreRegister()) {
+          ___ Mov(reg_x(dst.AsCoreRegister()), reg_x(src.AsCoreRegister()));
+        } else {
+          ___ Mov(reg_x(dst.AsCoreRegister()), reg_w(src.AsWRegister()));
+        }
+      }
     } else if (dst.IsWRegister()) {
       CHECK(src.IsWRegister()) << src;
       ___ Mov(reg_w(dst.AsWRegister()), reg_w(src.AsWRegister()));
@@ -322,40 +337,42 @@
   }
 }
 
-void Arm64Assembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                          ThreadOffset<4> tr_offs,
+void Arm64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
+                                          ThreadOffset<8> tr_offs,
                                           ManagedRegister m_scratch) {
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(scratch.AsCoreRegister(), TR, tr_offs.Int32Value());
+  LoadFromOffset(scratch.AsCoreRegister(), TR1, tr_offs.Int32Value());
   StoreToOffset(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
 }
 
-void Arm64Assembler::CopyRawPtrToThread32(ThreadOffset<4> tr_offs,
+void Arm64Assembler::CopyRawPtrToThread64(ThreadOffset<8> tr_offs,
                                         FrameOffset fr_offs,
                                         ManagedRegister m_scratch) {
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsCoreRegister(), TR, tr_offs.Int32Value());
+  StoreToOffset(scratch.AsCoreRegister(), TR1, tr_offs.Int32Value());
 }
 
 void Arm64Assembler::CopyRef(FrameOffset dest, FrameOffset src,
                              ManagedRegister m_scratch) {
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadFromOffset(scratch.AsCoreRegister(), SP, src.Int32Value());
-  StoreToOffset(scratch.AsCoreRegister(), SP, dest.Int32Value());
+  LoadWFromOffset(kLoadWord, scratch.AsOverlappingCoreRegisterLow(),
+                  SP, src.Int32Value());
+  StoreWToOffset(kStoreWord, scratch.AsOverlappingCoreRegisterLow(),
+                 SP, dest.Int32Value());
 }
 
 void Arm64Assembler::Copy(FrameOffset dest, FrameOffset src,
                           ManagedRegister m_scratch, size_t size) {
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsCoreRegister() || scratch.IsWRegister()) << scratch;
+  CHECK(scratch.IsCoreRegister()) << scratch;
   CHECK(size == 4 || size == 8) << size;
   if (size == 4) {
-    LoadWFromOffset(kLoadWord, scratch.AsWRegister(), SP, src.Int32Value());
-    StoreWToOffset(kStoreWord, scratch.AsWRegister(), SP, dest.Int32Value());
+    LoadWFromOffset(kLoadWord, scratch.AsOverlappingCoreRegisterLow(), SP, src.Int32Value());
+    StoreWToOffset(kStoreWord, scratch.AsOverlappingCoreRegisterLow(), SP, dest.Int32Value());
   } else if (size == 8) {
     LoadFromOffset(scratch.AsCoreRegister(), SP, src.Int32Value());
     StoreToOffset(scratch.AsCoreRegister(), SP, dest.Int32Value());
@@ -418,10 +435,17 @@
   CHECK(scratch.IsCoreRegister() || scratch.IsWRegister()) << scratch;
   CHECK(size == 4 || size == 8) << size;
   if (size == 4) {
-    LoadWFromOffset(kLoadWord, scratch.AsWRegister(), src.AsCoreRegister(),
+    if (scratch.IsWRegister()) {
+      LoadWFromOffset(kLoadWord, scratch.AsWRegister(), src.AsCoreRegister(),
                     src_offset.Int32Value());
-    StoreWToOffset(kStoreWord, scratch.AsWRegister(), dest.AsCoreRegister(),
+      StoreWToOffset(kStoreWord, scratch.AsWRegister(), dest.AsCoreRegister(),
                    dest_offset.Int32Value());
+    } else {
+      LoadWFromOffset(kLoadWord, scratch.AsOverlappingCoreRegisterLow(), src.AsCoreRegister(),
+                      src_offset.Int32Value());
+      StoreWToOffset(kStoreWord, scratch.AsOverlappingCoreRegisterLow(), dest.AsCoreRegister(),
+                     dest_offset.Int32Value());
+    }
   } else if (size == 8) {
     LoadFromOffset(scratch.AsCoreRegister(), src.AsCoreRegister(), src_offset.Int32Value());
     StoreToOffset(scratch.AsCoreRegister(), dest.AsCoreRegister(), dest_offset.Int32Value());
@@ -486,7 +510,7 @@
   ___ Blr(reg_x(scratch.AsCoreRegister()));
 }
 
-void Arm64Assembler::CallFromThread32(ThreadOffset<4> /*offset*/, ManagedRegister /*scratch*/) {
+void Arm64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*scratch*/) {
   UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant";
 }
 
@@ -502,10 +526,11 @@
     // the address in the SIRT holding the reference.
     // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
     if (in_reg.IsNoRegister()) {
-      LoadFromOffset(out_reg.AsCoreRegister(), SP, sirt_offs.Int32Value());
+      LoadWFromOffset(kLoadWord, out_reg.AsOverlappingCoreRegisterLow(), SP,
+                      sirt_offs.Int32Value());
       in_reg = out_reg;
     }
-    ___ Cmp(reg_x(in_reg.AsCoreRegister()), 0);
+    ___ Cmp(reg_w(in_reg.AsOverlappingCoreRegisterLow()), 0);
     if (!out_reg.Equals(in_reg)) {
       LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
     }
@@ -520,11 +545,12 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
   if (null_allowed) {
-    LoadFromOffset(scratch.AsCoreRegister(), SP, sirt_offset.Int32Value());
+    LoadWFromOffset(kLoadWord, scratch.AsOverlappingCoreRegisterLow(), SP,
+                    sirt_offset.Int32Value());
     // Null values get a SIRT entry value of 0.  Otherwise, the sirt entry is
     // the address in the SIRT holding the reference.
     // e.g. scratch = (scratch == 0) ? 0 : (SP+sirt_offset)
-    ___ Cmp(reg_x(scratch.AsCoreRegister()), 0);
+    ___ Cmp(reg_w(scratch.AsOverlappingCoreRegisterLow()), 0);
    // Move this logic into an AddConstant variant that sets flags.
     AddConstant(scratch.AsCoreRegister(), SP, sirt_offset.Int32Value(), NE);
   } else {
@@ -555,7 +581,7 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   Arm64Exception *current_exception = new Arm64Exception(scratch, stack_adjust);
   exception_blocks_.push_back(current_exception);
-  LoadFromOffset(scratch.AsCoreRegister(), TR, Thread::ExceptionOffset<4>().Int32Value());
+  LoadFromOffset(scratch.AsCoreRegister(), TR1, Thread::ExceptionOffset<8>().Int32Value());
   ___ Cmp(reg_x(scratch.AsCoreRegister()), 0);
   ___ B(current_exception->Entry(), COND_OP(NE));
 }
@@ -569,7 +595,11 @@
   // Pass exception object as argument.
   // Don't care about preserving X0 as this won't return.
   ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsCoreRegister()));
-  LoadFromOffset(IP1, TR, QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value());
+  LoadFromOffset(IP1, TR1, QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value());
+
+  // FIXME: Temporary fix for TR (XSELF).
+  ___ Mov(reg_x(TR), reg_x(TR1));
+
   ___ Blr(reg_x(IP1));
   // Call should never return.
   ___ Brk();
@@ -590,6 +620,9 @@
   CHECK_EQ(callee_save_regs.size(), kCalleeSavedRegsSize);
   ___ PushCalleeSavedRegisters();
 
+  // FIXME: Temporary fix for TR (XSELF).
+  ___ Mov(reg_x(TR1), reg_x(TR));
+
   // Increase frame to required size - must be at least space to push Method*.
   CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize);
   size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize);
@@ -598,11 +631,27 @@
   // Write Method*.
   StoreToOffset(X0, SP, 0);
 
-  // Write out entry spills, treated as X regs.
-  // TODO: we can implement a %2 STRP variant of StoreToOffset.
+  // Write out entry spills.
+  int32_t offset = frame_size + kFramePointerSize;
   for (size_t i = 0; i < entry_spills.size(); ++i) {
-    Register reg = entry_spills.at(i).AsArm64().AsCoreRegister();
-    StoreToOffset(reg, SP, frame_size + kFramePointerSize + (i * kFramePointerSize));
+    Arm64ManagedRegister reg = entry_spills.at(i).AsArm64();
+    if (reg.IsNoRegister()) {
+      // Only increment the stack offset.
+      ManagedRegisterSpill spill = entry_spills.at(i);
+      offset += spill.getSize();
+    } else if (reg.IsCoreRegister()) {
+      StoreToOffset(reg.AsCoreRegister(), SP, offset);
+      offset += 8;
+    } else if (reg.IsWRegister()) {
+      StoreWToOffset(kStoreWord, reg.AsWRegister(), SP, offset);
+      offset += 4;
+    } else if (reg.IsDRegister()) {
+      StoreDToOffset(reg.AsDRegister(), SP, offset);
+      offset += 8;
+    } else if (reg.IsSRegister()) {
+      StoreSToOffset(reg.AsSRegister(), SP, offset);
+      offset += 4;
+    }
   }
 }
 
@@ -618,6 +667,9 @@
   size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize);
   DecreaseFrameSize(adjust);
 
+  // FIXME: Temporary fix for TR (XSELF).
+  ___ Mov(reg_x(TR), reg_x(TR1));
+
   // Pop callee saved and return to LR.
   ___ PopCalleeSavedRegisters();
   ___ Ret();
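
Note on the recurring TR/TR1 edits in this file: managed code arrives with Thread* in TR (x18), but the JNI assembler now addresses all thread-local state through TR1 (x19, callee-saved), with the Mov bridges added to BuildFrame, RemoveFrame, and the exception path keeping the two in sync. A standalone C++ sketch of that invariant (the register numbers are the ones this patch uses; the CpuSketch type is illustrative):

    #include <cassert>

    // Illustrative model of the TR -> TR1 bridge added above: on frame entry
    // the thread pointer is mirrored from x18 (TR) into callee-saved x19
    // (TR1); thread-relative loads/stores in the body then use TR1, and frame
    // exit copies it back so callers still see Thread* in TR.
    struct CpuSketch { unsigned long x[31]; };  // x0..x30.

    void BuildFrameBridge(CpuSketch* cpu)  { cpu->x[19] = cpu->x[18]; }  // TR1 = TR.
    void RemoveFrameBridge(CpuSketch* cpu) { cpu->x[18] = cpu->x[19]; }  // TR = TR1.

    int main() {
      CpuSketch cpu = {};
      cpu.x[18] = 0x1000;           // Pretend Thread* arrives in TR.
      BuildFrameBridge(&cpu);
      assert(cpu.x[19] == 0x1000);  // Body addresses thread state via TR1.
      RemoveFrameBridge(&cpu);
      return 0;
    }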
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 8acd1f9..97fb93a 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -81,8 +81,8 @@
 
 class Arm64Assembler FINAL : public Assembler {
  public:
-  Arm64Assembler() : vixl_buf_(new byte[BUF_SIZE]),
-  vixl_masm_(new vixl::MacroAssembler(vixl_buf_, BUF_SIZE)) {}
+  Arm64Assembler() : vixl_buf_(new byte[kBufferSizeArm64]),
+  vixl_masm_(new vixl::MacroAssembler(vixl_buf_, kBufferSizeArm64)) {}
 
   virtual ~Arm64Assembler() {
     delete[] vixl_buf_;
@@ -114,27 +114,27 @@
   void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
-  void StoreImmediateToThread32(ThreadOffset<4> dest, uint32_t imm, ManagedRegister scratch)
+  void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, ManagedRegister scratch)
       OVERRIDE;
-  void StoreStackOffsetToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs,
+  void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
                                   ManagedRegister scratch) OVERRIDE;
-  void StoreStackPointerToThread32(ThreadOffset<4> thr_offs) OVERRIDE;
+  void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE;
   void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
                      ManagedRegister scratch) OVERRIDE;
 
   // Load routines.
   void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
-  void LoadFromThread32(ManagedRegister dest, ThreadOffset<4> src, size_t size) OVERRIDE;
+  void LoadFromThread64(ManagedRegister dest, ThreadOffset<8> src, size_t size) OVERRIDE;
   void LoadRef(ManagedRegister dest, FrameOffset  src) OVERRIDE;
   void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs) OVERRIDE;
   void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
-  void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset<4> offs) OVERRIDE;
+  void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset<8> offs) OVERRIDE;
 
   // Copying routines.
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
-  void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset<4> thr_offs,
+  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs,
                               ManagedRegister scratch) OVERRIDE;
-  void CopyRawPtrToThread32(ThreadOffset<4> thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+  void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
       OVERRIDE;
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
   void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
@@ -183,7 +183,7 @@
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset<4> offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread64(ThreadOffset<8> offset, ManagedRegister scratch) OVERRIDE;
 
   // Jump to address (not setting link register)
   void JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch);
@@ -234,9 +234,6 @@
   void AddConstant(Register rd, int32_t value, Condition cond = AL);
   void AddConstant(Register rd, Register rn, int32_t value, Condition cond = AL);
 
-  // Vixl buffer size.
-  static constexpr size_t BUF_SIZE = 4096;
-
   // Vixl buffer.
   byte* vixl_buf_;
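
The header-side renames above are what make the 32-/64-bit split stick at compile time: ThreadOffset is templated on the target pointer size, so once the arm64 assembler implements only the ...Thread64 overrides it can no longer be handed a 32-bit thread offset. A minimal sketch of the mechanism (ThreadOffsetSketch is illustrative, not ART's actual type):

    #include <cstddef>
    #include <cstdint>

    // Tagging the offset type with the pointer size turns a mixed-up
    // 32-bit/64-bit thread offset into a type error instead of a silent
    // miscompile.
    template <size_t kPointerSize>
    struct ThreadOffsetSketch {
      explicit ThreadOffsetSketch(int32_t v) : value(v) {}
      int32_t value;
    };

    void LoadFromThread64(ThreadOffsetSketch<8> offs) { (void)offs; }

    int main() {
      LoadFromThread64(ThreadOffsetSketch<8>(152));     // OK.
      // LoadFromThread64(ThreadOffsetSketch<4>(152));  // Would not compile.
      return 0;
    }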
 
diff --git a/compiler/utils/arm64/constants_arm64.h b/compiler/utils/arm64/constants_arm64.h
index ecf9fbe..2a08c95 100644
--- a/compiler/utils/arm64/constants_arm64.h
+++ b/compiler/utils/arm64/constants_arm64.h
@@ -31,6 +31,9 @@
 
 constexpr unsigned int kCalleeSavedRegsSize = 20;
 
+// Vixl buffer size.
+constexpr size_t kBufferSizeArm64 = 4096 * 2;
+
 }  // arm64
 }  // art
 
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index 0d31322..bfb2829 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -91,6 +91,9 @@
   explicit ManagedRegisterSpill(const ManagedRegister& other)
       : ManagedRegister(other), size_(-1), spill_offset_(-1) { }
 
+  explicit ManagedRegisterSpill(const ManagedRegister& other, int32_t size)
+      : ManagedRegister(other), size_(size), spill_offset_(-1) { }
+
   int32_t getSpillOffset() {
     return spill_offset_;
   }
@@ -113,6 +116,11 @@
     std::vector<ManagedRegisterSpill>::push_back(spill);
   }
 
+  void push_back(ManagedRegister __x, int32_t __size) {
+    ManagedRegisterSpill spill(__x, __size);
+    std::vector<ManagedRegisterSpill>::push_back(spill);
+  }
+
   void push_back(ManagedRegisterSpill __x) {
     std::vector<ManagedRegisterSpill>::push_back(__x);
   }
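
The size-carrying ManagedRegisterSpill constructor and push_back overload added here feed the new entry-spill loop in Arm64Assembler::BuildFrame above: each spill advances the stack offset by its own byte size instead of a fixed 8. A self-contained analogue of that accounting (the names are illustrative):

    #include <cstdint>
    #include <vector>

    // Each spill records an explicit byte size; the frame builder walks the
    // list and advances the offset, matching the W/S (4-byte) vs X/D (8-byte)
    // cases handled in BuildFrame.
    struct SpillSketch { int reg; int32_t size; };

    int32_t TotalSpillBytes(const std::vector<SpillSketch>& spills) {
      int32_t offset = 0;
      for (const SpillSketch& s : spills) offset += s.size;
      return offset;
    }

    int main() {
      std::vector<SpillSketch> spills = {{0, 8}, {1, 4}, {2, 8}, {3, 4}};
      return TotalSpillBytes(spills) == 24 ? 0 : 1;
    }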
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index c6e448e..ac76c35 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -139,7 +139,7 @@
   UsageError("      Example: --android-root=out/host/linux-x86");
   UsageError("      Default: $ANDROID_ROOT");
   UsageError("");
-  UsageError("  --instruction-set=(arm|mips|x86|x86_64): compile for a particular instruction");
+  UsageError("  --instruction-set=(arm|arm64|mips|x86|x86_64): compile for a particular instruction");
   UsageError("      set.");
   UsageError("      Example: --instruction-set=x86");
   UsageError("      Default: arm");
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index 3d63c36..c96ff60 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -116,7 +116,8 @@
   uint64_t gprs[32];
   uint64_t fprs[32];
 
-  for (size_t i = 0; i < kNumberOfCoreRegisters; ++i) {
+  // Do not use kNumberOfCoreRegisters here, as it counts SP and XZR as distinct registers.
+  for (size_t i = 0; i < 32; ++i) {
     gprs[i] = gprs_[i] != NULL ? *gprs_[i] : Arm64Context::kBadGprBase + i;
   }
   for (size_t i = 0; i < kNumberOfDRegisters; ++i) {
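
Why the loop above hard-codes 32: AArch64 encodes SP and XZR with the same register number (31), so an enum that lists both - as the arm64 Register enum appears to - carries one more enumerator than there are GPR slots. A sketch of the mismatch, assuming kNumberOfCoreRegisters is 33 (the exact value is not shown in this patch):

    // Assumed values, for illustration only: 31 numbered GPRs plus one slot
    // for register number 31, which SP and XZR share in the encoding.
    constexpr int kGprSlots = 32;               // What the loop above iterates.
    constexpr int kNumberOfCoreRegisters = 33;  // Assumed: SP and XZR listed separately.
    static_assert(kNumberOfCoreRegisters == kGprSlots + 1,
                  "using kNumberOfCoreRegisters would overrun gprs[32]");
    int main() { return 0; }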
diff --git a/runtime/arch/arm64/jni_entrypoints_arm64.S b/runtime/arch/arm64/jni_entrypoints_arm64.S
index d2ed692..ba783ab 100644
--- a/runtime/arch/arm64/jni_entrypoints_arm64.S
+++ b/runtime/arch/arm64/jni_entrypoints_arm64.S
@@ -20,11 +20,76 @@
      * Jni dlsym lookup stub.
      */
     .extern artFindNativeMethod
-UNIMPLEMENTED art_jni_dlsym_lookup_stub
+
+    // TODO: Add CFI directives.
+ENTRY art_jni_dlsym_lookup_stub
+  // spill regs.
+  stp   x29, x30, [sp, #-16]!
+  mov   x29, sp
+  stp   d6, d7,   [sp, #-16]!
+  stp   d4, d5,   [sp, #-16]!
+  stp   d2, d3,   [sp, #-16]!
+  stp   d0, d1,   [sp, #-16]!
+  stp   x6, x7,   [sp, #-16]!
+  stp   x4, x5,   [sp, #-16]!
+  stp   x2, x3,   [sp, #-16]!
+  stp   x0, x1,   [sp, #-16]!
+
+  bl  artFindNativeMethod
+  mov  x17, x0    // store result in scratch reg.
+
+  // load spill regs.
+  ldp   x0, x1,   [sp], #16
+  ldp   x2, x3,   [sp], #16
+  ldp   x4, x5,   [sp], #16
+  ldp   x6, x7,   [sp], #16
+  ldp   d0, d1,   [sp], #16
+  ldp   d2, d3,   [sp], #16
+  ldp   d4, d5,   [sp], #16
+  ldp   d6, d7,   [sp], #16
+  ldp   x29, x30, [sp], #16
+
+  cbz   x17, 1f   // is method code null?
+  br    x17       // if non-null, tail call to method's code.
+
+1:
+  ret             // restore regs and return to caller to handle exception.
+END art_jni_dlsym_lookup_stub
 
     /*
      * Entry point of native methods when JNI bug compatibility is enabled.
      */
     .extern artWorkAroundAppJniBugs
-UNIMPLEMENTED art_work_around_app_jni_bugs
+ENTRY art_work_around_app_jni_bugs
+  // spill regs.
+  stp   x29, x30, [sp, #-16]!
+  mov   x29, sp
+  stp   d6, d7,   [sp, #-16]!
+  stp   d4, d5,   [sp, #-16]!
+  stp   d2, d3,   [sp, #-16]!
+  stp   d0, d1,   [sp, #-16]!
+  stp   x6, x7,   [sp, #-16]!
+  stp   x4, x5,   [sp, #-16]!
+  stp   x2, x3,   [sp, #-16]!
+  stp   x0, x1,   [sp, #-16]!
+
+  mov   x0, x19   // Thread::Current.
+  mov   x1, sp    // SP.
+  bl    artWorkAroundAppJniBugs   // (Thread*, SP).
+  mov   x17, x0   // save target return.
+
+  // load spill regs.
+  ldp   x0, x1,   [sp], #16
+  ldp   x2, x3,   [sp], #16
+  ldp   x4, x5,   [sp], #16
+  ldp   x6, x7,   [sp], #16
+  ldp   d0, d1,   [sp], #16
+  ldp   d2, d3,   [sp], #16
+  ldp   d4, d5,   [sp], #16
+  ldp   d6, d7,   [sp], #16
+  ldp   x29, x30, [sp], #16
+
+  // Tail call into JNI routine.
+  br    x17
+END art_work_around_app_jni_bugs
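
For readers skimming the two stubs above: both preserve every register that can carry a native-call argument (x0-x7, d0-d7) across the runtime call, then either tail-call the returned code pointer or fall back to the caller. A C-level sketch of the dlsym stub's control flow (artFindNativeMethod's real signature is not shown in this patch, so the resolver is passed in):

    // Sketch only: the argument registers are saved/restored by the assembly
    // above; here that is implicit in the ordinary calling convention.
    using NativeCode = void (*)();

    void DlsymLookupSketch(NativeCode (*find_native_method)()) {
      NativeCode code = find_native_method();  // bl artFindNativeMethod.
      if (code != nullptr) {
        code();  // br x17: tail call into the resolved native method.
      }
      // else: plain ret; the caller sees the exception the resolver set.
    }

    int main() {
      DlsymLookupSketch([]() -> NativeCode { return nullptr; });  // Failure path.
      return 0;
    }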
 
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 3082273..6ce5d06 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -60,26 +60,31 @@
 
     // Callee saved.
     stp xSELF, x19, [sp, #264]
-    stp x20, x21, [sp, #280]
-    stp x22, x23, [sp, #296]
-    stp x24, x25, [sp, #312]
-    stp x26, x27, [sp, #328]
-    stp x28, xFP, [sp, #344]    // Save FP.
-    str xLR, [sp, #360]
+    .cfi_rel_offset x18, 264
+    .cfi_rel_offset x19, 272
 
-    .cfi_offset x18,72
-    .cfi_offset x19,80
-    .cfi_offset x20,88
-    .cfi_offset x21,96
-    .cfi_offset x22,104
-    .cfi_offset x23,112
-    .cfi_offset x24,120
-    .cfi_offset x25,128
-    .cfi_offset x26,136
-    .cfi_offset x27,144
-    .cfi_offset x28,152
-    .cfi_offset x29,160
-    .cfi_offset x30,168
+    stp x20, x21, [sp, #280]
+    .cfi_rel_offset x20, 280
+    .cfi_rel_offset x21, 288
+
+    stp x22, x23, [sp, #296]
+    .cfi_rel_offset x22, 296
+    .cfi_rel_offset x23, 304
+
+    stp x24, x25, [sp, #312]
+    .cfi_rel_offset x24, 312
+    .cfi_rel_offset x25, 320
+
+    stp x26, x27, [sp, #328]
+    .cfi_rel_offset x26, 328
+    .cfi_rel_offset x27, 336
+
+    stp x28, xFP, [sp, #344]    // Save FP.
+    .cfi_rel_offset x28, 344
+    .cfi_rel_offset x29, 352
+
+    str xLR, [sp, #360]
+    .cfi_rel_offset x30, 360
 
     // Loads appropriate callee-save-method
     str x9, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs]
@@ -117,36 +122,44 @@
     stp d14, d15, [sp, #128]
 
     stp x1,  x2, [sp, #144]
-    stp x3,  x4, [sp, #160]
-    stp x5,  x6, [sp, #176]
-    stp x7,  xSELF, [sp, #192]
-    stp x19, x20, [sp, #208]
-    stp x21, x22, [sp, #224]
-    stp x23, x24, [sp, #240]
-    stp x25, x26, [sp, #256]
-    stp x27, x28, [sp, #272]
-    stp xFP, xLR, [sp, #288]
+    .cfi_rel_offset x1, 144
+    .cfi_rel_offset x2, 152
 
-    .cfi_offset x1,144
-    .cfi_offset x2,152
-    .cfi_offset x3,160
-    .cfi_offset x4,168
-    .cfi_offset x5,176
-    .cfi_offset x6,184
-    .cfi_offset x7,192
-    .cfi_offset x18,200
-    .cfi_offset x19,208
-    .cfi_offset x20,216
-    .cfi_offset x21,224
-    .cfi_offset x22,232
-    .cfi_offset x23,240
-    .cfi_offset x24,248
-    .cfi_offset x25,256
-    .cfi_offset x26,264
-    .cfi_offset x27,272
-    .cfi_offset x28,280
-    .cfi_offset x29,288
-    .cfi_offset x30,296
+    stp x3,  x4, [sp, #160]
+    .cfi_rel_offset x3, 160
+    .cfi_rel_offset x4, 168
+
+    stp x5,  x6, [sp, #176]
+    .cfi_rel_offset x5, 176
+    .cfi_rel_offset x6, 184
+
+    stp x7,  xSELF, [sp, #192]
+    .cfi_rel_offset x7, 192
+    .cfi_rel_offset x18, 200
+
+    stp x19, x20, [sp, #208]
+    .cfi_rel_offset x19, 208
+    .cfi_rel_offset x20, 216
+
+    stp x21, x22, [sp, #224]
+    .cfi_rel_offset x21, 224
+    .cfi_rel_offset x22, 232
+
+    stp x23, x24, [sp, #240]
+    .cfi_rel_offset x23, 240
+    .cfi_rel_offset x24, 248
+
+    stp x25, x26, [sp, #256]
+    .cfi_rel_offset x25, 256
+    .cfi_rel_offset x26, 264
+
+    stp x27, x28, [sp, #272]
+    .cfi_rel_offset x27, 272
+    .cfi_rel_offset x28, 280
+
+    stp xFP, xLR, [sp, #288]
+    .cfi_rel_offset x29, 288
+    .cfi_rel_offset x30, 296
 .endm
 
     /*
@@ -183,15 +196,44 @@
 
     // args.
     ldp x1,  x2, [sp, #144]
+    .cfi_restore x1
+    .cfi_restore x2
+
     ldp x3,  x4, [sp, #160]
+    .cfi_restore x3
+    .cfi_restore x4
+
     ldp x5,  x6, [sp, #176]
+    .cfi_restore x5
+    .cfi_restore x6
+
     ldp x7,  xSELF, [sp, #192]
+    .cfi_restore x7
+    .cfi_restore x18
+
     ldp x19, x20, [sp, #208]
+    .cfi_restore x19
+    .cfi_restore x20
+
     ldp x21, x22, [sp, #224]
+    .cfi_restore x21
+    .cfi_restore x22
+
     ldp x23, x24, [sp, #240]
+    .cfi_restore x23
+    .cfi_restore x24
+
     ldp x25, x26, [sp, #256]
+    .cfi_restore x25
+    .cfi_restore x26
+
     ldp x27, x28, [sp, #272]
+    .cfi_restore x27
+    .cfi_restore x28
+
     ldp xFP, xLR, [sp, #288]
+    .cfi_restore x29
+    .cfi_restore x30
 
     add sp, sp, #304
     .cfi_adjust_cfa_offset -304
@@ -210,15 +252,44 @@
 
     // args.
     ldp x1,  x2, [sp, #144]
+    .cfi_restore x1
+    .cfi_restore x2
+
     ldp x3,  x4, [sp, #160]
+    .cfi_restore x3
+    .cfi_restore x4
+
     ldp x5,  x6, [sp, #176]
+    .cfi_restore x5
+    .cfi_restore x6
+
     ldp x7,  xSELF, [sp, #192]
+    .cfi_restore x7
+    .cfi_restore x18
+
     ldp x19, x20, [sp, #208]
+    .cfi_restore x19
+    .cfi_restore x20
+
     ldp x21, x22, [sp, #224]
+    .cfi_restore x21
+    .cfi_restore x22
+
     ldp x23, x24, [sp, #240]
+    .cfi_restore x23
+    .cfi_restore x24
+
     ldp x25, x26, [sp, #256]
+    .cfi_restore x25
+    .cfi_restore x26
+
     ldp x27, x28, [sp, #272]
+    .cfi_restore x27
+    .cfi_restore x28
+
     ldp xFP, xLR, [sp, #288]
+    .cfi_restore x29
+    .cfi_restore x30
 
     add sp, sp, #304
     .cfi_adjust_cfa_offset -304
@@ -261,9 +332,14 @@
 END \c_name
 .endm
 
+// FIXME: Temporary fix for TR (XSELF).
 .macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context.
+    mov x1, x19                       // pass Thread::Current.
+    mov x2, sp                        // pass SP.
+    b   \cxx_name                     // \cxx_name(Thread*, SP).
     brk 0
 END \c_name
 .endm
@@ -340,6 +416,113 @@
 INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
 INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
 
+
+.macro INVOKE_STUB_CREATE_FRAME
+
+SAVE_SIZE=5*8   // x4, x5, SP, LR & FP saved.
+SAVE_SIZE_AND_METHOD=SAVE_SIZE+8
+
+    mov x9, sp                          // Save stack pointer.
+    .cfi_register sp,x9
+
+    add x10, x2, # SAVE_SIZE_AND_METHOD // Calculate size of frame.
+    sub x10, sp, x10                    // Calculate SP position - saves + ArtMethod* + args
+    and x10, x10, # ~0xf                // Enforce 16 byte stack alignment.
+    mov sp, x10                         // Set new SP.
+
+    sub x10, x9, #SAVE_SIZE             // Calculate new FP (later). Done here as we must move SP
+    .cfi_def_cfa_register x10           // before this.
+    .cfi_adjust_cfa_offset SAVE_SIZE
+
+    str x9, [x10, #32]                  // Save old stack pointer.
+    .cfi_rel_offset sp, 32
+
+    stp x4, x5, [x10, #16]              // Save result and shorty addresses.
+    .cfi_rel_offset x4, 16
+    .cfi_rel_offset x5, 24
+
+    stp xFP, xLR, [x10]                 // Store LR & FP.
+    .cfi_rel_offset x29, 0
+    .cfi_rel_offset x30, 8
+
+    mov xFP, x10                        // Use xFP now, as it's callee-saved.
+    .cfi_def_cfa_register x29
+    mov xSELF, x3                       // Move thread pointer into SELF register.
+
+    // Copy arguments into stack frame.
+    // Use simple copy routine for now.
+    // 4 bytes per slot.
+    // X1 - source address
+    // W2 - args length
+    // X9 - destination address.
+    // W10 - temporary
+    add x9, sp, #8     // Destination address is bottom of stack plus the null Method* slot.
+
+    // Use \@ to differentiate between macro invocations.
+.LcopyParams\@:
+    cmp w2, #0
+    beq .LendCopyParams\@
+    sub w2, w2, #4      // Need 65536 bytes of range.
+    ldr w10, [x1, x2]
+    str w10, [x9, x2]
+
+    b .LcopyParams\@
+
+.LendCopyParams\@:
+
+    // Store NULL into Method* at bottom of frame.
+    str xzr, [sp]
+
+.endm
+
+.macro INVOKE_STUB_CALL_AND_RETURN
+
+    // Load the quick code entry point from the method (METHOD_QUICK_CODE_OFFSET).
+    ldr x9, [x0, #METHOD_QUICK_CODE_OFFSET]
+    // Branch to method.
+    blr x9
+
+    // Restore return value address and shorty address.
+    ldp x4,x5, [xFP, #16]
+    .cfi_restore x4
+    .cfi_restore x5
+
+    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
+    ldrb w10, [x5]
+
+    // Don't set anything for a void type.
+    cmp w10, #'V'
+    beq .Lexit_art_quick_invoke_stub\@
+
+    cmp w10, #'D'
+    bne .Lreturn_is_float\@
+    str d0, [x4]
+    b .Lexit_art_quick_invoke_stub\@
+
+.Lreturn_is_float\@:
+    cmp w10, #'F'
+    bne .Lreturn_is_int\@
+    str s0, [x4]
+    b .Lexit_art_quick_invoke_stub\@
+
+    // Just store x0. Doesn't matter if it is 64 or 32 bits.
+.Lreturn_is_int\@:
+    str x0, [x4]
+
+.Lexit_art_quick_invoke_stub\@:
+    ldr x2, [x29, #32]   // Restore stack pointer.
+    mov sp, x2
+    .cfi_restore sp
+
+    ldp x29, x30, [x29]    // Restore old frame pointer and link register.
+    .cfi_restore x29
+    .cfi_restore x30
+
+    ret
+
+.endm
+
+
 /*
  *  extern"C" void art_quick_invoke_stub(ArtMethod *method,   x0
  *                                       uint32_t  *args,     x1
@@ -377,63 +560,7 @@
  */
 ENTRY art_quick_invoke_stub
    // Spill registers as per AAPCS64 calling convention.
-
-SAVE_SIZE=5*8   // x4, x5, LR & FP saved.
-SAVE_SIZE_AND_METHOD=SAVE_SIZE+8
-
-    mov x9, sp     // Save stack pointer.
-
-    mov x10, xFP   // Save frame pointer
-    .cfi_register x29,x10
-    add x11, x2, # SAVE_SIZE_AND_METHOD // calculate size of frame.
-
-    sub x11, sp, x11 // Calculate SP position - saves + ArtMethod* +  args
-
-    and x11, x11, # ~0xf  // Enforce 16 byte stack alignment.
-
-    sub xFP, x9, #SAVE_SIZE   // Calculate new FP. Don't store here until SP moved.
-    .cfi_def_cfa_register x29
-
-    mov sp, x11        // set new SP.
-
-    str x9, [xFP, #32]     // Save old stack pointer.
-
-    .cfi_offset x9, 32
-
-    stp x4, x5, [xFP, #16]  // Save result and shorty addresses.
-
-    .cfi_offset x4, 16
-    .cfi_offset x5, 24
-
-    stp x10, xLR, [xFP]   // Store lr & old fp @ fp
-
-    .cfi_offset x30, 0
-    .cfi_offset x10, 8
-
-    mov xSELF, x3       // Move thread pointer into SELF register.
-
-    // Copy arguments into stack frame.
-    // Use simple copy routine for now.
-    // 4 bytes per slot.
-    // X1 - source address
-    // W2 - args length
-    // X10 - destination address.
-    add x9, sp, #8     // Destination address is bottom of stack + NULL.
-
-    // w2 = argsize parameter.
-.LcopyParams:
-    cmp w2, #0
-    beq .LendCopyParams
-    sub w2, w2, #4      // Need 65536 bytes of range.
-    ldr w10, [x1, x2]
-    str w10, [x9, x2]
-
-    b .LcopyParams
-
-.LendCopyParams:
-
-    // Store NULL into Method* at bottom of frame.
-    str xzr, [sp]
+    INVOKE_STUB_CREATE_FRAME
 
     // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
     // Parse the passed shorty to determine which register to load.
@@ -460,7 +587,7 @@
     bne .LisDouble
 
     cmp x15, # 8*12         // Skip this load if all registers full.
-    beq .LfillRegisters
+    beq .Ladvance4
 
     add x17, x13, x15       // Calculate subroutine to jump to.
     br  x17
@@ -470,8 +597,7 @@
     bne .LisLong
 
     cmp x15, # 8*12         // Skip this load if all registers full.
-    beq .LfillRegisters
-
+    beq .Ladvance8
 
     add x17, x14, x15       // Calculate subroutine to jump to.
     br x17
@@ -481,18 +607,26 @@
     bne .LisOther
 
     cmp x8, # 6*12          // Skip this load if all registers full.
-    beq .LfillRegisters
+    beq .Ladvance8
 
     add x17, x12, x8        // Calculate subroutine to jump to.
     br x17
 
-
 .LisOther:                  // Everything else takes one vReg.
     cmp x8, # 6*12          // Skip this load if all registers full.
-    beq .LfillRegisters
+    beq .Ladvance4
+
     add x17, x11, x8        // Calculate subroutine to jump to.
     br x17
 
+.Ladvance4:
+    add x9, x9, #4
+    b .LfillRegisters
+
+.Ladvance8:
+    add x9, x9, #8
+    b .LfillRegisters
+
 // Macro for loading a parameter into a register.
 //  counter - the register with offset into these tables
 //  size - the size of the register - 4 or 8 bytes.
@@ -546,48 +680,8 @@
 
 .LcallFunction:
 
-    // load method-> METHOD_QUICK_CODE_OFFSET
-    ldr x9, [x0 , #METHOD_QUICK_CODE_OFFSET]
-    // Branch to method.
-    blr x9
+    INVOKE_STUB_CALL_AND_RETURN
 
-    // Restore return value address and shorty address.
-    ldp x4,x5, [xFP, #16]
-    .cfi_restore x4
-    .cfi_restore x5
-
-    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
-    ldrb w10, [x5]
-
-    // Don't set anything for a void type.
-    cmp w10, #'V'
-    beq .Lexit_art_quick_invoke_stub
-
-    cmp w10, #'D'
-    bne .Lreturn_is_float
-    str d0, [x4]
-    b .Lexit_art_quick_invoke_stub
-
-.Lreturn_is_float:
-    cmp w10, #'F'
-    bne .Lreturn_is_int
-    str s0, [x4]
-    b .Lexit_art_quick_invoke_stub
-
-    // Just store x0. Doesn't matter if it is 64 or 32 bits.
-.Lreturn_is_int:
-    str x0, [x4]
-
-.Lexit_art_quick_invoke_stub:
-    ldr x2, [x29, #32]   // Restore stack pointer.
-    mov sp, x2
-    .cfi_restore sp
-
-    ldp x29, x30, [x29]    // Restore old frame pointer and link register.
-    .cfi_restore x29
-    .cfi_restore x30
-
-    ret
 END art_quick_invoke_stub
 
 /*  extern"C"
@@ -600,64 +694,7 @@
  */
 ENTRY art_quick_invoke_static_stub
    // Spill registers as per AAPCS64 calling convention.
-
-SAVE_SIZE=5*8   // x4, x5, SP, LR & FP saved
-SAVE_SIZE_AND_METHOD=SAVE_SIZE+8
-
-    mov x9, sp     // Save stack pointer.
-
-    mov x10, xFP   // Save frame pointer
-    .cfi_register x29,x10
-    add x11, x2, # SAVE_SIZE_AND_METHOD // calculate size of frame.
-
-    sub x11, sp, x11 // Calculate SP position - saves + ArtMethod* +  args
-
-    and x11, x11, # ~0xf  // Enforce 16 byte stack alignment.
-
-    sub xFP, x9, #SAVE_SIZE   // Calculate new FP. Don't store here until SP moved.
-
-    mov sp, x11        // set new SP.
-
-    .cfi_def_cfa_register   29
-
-    str x9, [xFP, #32]     // Save old stack pointer.
-
-    .cfi_offset x9, 32
-
-    stp x4, x5, [xFP, #16]  // Save result and shorty addresses.
-
-    .cfi_offset x4, 16
-    .cfi_offset x5, 24
-
-    stp x10, xLR, [x29]   // Store lr & old fp @ fp
-
-    .cfi_offset x30, 0
-    .cfi_offset x10, 8
-
-    mov xSELF, x3       // Move thread pointer into SELF register.
-
-    // Copy arguments into stack frame.
-    // Use simple copy routine for now.
-    // 4 bytes per slot.
-    // X1 - source address
-    // W2 - args length
-    // X10 - destination address.
-    add x9, sp, #8     // Destination address is bottom of stack + NULL.
-
-    // w2 = argsize parameter.
-.LcopyParams2:
-    cmp w2, #0
-    beq .LendCopyParams2
-    sub w2, w2, #4      // Need 65536 bytes of range.
-    ldr w10, [x1, x2]
-    str w10, [x9, x2]
-
-    b .LcopyParams2
-
-.LendCopyParams2:
-
-    // Store NULL into Method* at bottom of frame.
-    str xzr, [sp]
+    INVOKE_STUB_CREATE_FRAME
 
     // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
     // Parse the passed shorty to determine which register to load.
@@ -683,7 +720,7 @@
     bne .LisDouble2
 
     cmp x15, # 8*12         // Skip this load if all registers full.
-    beq .LfillRegisters2
+    beq .Ladvance4_2
 
     add x17, x13, x15       // Calculate subroutine to jump to.
     br  x17
@@ -693,8 +730,7 @@
     bne .LisLong2
 
     cmp x15, # 8*12         // Skip this load if all registers full.
-    beq .LfillRegisters2
-
+    beq .Ladvance8_2
 
     add x17, x14, x15       // Calculate subroutine to jump to.
     br x17
@@ -704,18 +740,26 @@
     bne .LisOther2
 
     cmp x8, # 7*12          // Skip this load if all registers full.
-    beq .LfillRegisters2
+    beq .Ladvance8_2
 
     add x17, x12, x8        // Calculate subroutine to jump to.
     br x17
 
-
 .LisOther2:                 // Everything else takes one vReg.
     cmp x8, # 7*12          // Skip this load if all registers full.
-    beq .LfillRegisters2
+    beq .Ladvance4_2
+
     add x17, x11, x8        // Calculate subroutine to jump to.
     br x17
 
+.Ladvance4_2:
+    add x9, x9, #4
+    b .LfillRegisters2
+
+.Ladvance8_2:
+    add x9, x9, #8
+    b .LfillRegisters2
+
 // Store ints.
 .LstoreW1_2:
     LOADREG x8 4 w1 .LfillRegisters2
@@ -761,52 +805,11 @@
 
 .LcallFunction2:
 
-    // load method-> METHOD_QUICK_CODE_OFFSET.
-    ldr x9, [x0 , #METHOD_QUICK_CODE_OFFSET]
-    // Branch to method.
-    blr x9
+    INVOKE_STUB_CALL_AND_RETURN
 
-    // Restore return value address and shorty address.
-    ldp x4, x5, [xFP, #16]
-    .cfi_restore x4
-    .cfi_restore x5
-
-    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
-    ldrb w10, [x5]
-
-    // Don't set anything for a void type.
-    cmp w10, #'V'
-    beq .Lexit_art_quick_invoke_stub2
-
-    cmp w10, #'D'
-    bne .Lreturn_is_float2
-    str d0, [x4]
-    b .Lexit_art_quick_invoke_stub2
-
-.Lreturn_is_float2:
-    cmp w10, #'F'
-    bne .Lreturn_is_int2
-    str s0, [x4]
-    b .Lexit_art_quick_invoke_stub2
-
-    // Just store x0. Doesn't matter if it is 64 or 32 bits.
-.Lreturn_is_int2:
-    str x0, [x4]
-
-.Lexit_art_quick_invoke_stub2:
-
-    ldr x2, [xFP, #32]   // Restore stack pointer.
-    mov sp, x2
-    .cfi_restore sp
-
-    ldp xFP, xLR, [xFP]    // Restore old frame pointer and link register.
-    .cfi_restore x29
-    .cfi_restore x30
-
-    ret
 END art_quick_invoke_static_stub
 
-// UNIMPLEMENTED art_quick_do_long_jump
+
 
     /*
      * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_
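
A note on the shorty-driven fill loops that both invoke stubs now share: the shorty's first character is the return type and each later character is one argument, with 'J' (long) and 'D' (double) consuming 8 bytes of the packed args array and everything else 4 - which is what the new .Ladvance4/.Ladvance8 labels account for once the registers are full. A runnable sketch of that size walk (the helper name is illustrative):

    #include <cstdio>

    // Walks a method shorty the way the fill loops do: skip the return type,
    // then advance the packed-args cursor by 8 for longs/doubles and by 4 for
    // everything else (ints, floats, and references alike).
    int ArgsBytesFromShorty(const char* shorty) {
      int bytes = 0;
      for (const char* p = shorty + 1; *p != '\0'; ++p) {
        bytes += (*p == 'J' || *p == 'D') ? 8 : 4;
      }
      return bytes;
    }

    int main() {
      // "VIJF": returns void, takes (int, long, float) -> 4 + 8 + 4 = 16.
      printf("%d\n", ArgsBytesFromShorty("VIJF"));
      return 0;
    }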
diff --git a/runtime/arch/arm64/registers_arm64.h b/runtime/arch/arm64/registers_arm64.h
index 43c0ad6..2503918 100644
--- a/runtime/arch/arm64/registers_arm64.h
+++ b/runtime/arch/arm64/registers_arm64.h
@@ -56,7 +56,8 @@
   X29 = 29,
   X30 = 30,
   X31 = 31,
-  TR  = 18,     // ART Thread Register.
+  TR  = 18,     // ART Thread Register - Needs to be one of the callee saved regs.
+  TR1 = 19,     // FIXME!
   IP0 = 16,     // Used as scratch by VIXL.
   IP1 = 17,     // Used as scratch by ART JNI Assembler.
   FP  = 29,
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 336a0cc..fd2cfeb 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -561,8 +561,8 @@
     jz   .Lslow_unlock
     movl LOCK_WORD_OFFSET(%eax), %ecx     // ecx := lock word
     movl %fs:THREAD_ID_OFFSET, %edx       // edx := thread id
-    test %ecx, %ecx
-    jb   .Lslow_unlock                    // lock word contains a monitor
+    test LITERAL(0xC0000000), %ecx
+    jnz  .Lslow_unlock                    // lock word contains a monitor
     cmpw %cx, %dx                         // does the thread id match?
     jne  .Lslow_unlock
     cmpl LITERAL(65536), %ecx
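
The one-line x86 fix above deserves spelling out: `test %ecx, %ecx` always clears the carry flag, so the old `jb .Lslow_unlock` could never be taken and monitor-shaped lock words slipped into the fast path. The replacement tests the two high state bits directly. A sketch of the corrected predicate (the two-high-bits state encoding is assumed from the mask, not shown in this patch):

    #include <cstdint>

    // Fast-path check after the fix: the unlock fast path may proceed only
    // when both high state bits are clear (assumed thin-lock encoding); any
    // other state falls through to .Lslow_unlock.
    bool TakeSlowUnlockPath(uint32_t lock_word) {
      return (lock_word & 0xC0000000u) != 0;  // test LITERAL(0xC0000000); jnz.
    }

    int main() {
      return (!TakeSlowUnlockPath(0x00000042u) &&  // Thin lock: fast path.
              TakeSlowUnlockPath(0x80000000u))     // Monitor state: slow path.
                 ? 0 : 1;
    }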
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 78b7cc0..e690b30 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -692,7 +692,7 @@
     while (true) {
       file_.reset(OS::OpenFileWithFlags(filename, O_CREAT | O_RDWR));
       if (file_.get() == NULL) {
-        *error_msg = StringPrintf("Failed to open file '%s'", filename);
+        *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno));
         return false;
       }
       int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_EX));
@@ -741,51 +741,57 @@
 const DexFile* ClassLinker::FindOrCreateOatFileForDexLocation(const char* dex_location,
                                                               uint32_t dex_location_checksum,
                                                               const char* oat_location,
-                                                              std::string* error_msg) {
+                                                              std::vector<std::string>* error_msgs) {
   // We play a locking game here so that if two different processes
   // race to generate (or worse, one tries to open a partial generated
   // file) we will be okay. This is actually common with apps that use
   // DexClassLoader to work around the dex method reference limit and
   // that have a background service running in a separate process.
   ScopedFlock scoped_flock;
-  if (!scoped_flock.Init(oat_location, error_msg)) {
+  std::string error_msg;
+  if (!scoped_flock.Init(oat_location, &error_msg)) {
+    error_msgs->push_back(error_msg);
     return nullptr;
   }
 
   // Check if we already have an up-to-date output file
   const DexFile* dex_file = FindDexFileInOatLocation(dex_location, dex_location_checksum,
-                                                     oat_location, error_msg);
+                                                     oat_location, &error_msg);
   if (dex_file != nullptr) {
     return dex_file;
   }
-  VLOG(class_linker) << "Failed to find dex file '" << dex_location << "' in oat location '"
-      << oat_location << "': " << *error_msg;
-  error_msg->clear();
+  std::string compound_msg = StringPrintf("Failed to find dex file '%s' in oat location '%s': %s",
+                                          dex_location, oat_location, error_msg.c_str());
+  VLOG(class_linker) << compound_msg;
+  error_msgs->push_back(compound_msg);
 
   // Generate the output oat file for the dex file
   VLOG(class_linker) << "Generating oat file " << oat_location << " for " << dex_location;
-  if (!GenerateOatFile(dex_location, scoped_flock.GetFile().Fd(), oat_location, error_msg)) {
-    CHECK(!error_msg->empty());
+  if (!GenerateOatFile(dex_location, scoped_flock.GetFile().Fd(), oat_location, &error_msg)) {
+    CHECK(!error_msg.empty());
+    error_msgs->push_back(error_msg);
     return nullptr;
   }
   const OatFile* oat_file = OatFile::Open(oat_location, oat_location, NULL,
                                           !Runtime::Current()->IsCompiler(),
-                                          error_msg);
+                                          &error_msg);
   if (oat_file == nullptr) {
-    *error_msg = StringPrintf("Failed to open generated oat file '%s': %s",
-                              oat_location, error_msg->c_str());
+    compound_msg = StringPrintf("\nFailed to open generated oat file '%s': %s",
+                                oat_location, error_msg.c_str());
+    error_msgs->push_back(compound_msg);
     return nullptr;
   }
   oat_file = RegisterOatFile(oat_file);
   const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location,
                                                                     &dex_location_checksum);
   if (oat_dex_file == nullptr) {
-    *error_msg = StringPrintf("Failed to find dex file '%s' (checksum 0x%x) in generated out file "
-                              "'%s'", dex_location, dex_location_checksum, oat_location);
+    error_msg = StringPrintf("\nFailed to find dex file '%s' (checksum 0x%x) in generated out file "
+                             "'%s'", dex_location, dex_location_checksum, oat_location);
+    error_msgs->push_back(error_msg);
     return nullptr;
   }
-  const DexFile* result = oat_dex_file->OpenDexFile(error_msg);
-  CHECK(result != nullptr) << *error_msg;
+  const DexFile* result = oat_dex_file->OpenDexFile(&error_msg);
+  CHECK(result != nullptr) << Join(*error_msgs, '\n') << ", " << error_msg;
   CHECK_EQ(dex_location_checksum, result->GetLocationChecksum())
           << "dex_location=" << dex_location << " oat_location=" << oat_location << std::hex
           << " dex_location_checksum=" << dex_location_checksum
@@ -880,27 +886,34 @@
 
 const DexFile* ClassLinker::FindDexFileInOatFileFromDexLocation(const char* dex_location,
                                                                 const uint32_t* const dex_location_checksum,
-                                                                std::string* error_msg) {
+                                                                std::vector<std::string>* error_msgs) {
   const OatFile* open_oat_file = FindOpenedOatFileFromDexLocation(dex_location,
                                                                   dex_location_checksum);
   if (open_oat_file != nullptr) {
     const OatFile::OatDexFile* oat_dex_file = open_oat_file->GetOatDexFile(dex_location,
                                                                            dex_location_checksum);
-    return oat_dex_file->OpenDexFile(error_msg);
+    std::string error_msg;
+    const DexFile* ret = oat_dex_file->OpenDexFile(&error_msg);
+    if (ret == nullptr) {
+      error_msgs->push_back(error_msg);
+    }
+    return ret;
   }
 
   // Look for an existing file next to dex. for example, for
   // /foo/bar/baz.jar, look for /foo/bar/baz.odex.
   std::string odex_filename(OatFile::DexFilenameToOdexFilename(dex_location));
   bool open_failed;
+  std::string error_msg;
   const DexFile* dex_file = VerifyAndOpenDexFileFromOatFile(odex_filename, dex_location,
-                                                            error_msg, &open_failed);
+                                                            &error_msg, &open_failed);
   if (dex_file != nullptr) {
     return dex_file;
   }
   if (dex_location_checksum == nullptr) {
-    *error_msg = StringPrintf("Failed to open oat file from %s and no classes.dex found in %s: %s",
-                              odex_filename.c_str(), dex_location, error_msg->c_str());
+    error_msgs->push_back(StringPrintf("Failed to open oat file from %s and no classes.dex found in"
+                                      "%s: %s", odex_filename.c_str(), dex_location,
+                                       error_msg.c_str()));
     return nullptr;
   }
 
@@ -914,14 +927,15 @@
   if (!open_failed && TEMP_FAILURE_RETRY(unlink(cache_location.c_str())) != 0) {
     PLOG(FATAL) << "Failed to remove obsolete oat file from " << cache_location;
   }
-  VLOG(class_linker) << "Failed to open oat file from " << odex_filename
-      << " (error '" << *error_msg << "') or " << cache_location
-      << " (error '" << cache_error_msg << "').";
+  std::string compound_msg = StringPrintf("Failed to open oat file from %s (error '%s') or %s "
+                                          "(error '%s').", odex_filename.c_str(), error_msg.c_str(),
+                                          cache_location.c_str(), cache_error_msg.c_str());
+  VLOG(class_linker) << compound_msg;
+  error_msgs->push_back(compound_msg);
 
   // Try to generate oat file if it wasn't found or was obsolete.
-  error_msg->clear();
   return FindOrCreateOatFileForDexLocation(dex_location, *dex_location_checksum,
-                                           cache_location.c_str(), error_msg);
+                                           cache_location.c_str(), error_msgs);
 }
 
 const OatFile* ClassLinker::FindOpenedOatFileFromOatLocation(const std::string& oat_location) {
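
The common thread in the class_linker.cc changes above: the dex/oat lookup tries several fallbacks in sequence, and a single out-parameter string meant each attempt clobbered the previous failure's message (note the error_msg->clear() calls being deleted). Accumulating into a vector keeps the whole chain. A standalone sketch of the pattern (the function and messages are invented for illustration):

    #include <string>
    #include <vector>

    // Each fallback appends its own failure message instead of overwriting a
    // shared string, so the caller can log every step that was tried.
    const int* OpenWithFallbacks(std::vector<std::string>* error_msgs) {
      error_msgs->push_back("Failed to find dex file in oat location: ...");
      error_msgs->push_back("Failed to open generated oat file: ...");
      return nullptr;  // All attempts failed; the full chain is preserved.
    }

    int main() {
      std::vector<std::string> error_msgs;
      const int* result = OpenWithFallbacks(&error_msgs);
      return (result == nullptr && error_msgs.size() == 2) ? 0 : 1;
    }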
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 701e62e..d684ad5 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -273,14 +273,14 @@
   const DexFile* FindOrCreateOatFileForDexLocation(const char* dex_location,
                                                    uint32_t dex_location_checksum,
                                                    const char* oat_location,
-                                                   std::string* error_msg)
+                                                   std::vector<std::string>* error_msgs)
       LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_);
   // Find a DexFile within an OatFile given a DexFile location. Note
   // that this returns null if the location checksum of the DexFile
   // does not match the OatFile.
   const DexFile* FindDexFileInOatFileFromDexLocation(const char* location,
                                                      const uint32_t* const location_checksum,
-                                                     std::string* error_msg)
+                                                     std::vector<std::string>* error_msgs)
       LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_);
 
 
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 4b6d82b..315f274 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -66,6 +66,28 @@
   }
 }
 
+static void ThrowWrappedException(const ThrowLocation* throw_location,
+                                  const char* exception_descriptor,
+                                  mirror::Class* referrer, const char* fmt, va_list* args = NULL)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  std::ostringstream msg;
+  if (args != NULL) {
+    std::string vmsg;
+    StringAppendV(&vmsg, fmt, *args);
+    msg << vmsg;
+  } else {
+    msg << fmt;
+  }
+  AddReferrerLocation(msg, referrer);
+  Thread* self = Thread::Current();
+  if (throw_location == NULL) {
+    ThrowLocation computed_throw_location = self->GetCurrentLocationForThrow();
+    self->ThrowNewWrappedException(computed_throw_location, exception_descriptor, msg.str().c_str());
+  } else {
+    self->ThrowNewWrappedException(*throw_location, exception_descriptor, msg.str().c_str());
+  }
+}
+
 // AbstractMethodError
 
 void ThrowAbstractMethodError(mirror::ArtMethod* method) {
@@ -243,6 +265,13 @@
   va_end(args);
 }
 
+void ThrowWrappedIOException(const char* fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  ThrowWrappedException(NULL, "Ljava/io/IOException;", NULL, fmt, &args);
+  va_end(args);
+}
+
 // LinkageError
 
 void ThrowLinkageError(mirror::Class* referrer, const char* fmt, ...) {
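
ThrowWrappedException above differs from the file's existing throw helper only in calling ThrowNewWrappedException, which chains the thread's currently pending exception as the cause of the new one. A self-contained C++ analogue of that wrap-with-cause shape (std exceptions stand in for the runtime's Throwable machinery):

    #include <exception>
    #include <stdexcept>
    #include <string>

    // Analogue only: wrap the pending exception in a new one that records the
    // context message and keeps the original as the cause.
    void ThrowWrappedSketch(const std::string& context) {
      std::throw_with_nested(std::runtime_error(context));
    }

    int main() {
      try {
        try {
          throw std::runtime_error("original failure");   // The pending exception.
        } catch (...) {
          ThrowWrappedSketch("while opening dex files");  // Wrap it with context.
        }
      } catch (const std::exception& e) {
        return std::string(e.what()) == "while opening dex files" ? 0 : 1;
      }
      return 1;
    }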
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index c06763e..ebedae0 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -126,6 +126,9 @@
 void ThrowIOException(const char* fmt, ...) __attribute__((__format__(__printf__, 1, 2)))
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
+void ThrowWrappedIOException(const char* fmt, ...) __attribute__((__format__(__printf__, 1, 2)))
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
+
 // LinkageError
 
 void ThrowLinkageError(mirror::Class* referrer, const char* fmt, ...)
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 514ad4c..c52a588 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -126,14 +126,14 @@
   return os;
 }
 
-class DebugInstrumentationListener : public instrumentation::InstrumentationListener {
+class DebugInstrumentationListener FINAL : public instrumentation::InstrumentationListener {
  public:
   DebugInstrumentationListener() {}
   virtual ~DebugInstrumentationListener() {}
 
-  virtual void MethodEntered(Thread* thread, mirror::Object* this_object,
-                             mirror::ArtMethod* method, uint32_t dex_pc)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void MethodEntered(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                     uint32_t dex_pc)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (method->IsNative()) {
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
@@ -141,10 +141,9 @@
     Dbg::PostLocationEvent(method, 0, this_object, Dbg::kMethodEntry, nullptr);
   }
 
-  virtual void MethodExited(Thread* thread, mirror::Object* this_object,
-                            mirror::ArtMethod* method,
-                            uint32_t dex_pc, const JValue& return_value)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void MethodExited(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                    uint32_t dex_pc, const JValue& return_value)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (method->IsNative()) {
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
@@ -152,26 +151,41 @@
     Dbg::PostLocationEvent(method, dex_pc, this_object, Dbg::kMethodExit, &return_value);
   }
 
-  virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
-                            mirror::ArtMethod* method, uint32_t dex_pc)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void MethodUnwind(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                    uint32_t dex_pc)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
    // We're not registered to listen to this kind of event, so complain.
     LOG(ERROR) << "Unexpected method unwind event in debugger " << PrettyMethod(method)
                << " " << dex_pc;
   }
 
-  virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
-                          mirror::ArtMethod* method, uint32_t new_dex_pc)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void DexPcMoved(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                  uint32_t new_dex_pc)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Dbg::UpdateDebugger(thread, this_object, method, new_dex_pc);
   }
 
-  virtual void ExceptionCaught(Thread* thread, const ThrowLocation& throw_location,
-                               mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
-                               mirror::Throwable* exception_object)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Dbg::PostException(thread, throw_location, catch_method, catch_dex_pc, exception_object);
+  void FieldRead(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                 uint32_t dex_pc, mirror::ArtField* field)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Dbg::PostFieldAccessEvent(method, dex_pc, this_object, field);
   }
+
+  void FieldWritten(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                    uint32_t dex_pc, mirror::ArtField* field, const JValue& field_value)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Dbg::PostFieldModificationEvent(method, dex_pc, this_object, field, &field_value);
+  }
+
+  void ExceptionCaught(Thread* thread, const ThrowLocation& throw_location,
+                       mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
+                       mirror::Throwable* exception_object)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Dbg::PostException(throw_location, catch_method, catch_dex_pc, exception_object);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(DebugInstrumentationListener);
 } gDebugInstrumentationListener;
 
 // JDWP is allowed unless the Zygote forbids it.
@@ -231,6 +245,14 @@
   }
 }
 
+void DebugInvokeReq::Clear() {
+  invoke_needed = false;
+  receiver = nullptr;
+  thread = nullptr;
+  klass = nullptr;
+  method = nullptr;
+}
+
 void SingleStepControl::VisitRoots(RootCallback* callback, void* arg, uint32_t tid,
                                    RootType root_type) {
   if (method != nullptr) {
@@ -238,6 +260,16 @@
   }
 }
 
+bool SingleStepControl::ContainsDexPc(uint32_t dex_pc) const {
+  return dex_pcs.find(dex_pc) != dex_pcs.end();
+}
+
+void SingleStepControl::Clear() {
+  is_active = false;
+  method = nullptr;
+  dex_pcs.clear();
+}
+
 void DeoptimizationRequest::VisitRoots(RootCallback* callback, void* arg) {
   if (method != nullptr) {
     callback(reinterpret_cast<mirror::Object**>(&method), arg, 0, kRootDebugger);
@@ -607,6 +639,14 @@
   return gDisposed;
 }
 
+// All the instrumentation events the debugger is registered for.
+static constexpr uint32_t kListenerEvents = instrumentation::Instrumentation::kMethodEntered |
+                                            instrumentation::Instrumentation::kMethodExited |
+                                            instrumentation::Instrumentation::kDexPcMoved |
+                                            instrumentation::Instrumentation::kFieldRead |
+                                            instrumentation::Instrumentation::kFieldWritten |
+                                            instrumentation::Instrumentation::kExceptionCaught;
+
 void Dbg::GoActive() {
   // Enable all debugging features, including scans for breakpoints.
   // This is a no-op if we're already active.
@@ -633,11 +673,7 @@
   ThreadState old_state = self->SetStateUnsafe(kRunnable);
   CHECK_NE(old_state, kRunnable);
   runtime->GetInstrumentation()->EnableDeoptimization();
-  runtime->GetInstrumentation()->AddListener(&gDebugInstrumentationListener,
-                                             instrumentation::Instrumentation::kMethodEntered |
-                                             instrumentation::Instrumentation::kMethodExited |
-                                             instrumentation::Instrumentation::kDexPcMoved |
-                                             instrumentation::Instrumentation::kExceptionCaught);
+  runtime->GetInstrumentation()->AddListener(&gDebugInstrumentationListener, kListenerEvents);
   gDebuggerActive = true;
   CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
   runtime->GetThreadList()->ResumeAll();
@@ -668,11 +704,7 @@
       deoptimization_requests_.clear();
       full_deoptimization_event_count_ = 0U;
     }
-    runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener,
-                                                  instrumentation::Instrumentation::kMethodEntered |
-                                                  instrumentation::Instrumentation::kMethodExited |
-                                                  instrumentation::Instrumentation::kDexPcMoved |
-                                                  instrumentation::Instrumentation::kExceptionCaught);
+    runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener, kListenerEvents);
     runtime->GetInstrumentation()->DisableDeoptimization();
     gDebuggerActive = false;
   }
@@ -1572,6 +1604,13 @@
   OutputJValue(tag, return_value, pReply);
 }
 
+void Dbg::OutputFieldValue(JDWP::FieldId field_id, const JValue* field_value,
+                           JDWP::ExpandBuf* pReply) {
+  mirror::ArtField* f = FromFieldId(field_id);
+  JDWP::JdwpTag tag = BasicTagFromDescriptor(FieldHelper(f).GetTypeDescriptor());
+  OutputJValue(tag, field_value, pReply);
+}
+
 JDWP::JdwpError Dbg::GetBytecodes(JDWP::RefTypeId, JDWP::MethodId method_id,
                                   std::vector<uint8_t>& bytecodes)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -2444,21 +2483,70 @@
   return visitor.error_;
 }
 
+JDWP::ObjectId Dbg::GetThisObjectIdForEvent(mirror::Object* this_object) {
+  // If 'this_object' isn't already in the registry, we know that we're not looking for it, so
+  // there's no point adding it to the registry and burning through ids.
+  // When registering an event request with an instance filter, we've been given an existing object
+  // id so it must already be present in the registry when the event fires.
+  JDWP::ObjectId this_id = 0;
+  if (this_object != nullptr && gRegistry->Contains(this_object)) {
+    this_id = gRegistry->Add(this_object);
+  }
+  return this_id;
+}
+
 void Dbg::PostLocationEvent(mirror::ArtMethod* m, int dex_pc, mirror::Object* this_object,
                             int event_flags, const JValue* return_value) {
+  if (!IsDebuggerActive()) {
+    return;
+  }
+  DCHECK(m != nullptr);
+  DCHECK_EQ(m->IsStatic(), this_object == nullptr);
   JDWP::JdwpLocation location;
   SetLocation(location, m, dex_pc);
 
-  // If 'this_object' isn't already in the registry, we know that we're not looking for it,
-  // so there's no point adding it to the registry and burning through ids.
-  JDWP::ObjectId this_id = 0;
-  if (gRegistry->Contains(this_object)) {
-    this_id = gRegistry->Add(this_object);
-  }
+  // We need 'this' for InstanceOnly filters only.
+  JDWP::ObjectId this_id = GetThisObjectIdForEvent(this_object);
   gJdwpState->PostLocationEvent(&location, this_id, event_flags, return_value);
 }
 
-void Dbg::PostException(Thread* thread, const ThrowLocation& throw_location,
+void Dbg::PostFieldAccessEvent(mirror::ArtMethod* m, int dex_pc,
+                               mirror::Object* this_object, mirror::ArtField* f) {
+  if (!IsDebuggerActive()) {
+    return;
+  }
+  DCHECK(m != nullptr);
+  DCHECK(f != nullptr);
+  JDWP::JdwpLocation location;
+  SetLocation(location, m, dex_pc);
+
+  JDWP::RefTypeId type_id = gRegistry->AddRefType(f->GetDeclaringClass());
+  JDWP::FieldId field_id = ToFieldId(f);
+  JDWP::ObjectId this_id = gRegistry->Add(this_object);
+
+  gJdwpState->PostFieldEvent(&location, type_id, field_id, this_id, nullptr, false);
+}
+
+void Dbg::PostFieldModificationEvent(mirror::ArtMethod* m, int dex_pc,
+                                     mirror::Object* this_object, mirror::ArtField* f,
+                                     const JValue* field_value) {
+  if (!IsDebuggerActive()) {
+    return;
+  }
+  DCHECK(m != nullptr);
+  DCHECK(f != nullptr);
+  DCHECK(field_value != nullptr);
+  JDWP::JdwpLocation location;
+  SetLocation(location, m, dex_pc);
+
+  JDWP::RefTypeId type_id = gRegistry->AddRefType(f->GetDeclaringClass());
+  JDWP::FieldId field_id = ToFieldId(f);
+  JDWP::ObjectId this_id = gRegistry->Add(this_object);
+
+  gJdwpState->PostFieldEvent(&location, type_id, field_id, this_id, field_value, true);
+}
+
+void Dbg::PostException(const ThrowLocation& throw_location,
                         mirror::ArtMethod* catch_method,
                         uint32_t catch_dex_pc, mirror::Throwable* exception_object) {
   if (!IsDebuggerActive()) {
@@ -2470,8 +2558,8 @@
   JDWP::JdwpLocation catch_location;
   SetLocation(catch_location, catch_method, catch_dex_pc);
 
-  // We need 'this' for InstanceOnly filters.
-  JDWP::ObjectId this_id = gRegistry->Add(throw_location.GetThis());
+  // We need 'this' for InstanceOnly filters only.
+  JDWP::ObjectId this_id = GetThisObjectIdForEvent(throw_location.GetThis());
   JDWP::ObjectId exception_id = gRegistry->Add(exception_object);
   JDWP::RefTypeId exception_class_id = gRegistry->AddRefType(exception_object->GetClass());
 
@@ -2521,7 +2609,7 @@
       } else if (single_step_control->step_size == JDWP::SS_MIN) {
         event_flags |= kSingleStep;
         VLOG(jdwp) << "SS new instruction";
-      } else if (single_step_control->dex_pcs.find(dex_pc) == single_step_control->dex_pcs.end()) {
+      } else if (!single_step_control->ContainsDexPc(dex_pc)) {
         event_flags |= kSingleStep;
         VLOG(jdwp) << "SS new line";
       }
@@ -2543,7 +2631,7 @@
         if (single_step_control->step_size == JDWP::SS_MIN) {
           event_flags |= kSingleStep;
           VLOG(jdwp) << "SS new instruction";
-        } else if (single_step_control->dex_pcs.find(dex_pc) == single_step_control->dex_pcs.end()) {
+        } else if (!single_step_control->ContainsDexPc(dex_pc)) {
           event_flags |= kSingleStep;
           VLOG(jdwp) << "SS new line";
         }
@@ -2910,8 +2998,9 @@
   //
 
   struct DebugCallbackContext {
-    explicit DebugCallbackContext(SingleStepControl* single_step_control, int32_t line_number)
-      : single_step_control_(single_step_control), line_number_(line_number),
+    explicit DebugCallbackContext(SingleStepControl* single_step_control, int32_t line_number,
+                                  const DexFile::CodeItem* code_item)
+      : single_step_control_(single_step_control), line_number_(line_number), code_item_(code_item),
         last_pc_valid(false), last_pc(0) {
     }
 
@@ -2938,7 +3027,7 @@
     ~DebugCallbackContext() {
       // If the line number was the last in the position table...
       if (last_pc_valid) {
-        size_t end = MethodHelper(single_step_control_->method).GetCodeItem()->insns_size_in_code_units_;
+        size_t end = code_item_->insns_size_in_code_units_;
         for (uint32_t dex_pc = last_pc; dex_pc < end; ++dex_pc) {
           single_step_control_->dex_pcs.insert(dex_pc);
         }
@@ -2947,15 +3036,17 @@
 
     SingleStepControl* const single_step_control_;
     const int32_t line_number_;
+    const DexFile::CodeItem* const code_item_;
     bool last_pc_valid;
     uint32_t last_pc;
   };
   single_step_control->dex_pcs.clear();
   mirror::ArtMethod* m = single_step_control->method;
   if (!m->IsNative()) {
-    DebugCallbackContext context(single_step_control, line_number);
     MethodHelper mh(m);
-    mh.GetDexFile().DecodeDebugInfo(mh.GetCodeItem(), m->IsStatic(), m->GetDexMethodIndex(),
+    const DexFile::CodeItem* const code_item = mh.GetCodeItem();
+    DebugCallbackContext context(single_step_control, line_number, code_item);
+    mh.GetDexFile().DecodeDebugInfo(code_item, m->IsStatic(), m->GetDexMethodIndex(),
                                     DebugCallbackContext::Callback, NULL, &context);
   }
 
@@ -2975,8 +3066,8 @@
     VLOG(jdwp) << "Single-step current line: " << line_number;
     VLOG(jdwp) << "Single-step current stack depth: " << single_step_control->stack_depth;
     VLOG(jdwp) << "Single-step dex_pc values:";
-    for (std::set<uint32_t>::iterator it = single_step_control->dex_pcs.begin(); it != single_step_control->dex_pcs.end(); ++it) {
-      VLOG(jdwp) << StringPrintf(" %#x", *it);
+    for (uint32_t dex_pc : single_step_control->dex_pcs) {
+      VLOG(jdwp) << StringPrintf(" %#x", dex_pc);
     }
   }
 
@@ -2991,8 +3082,7 @@
   if (error == JDWP::ERR_NONE) {
     SingleStepControl* single_step_control = thread->GetSingleStepControl();
     DCHECK(single_step_control != nullptr);
-    single_step_control->is_active = false;
-    single_step_control->dex_pcs.clear();
+    single_step_control->Clear();
   }
 }
 
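A note on the two new SingleStepControl members: their definitions are not in the hunks shown, but the inline code they replace pins down their behavior. A minimal sketch, assuming the existing member names are kept:

  bool SingleStepControl::ContainsDexPc(uint32_t dex_pc) const {
    // Mirrors the old inline check at the call sites above.
    return dex_pcs.find(dex_pc) != dex_pcs.end();
  }

  void SingleStepControl::Clear() {
    // Replaces the two-step reset previously done in UnconfigureStep.
    is_active = false;
    dex_pcs.clear();
  }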
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 23c9c6a..b3e94c3 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -35,6 +35,7 @@
 
 namespace art {
 namespace mirror {
+class ArtField;
 class ArtMethod;
 class Class;
 class Object;
@@ -85,6 +86,8 @@
   void VisitRoots(RootCallback* callback, void* arg, uint32_t tid, RootType root_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  void Clear();
+
  private:
   DISALLOW_COPY_AND_ASSIGN(DebugInvokeReq);
 };
@@ -118,6 +121,10 @@
   void VisitRoots(RootCallback* callback, void* arg, uint32_t tid, RootType root_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  bool ContainsDexPc(uint32_t dex_pc) const;
+
+  void Clear();
+
  private:
   DISALLOW_COPY_AND_ASSIGN(SingleStepControl);
 };
@@ -297,6 +304,9 @@
   static void OutputMethodReturnValue(JDWP::MethodId method_id, const JValue* return_value,
                                       JDWP::ExpandBuf* pReply)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void OutputFieldValue(JDWP::FieldId field_id, const JValue* field_value,
+                               JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static JDWP::JdwpError GetBytecodes(JDWP::RefTypeId class_id, JDWP::MethodId method_id,
                                       std::vector<uint8_t>& bytecodes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -411,8 +421,14 @@
                                 mirror::Object* thisPtr, int eventFlags,
                                 const JValue* return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void PostException(Thread* thread, const ThrowLocation& throw_location,
-                            mirror::ArtMethod* catch_method,
+  static void PostFieldAccessEvent(mirror::ArtMethod* m, int dex_pc, mirror::Object* this_object,
+                                   mirror::ArtField* f)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void PostFieldModificationEvent(mirror::ArtMethod* m, int dex_pc,
+                                         mirror::Object* this_object, mirror::ArtField* f,
+                                         const JValue* field_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void PostException(const ThrowLocation& throw_location, mirror::ArtMethod* catch_method,
                             uint32_t catch_dex_pc, mirror::Throwable* exception)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void PostThreadStart(Thread* t)
@@ -538,6 +554,9 @@
   static void PostThreadStartOrStop(Thread*, uint32_t)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static JDWP::ObjectId GetThisObjectIdForEvent(mirror::Object* this_object)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static void ProcessDeoptimizationRequest(const DeoptimizationRequest& request)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
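For orientation, the intended callers of the two new Post hooks are the debugger's instrumentation callbacks; gDebugInstrumentationListener appears earlier in this change, but the callback signatures below are an assumption, not part of this diff:

  void DebugInstrumentationListener::FieldRead(Thread* self, mirror::Object* this_object,
                                               mirror::ArtMethod* method, uint32_t dex_pc,
                                               mirror::ArtField* field) {
    Dbg::PostFieldAccessEvent(method, dex_pc, this_object, field);
  }

  void DebugInstrumentationListener::FieldWritten(Thread* self, mirror::Object* this_object,
                                                  mirror::ArtMethod* method, uint32_t dex_pc,
                                                  mirror::ArtField* field,
                                                  const JValue& field_value) {
    Dbg::PostFieldModificationEvent(method, dex_pc, this_object, field, &field_value);
  }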
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index fcbcac2..2b29591 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -250,6 +250,7 @@
         if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) {
           return fpr_args_ + (fpr_index_ * kBytesPerFprSpillLocation);
         }
+        return stack_args_ + (stack_index_ * kBytesStackArgLocation);
       }
     }
     if (gpr_index_ < kNumQuickGprArgs) {
@@ -283,6 +284,12 @@
   }
 
   void VisitArguments() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // This implementation doesn't support a reg-spill area for hard-float
+    // ABI targets such as x86_64 and aarch64. So, for those targets where
+    // 'kQuickSoftFloatAbi' is 'false':
+    //     (a) 'stack_args_' should point to the method's first argument, and
+    //     (b) 'stack_index_' should be advanced on every visit, regardless of
+    //         the argument's type.
     gpr_index_ = 0;
     fpr_index_ = 0;
     stack_index_ = 0;
@@ -290,10 +297,11 @@
       cur_type_ = Primitive::kPrimNot;
       is_split_long_or_double_ = false;
       Visit();
+      if (!kQuickSoftFloatAbi || kNumQuickGprArgs == 0) {
+        stack_index_++;
+      }
       if (kNumQuickGprArgs > 0) {
         gpr_index_++;
-      } else {
-        stack_index_++;
       }
     }
     for (uint32_t shorty_index = 1; shorty_index < shorty_len_; ++shorty_index) {
@@ -307,10 +315,11 @@
         case Primitive::kPrimInt:
           is_split_long_or_double_ = false;
           Visit();
+          if (!kQuickSoftFloatAbi || kNumQuickGprArgs == gpr_index_) {
+            stack_index_++;
+          }
           if (gpr_index_ < kNumQuickGprArgs) {
             gpr_index_++;
-          } else {
-            stack_index_++;
           }
           break;
         case Primitive::kPrimFloat:
@@ -325,9 +334,8 @@
           } else {
             if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) {
               fpr_index_++;
-            } else {
-              stack_index_++;
             }
+            stack_index_++;
           }
           break;
         case Primitive::kPrimDouble:
@@ -336,16 +344,7 @@
             is_split_long_or_double_ = (kBytesPerGprSpillLocation == 4) &&
                 ((gpr_index_ + 1) == kNumQuickGprArgs);
             Visit();
-            if (gpr_index_ < kNumQuickGprArgs) {
-              gpr_index_++;
-              if (kBytesPerGprSpillLocation == 4) {
-                if (gpr_index_ < kNumQuickGprArgs) {
-                  gpr_index_++;
-                } else {
-                  stack_index_++;
-                }
-              }
-            } else {
+            if (!kQuickSoftFloatAbi || kNumQuickGprArgs == gpr_index_) {
               if (kBytesStackArgLocation == 4) {
                 stack_index_+= 2;
               } else {
@@ -353,6 +352,16 @@
                 stack_index_++;
               }
             }
+            if (gpr_index_ < kNumQuickGprArgs) {
+              gpr_index_++;
+              if (kBytesPerGprSpillLocation == 4) {
+                if (gpr_index_ < kNumQuickGprArgs) {
+                  gpr_index_++;
+                } else if (kQuickSoftFloatAbi) {
+                  stack_index_++;
+                }
+              }
+            }
           } else {
             is_split_long_or_double_ = (kBytesPerFprSpillLocation == 4) &&
                 ((fpr_index_ + 1) == kNumQuickFprArgs);
@@ -362,17 +371,14 @@
               if (kBytesPerFprSpillLocation == 4) {
                 if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) {
                   fpr_index_++;
-                } else {
-                  stack_index_++;
                 }
               }
+            }
+            if (kBytesStackArgLocation == 4) {
+              stack_index_ += 2;
             } else {
-              if (kBytesStackArgLocation == 4) {
-                stack_index_+= 2;
-              } else {
-                CHECK_EQ(kBytesStackArgLocation, 8U);
-                stack_index_++;
-              }
+              CHECK_EQ(kBytesStackArgLocation, 8U);
+              stack_index_++;
             }
           }
           break;
@@ -389,59 +395,10 @@
       CHECK_EQ(kNumQuickFprArgs, 0U);
       return (kNumQuickGprArgs * kBytesPerGprSpillLocation) + kBytesPerGprSpillLocation /* ArtMethod* */;
     } else {
-      size_t offset = kBytesPerGprSpillLocation;  // Skip Method*.
-      size_t gprs_seen = 0;
-      size_t fprs_seen = 0;
-      if (!is_static && (gprs_seen < kNumQuickGprArgs)) {
-        gprs_seen++;
-        offset += kBytesStackArgLocation;
-      }
-      for (uint32_t i = 1; i < shorty_len; ++i) {
-        switch (shorty[i]) {
-          case 'Z':
-          case 'B':
-          case 'C':
-          case 'S':
-          case 'I':
-          case 'L':
-            if (gprs_seen < kNumQuickGprArgs) {
-              gprs_seen++;
-              offset += kBytesStackArgLocation;
-            }
-            break;
-          case 'J':
-            if (gprs_seen < kNumQuickGprArgs) {
-              gprs_seen++;
-              offset += 2 * kBytesStackArgLocation;
-              if (kBytesPerGprSpillLocation == 4) {
-                if (gprs_seen < kNumQuickGprArgs) {
-                  gprs_seen++;
-                }
-              }
-            }
-            break;
-          case 'F':
-            if ((kNumQuickFprArgs != 0) && (fprs_seen + 1 < kNumQuickFprArgs + 1)) {
-              fprs_seen++;
-              offset += kBytesStackArgLocation;
-            }
-            break;
-          case 'D':
-            if ((kNumQuickFprArgs != 0) && (fprs_seen + 1 < kNumQuickFprArgs + 1)) {
-              fprs_seen++;
-              offset += 2 * kBytesStackArgLocation;
-              if (kBytesPerFprSpillLocation == 4) {
-                if ((kNumQuickFprArgs != 0) && (fprs_seen + 1 < kNumQuickFprArgs + 1)) {
-                  fprs_seen++;
-                }
-              }
-            }
-            break;
-          default:
-            LOG(FATAL) << "Unexpected shorty character: " << shorty[i] << " in " << shorty;
-        }
-      }
-      return offset;
+      // For now, there is no reg-spill area for targets with a hard-float
+      // ABI, so return the offset of the method's first parameter
+      // ('this' for non-static methods).
+      return kBytesPerGprSpillLocation;  // Skip Method*.
     }
   }
 
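A standalone model of the revised rule may help (toy code, not ART's; kSoftFloat and kGprs stand in for kQuickSoftFloatAbi and kNumQuickGprArgs): on hard-float targets every visited argument consumes a stack slot, while register indices advance independently.

  #include <cstddef>

  struct ArgIndices {
    size_t gpr = 0;
    size_t fpr = 0;
    size_t stack = 0;
  };

  template <bool kSoftFloat, size_t kGprs>
  void VisitIntLikeArg(ArgIndices* ix) {
    if (!kSoftFloat || ix->gpr == kGprs) {
      ix->stack++;  // Hard-float ABIs always burn a stack slot per argument.
    }
    if (ix->gpr < kGprs) {
      ix->gpr++;    // Registers are consumed independently of the stack slot.
    }
  }

  // e.g. VisitIntLikeArg<false, 5>(&ix) models a hard-float target with five
  // argument GPRs (the count here is illustrative).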
diff --git a/runtime/gc/accounting/card_table-inl.h b/runtime/gc/accounting/card_table-inl.h
index 564168e..a1d001e 100644
--- a/runtime/gc/accounting/card_table-inl.h
+++ b/runtime/gc/accounting/card_table-inl.h
@@ -43,7 +43,7 @@
 }
 
 template <typename Visitor>
-inline size_t CardTable::Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_end,
+inline size_t CardTable::Scan(ContinuousSpaceBitmap* bitmap, byte* scan_begin, byte* scan_end,
                               const Visitor& visitor, const byte minimum_age) const {
   DCHECK(bitmap->HasAddress(scan_begin));
   DCHECK(bitmap->HasAddress(scan_end - 1));  // scan_end is the byte after the last byte we scan.
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index 8b7bfd3..8d5dc07 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -38,7 +38,7 @@
 
 namespace accounting {
 
-class SpaceBitmap;
+template<size_t kAlignment> class SpaceBitmap;
 
 // Maintain a card table from the write barrier. All writes of
 // non-NULL values to heap addresses should go through an entry in
@@ -102,7 +102,8 @@
   // For every dirty card of at least the minimum age between begin and end, invoke the visitor
   // with the specified argument. Returns how many cards the visitor was run on.
   template <typename Visitor>
-  size_t Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_end, const Visitor& visitor,
+  size_t Scan(SpaceBitmap<kObjectAlignment>* bitmap, byte* scan_begin, byte* scan_end,
+              const Visitor& visitor,
               const byte minimum_age = kCardDirty) const
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
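Call sites change only in the bitmap type they pass; a hedged sketch of a typical scan (space, card_table, and the visitor body are placeholders):

  accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
  size_t cards_scanned = card_table->Scan(live_bitmap, space->Begin(), space->End(),
                                          [](mirror::Object* obj) {
                                            // Visit each live object on a card of
                                            // at least the minimum age.
                                          });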
diff --git a/runtime/gc/accounting/heap_bitmap-inl.h b/runtime/gc/accounting/heap_bitmap-inl.h
index 04e85d2..ed7b427 100644
--- a/runtime/gc/accounting/heap_bitmap-inl.h
+++ b/runtime/gc/accounting/heap_bitmap-inl.h
@@ -37,16 +37,16 @@
 }
 
 inline bool HeapBitmap::Test(const mirror::Object* obj) {
-  SpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
+  ContinuousSpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
   if (LIKELY(bitmap != nullptr)) {
     return bitmap->Test(obj);
   } else {
-    return GetDiscontinuousSpaceObjectSet(obj) != NULL;
+    return GetDiscontinuousSpaceObjectSet(obj) != nullptr;
   }
 }
 
 inline void HeapBitmap::Clear(const mirror::Object* obj) {
-  SpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
+  ContinuousSpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
   if (LIKELY(bitmap != nullptr)) {
     bitmap->Clear(obj);
   } else {
@@ -57,7 +57,7 @@
 }
 
 inline void HeapBitmap::Set(const mirror::Object* obj) {
-  SpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
+  ContinuousSpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
   if (LIKELY(bitmap != NULL)) {
     bitmap->Set(obj);
   } else {
@@ -67,7 +67,7 @@
   }
 }
 
-inline SpaceBitmap* HeapBitmap::GetContinuousSpaceBitmap(const mirror::Object* obj) const {
+inline ContinuousSpaceBitmap* HeapBitmap::GetContinuousSpaceBitmap(const mirror::Object* obj) const {
   for (const auto& bitmap : continuous_space_bitmaps_) {
     if (bitmap->HasAddress(obj)) {
       return bitmap;
diff --git a/runtime/gc/accounting/heap_bitmap.cc b/runtime/gc/accounting/heap_bitmap.cc
index f94cf24..1db886c 100644
--- a/runtime/gc/accounting/heap_bitmap.cc
+++ b/runtime/gc/accounting/heap_bitmap.cc
@@ -16,13 +16,15 @@
 
 #include "heap_bitmap.h"
 
+#include "gc/accounting/space_bitmap-inl.h"
 #include "gc/space/space.h"
 
 namespace art {
 namespace gc {
 namespace accounting {
 
-void HeapBitmap::ReplaceBitmap(SpaceBitmap* old_bitmap, SpaceBitmap* new_bitmap) {
+void HeapBitmap::ReplaceBitmap(ContinuousSpaceBitmap* old_bitmap,
+                               ContinuousSpaceBitmap* new_bitmap) {
   for (auto& bitmap : continuous_space_bitmaps_) {
     if (bitmap == old_bitmap) {
       bitmap = new_bitmap;
@@ -42,7 +44,7 @@
   LOG(FATAL) << "object set " << static_cast<const void*>(old_set) << " not found";
 }
 
-void HeapBitmap::AddContinuousSpaceBitmap(accounting::SpaceBitmap* bitmap) {
+void HeapBitmap::AddContinuousSpaceBitmap(accounting::ContinuousSpaceBitmap* bitmap) {
   DCHECK(bitmap != NULL);
 
   // Check for interval overlap.
@@ -55,14 +57,14 @@
   continuous_space_bitmaps_.push_back(bitmap);
 }
 
-void HeapBitmap::RemoveContinuousSpaceBitmap(accounting::SpaceBitmap* bitmap) {
+void HeapBitmap::RemoveContinuousSpaceBitmap(accounting::ContinuousSpaceBitmap* bitmap) {
   auto it = std::find(continuous_space_bitmaps_.begin(), continuous_space_bitmaps_.end(), bitmap);
   DCHECK(it != continuous_space_bitmaps_.end());
   continuous_space_bitmaps_.erase(it);
 }
 
 void HeapBitmap::AddDiscontinuousObjectSet(ObjectSet* set) {
-  DCHECK(set != NULL);
+  DCHECK(set != nullptr);
   discontinuous_space_sets_.push_back(set);
 }
 
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index f729c0e..61a2429 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -34,7 +34,7 @@
   bool Test(const mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
   void Clear(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
   void Set(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-  SpaceBitmap* GetContinuousSpaceBitmap(const mirror::Object* obj) const;
+  ContinuousSpaceBitmap* GetContinuousSpaceBitmap(const mirror::Object* obj) const;
   ObjectSet* GetDiscontinuousSpaceObjectSet(const mirror::Object* obj) const;
 
   void Walk(ObjectCallback* callback, void* arg)
@@ -46,7 +46,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Find and replace a bitmap pointer, this is used by for the bitmap swapping in the GC.
-  void ReplaceBitmap(SpaceBitmap* old_bitmap, SpaceBitmap* new_bitmap)
+  void ReplaceBitmap(ContinuousSpaceBitmap* old_bitmap, ContinuousSpaceBitmap* new_bitmap)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Find and replace a object set pointer, this is used by for the bitmap swapping in the GC.
@@ -58,13 +58,14 @@
  private:
   const Heap* const heap_;
 
-  void AddContinuousSpaceBitmap(SpaceBitmap* bitmap);
-  void RemoveContinuousSpaceBitmap(SpaceBitmap* bitmap);
+  void AddContinuousSpaceBitmap(ContinuousSpaceBitmap* bitmap);
+  void RemoveContinuousSpaceBitmap(ContinuousSpaceBitmap* bitmap);
   void AddDiscontinuousObjectSet(ObjectSet* set);
   void RemoveDiscontinuousObjectSet(ObjectSet* set);
 
   // Bitmaps covering continuous spaces.
-  std::vector<SpaceBitmap*, GcAllocator<SpaceBitmap*>> continuous_space_bitmaps_;
+  std::vector<ContinuousSpaceBitmap*, GcAllocator<ContinuousSpaceBitmap*>>
+      continuous_space_bitmaps_;
 
   // Sets covering discontinuous spaces.
   std::vector<ObjectSet*, GcAllocator<ObjectSet*>> discontinuous_space_sets_;
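The main consumer of ReplaceBitmap is the live/mark bitmap swap at the end of a collection, condensed here from the garbage_collector.cc hunk later in this change:

  accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
  accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
  if (live_bitmap != nullptr && live_bitmap != mark_bitmap) {
    heap_->GetLiveBitmap()->ReplaceBitmap(live_bitmap, mark_bitmap);
    heap_->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
  }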
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 34ca654..d744dee 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -19,6 +19,7 @@
 #include "base/stl_util.h"
 #include "card_table-inl.h"
 #include "heap_bitmap.h"
+#include "gc/accounting/space_bitmap-inl.h"
 #include "gc/collector/mark_sweep.h"
 #include "gc/collector/mark_sweep-inl.h"
 #include "gc/heap.h"
@@ -222,7 +223,7 @@
 
   // Check the references of each clean card which is also in the mod union table.
   CardTable* card_table = heap_->GetCardTable();
-  SpaceBitmap* live_bitmap = space_->GetLiveBitmap();
+  ContinuousSpaceBitmap* live_bitmap = space_->GetLiveBitmap();
   for (const auto& ref_pair : references_) {
     const byte* card = ref_pair.first;
     if (*card == CardTable::kCardClean) {
@@ -272,7 +273,7 @@
     uintptr_t end = start + CardTable::kCardSize;
     auto* space = heap_->FindContinuousSpaceFromObject(reinterpret_cast<Object*>(start), false);
     DCHECK(space != nullptr);
-    SpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
     live_bitmap->VisitMarkedRange(start, end, add_visitor);
 
     // Update the corresponding references for the card.
@@ -312,7 +313,7 @@
                                                      void* arg) {
   CardTable* card_table = heap_->GetCardTable();
   ModUnionScanImageRootVisitor scan_visitor(callback, arg);
-  SpaceBitmap* bitmap = space_->GetLiveBitmap();
+  ContinuousSpaceBitmap* bitmap = space_->GetLiveBitmap();
   for (const byte* card_addr : cleared_cards_) {
     uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card_addr));
     DCHECK(space_->HasAddress(reinterpret_cast<Object*>(start)));
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index c3a90e2..5ae7c77 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -44,7 +44,6 @@
 
 namespace accounting {
 
-class SpaceBitmap;
 class HeapBitmap;
 
 // The mod-union table is the union of modified cards. It is used to allow the card table to be
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index 56f7caa..044216e 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -112,7 +112,7 @@
   bool contains_reference_to_target_space = false;
   RememberedSetObjectVisitor obj_visitor(callback, target_space,
                                          &contains_reference_to_target_space, arg);
-  SpaceBitmap* bitmap = space_->GetLiveBitmap();
+  ContinuousSpaceBitmap* bitmap = space_->GetLiveBitmap();
   CardSet remove_card_set;
   for (byte* const card_addr : dirty_cards_) {
     contains_reference_to_target_space = false;
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 880ff1f..ed140e0 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -17,14 +17,26 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_INL_H_
 #define ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_INL_H_
 
+#include "space_bitmap.h"
+
 #include "base/logging.h"
+#include "dex_file-inl.h"
+#include "heap_bitmap.h"
+#include "mirror/art_field-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
+#include "object_utils.h"
+#include "space_bitmap-inl.h"
+#include "UniquePtr.h"
 #include "utils.h"
 
 namespace art {
 namespace gc {
 namespace accounting {
 
-inline bool SpaceBitmap::AtomicTestAndSet(const mirror::Object* obj) {
+template<size_t kAlignment>
+inline bool SpaceBitmap<kAlignment>::AtomicTestAndSet(const mirror::Object* obj) {
   uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
   DCHECK_GE(addr, heap_begin_);
   const uintptr_t offset = addr - heap_begin_;
@@ -45,7 +57,8 @@
   return false;
 }
 
-inline bool SpaceBitmap::Test(const mirror::Object* obj) const {
+template<size_t kAlignment>
+inline bool SpaceBitmap<kAlignment>::Test(const mirror::Object* obj) const {
   uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
   DCHECK(HasAddress(obj)) << obj;
   DCHECK(bitmap_begin_ != NULL);
@@ -54,9 +67,9 @@
   return (bitmap_begin_[OffsetToIndex(offset)] & OffsetToMask(offset)) != 0;
 }
 
-template <typename Visitor>
-void SpaceBitmap::VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end,
-                                   const Visitor& visitor) const {
+template<size_t kAlignment> template<typename Visitor>
+inline void SpaceBitmap<kAlignment>::VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end,
+                                                      const Visitor& visitor) const {
   DCHECK_LT(visit_begin, visit_end);
 #if 0
   for (uintptr_t i = visit_begin; i < visit_end; i += kAlignment) {
@@ -148,7 +161,8 @@
 #endif
 }
 
-inline bool SpaceBitmap::Modify(const mirror::Object* obj, bool do_set) {
+template<size_t kAlignment> template<bool kSetBit>
+inline bool SpaceBitmap<kAlignment>::Modify(const mirror::Object* obj) {
   uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
   DCHECK_GE(addr, heap_begin_);
   const uintptr_t offset = addr - heap_begin_;
@@ -157,15 +171,24 @@
   DCHECK_LT(index, bitmap_size_ / kWordSize) << " bitmap_size_ = " << bitmap_size_;
   uword* address = &bitmap_begin_[index];
   uword old_word = *address;
-  if (do_set) {
+  if (kSetBit) {
     *address = old_word | mask;
   } else {
     *address = old_word & ~mask;
   }
-  DCHECK_EQ(Test(obj), do_set);
+  DCHECK_EQ(Test(obj), kSetBit);
   return (old_word & mask) != 0;
 }
 
+template<size_t kAlignment>
+inline std::ostream& operator << (std::ostream& stream, const SpaceBitmap<kAlignment>& bitmap) {
+  return stream
+    << bitmap.GetName() << "["
+    << "begin=" << reinterpret_cast<const void*>(bitmap.HeapBegin())
+    << ",end=" << reinterpret_cast<const void*>(bitmap.HeapLimit())
+    << "]";
+}
+
 }  // namespace accounting
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 1957c21..7eed05a 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -14,51 +14,24 @@
  * limitations under the License.
  */
 
-#include "base/logging.h"
-#include "dex_file-inl.h"
-#include "heap_bitmap.h"
-#include "mirror/art_field-inl.h"
-#include "mirror/class-inl.h"
-#include "mirror/object-inl.h"
-#include "mirror/object_array-inl.h"
-#include "object_utils.h"
 #include "space_bitmap-inl.h"
-#include "UniquePtr.h"
-#include "utils.h"
 
 namespace art {
 namespace gc {
 namespace accounting {
 
-std::string SpaceBitmap::GetName() const {
-  return name_;
-}
-
-void SpaceBitmap::SetName(const std::string& name) {
-  name_ = name;
-}
-
-std::string SpaceBitmap::Dump() const {
-  return StringPrintf("%s: %p-%p", name_.c_str(),
-                      reinterpret_cast<void*>(HeapBegin()),
-                      reinterpret_cast<void*>(HeapLimit()));
-}
-
-void ObjectSet::Walk(ObjectCallback* callback, void* arg) {
-  for (const mirror::Object* obj : contained_) {
-    callback(const_cast<mirror::Object*>(obj), arg);
-  }
-}
-
-SpaceBitmap* SpaceBitmap::CreateFromMemMap(const std::string& name, MemMap* mem_map,
-                                           byte* heap_begin, size_t heap_capacity) {
+template<size_t kAlignment>
+SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::CreateFromMemMap(
+    const std::string& name, MemMap* mem_map, byte* heap_begin, size_t heap_capacity) {
   CHECK(mem_map != nullptr);
   uword* bitmap_begin = reinterpret_cast<uword*>(mem_map->Begin());
   size_t bitmap_size = OffsetToIndex(RoundUp(heap_capacity, kAlignment * kBitsPerWord)) * kWordSize;
   return new SpaceBitmap(name, mem_map, bitmap_begin, bitmap_size, heap_begin);
 }
 
-SpaceBitmap* SpaceBitmap::Create(const std::string& name, byte* heap_begin, size_t heap_capacity) {
+template<size_t kAlignment>
+SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::Create(
+    const std::string& name, byte* heap_begin, size_t heap_capacity) {
   CHECK(heap_begin != NULL);
   // Round up since heap_capacity is not necessarily a multiple of kAlignment * kBitsPerWord.
   size_t bitmap_size = OffsetToIndex(RoundUp(heap_capacity, kAlignment * kBitsPerWord)) * kWordSize;
@@ -72,10 +45,8 @@
   return CreateFromMemMap(name, mem_map.release(), heap_begin, heap_capacity);
 }
 
-// Clean up any resources associated with the bitmap.
-SpaceBitmap::~SpaceBitmap() {}
-
-void SpaceBitmap::SetHeapLimit(uintptr_t new_end) {
+template<size_t kAlignment>
+void SpaceBitmap<kAlignment>::SetHeapLimit(uintptr_t new_end) {
   DCHECK(IsAligned<kBitsPerWord * kAlignment>(new_end));
   size_t new_size = OffsetToIndex(new_end - heap_begin_) * kWordSize;
   if (new_size < bitmap_size_) {
@@ -85,7 +56,8 @@
   // should be marked.
 }
 
-void SpaceBitmap::Clear() {
+template<size_t kAlignment>
+void SpaceBitmap<kAlignment>::Clear() {
   if (bitmap_begin_ != NULL) {
     // This returns the memory to the system.  Successive page faults will return zeroed memory.
     int result = madvise(bitmap_begin_, bitmap_size_, MADV_DONTNEED);
@@ -95,14 +67,14 @@
   }
 }
 
-void SpaceBitmap::CopyFrom(SpaceBitmap* source_bitmap) {
+template<size_t kAlignment>
+inline void SpaceBitmap<kAlignment>::CopyFrom(SpaceBitmap* source_bitmap) {
   DCHECK_EQ(Size(), source_bitmap->Size());
   std::copy(source_bitmap->Begin(), source_bitmap->Begin() + source_bitmap->Size() / kWordSize, Begin());
 }
 
-// Visits set bits in address order.  The callback is not permitted to
-// change the bitmap bits or max during the traversal.
-void SpaceBitmap::Walk(ObjectCallback* callback, void* arg) {
+template<size_t kAlignment>
+inline void SpaceBitmap<kAlignment>::Walk(ObjectCallback* callback, void* arg) {
   CHECK(bitmap_begin_ != NULL);
   CHECK(callback != NULL);
 
@@ -122,15 +94,11 @@
   }
 }
 
-// Walk through the bitmaps in increasing address order, and find the
-// object pointers that correspond to garbage objects.  Call
-// <callback> zero or more times with lists of these object pointers.
-//
-// The callback is not permitted to increase the max of either bitmap.
-void SpaceBitmap::SweepWalk(const SpaceBitmap& live_bitmap,
-                            const SpaceBitmap& mark_bitmap,
-                            uintptr_t sweep_begin, uintptr_t sweep_end,
-                            SpaceBitmap::SweepCallback* callback, void* arg) {
+template<size_t kAlignment>
+void SpaceBitmap<kAlignment>::SweepWalk(const SpaceBitmap<kAlignment>& live_bitmap,
+                                        const SpaceBitmap<kAlignment>& mark_bitmap,
+                                        uintptr_t sweep_begin, uintptr_t sweep_end,
+                                        SpaceBitmap::SweepCallback* callback, void* arg) {
   CHECK(live_bitmap.bitmap_begin_ != NULL);
   CHECK(mark_bitmap.bitmap_begin_ != NULL);
   CHECK_EQ(live_bitmap.heap_begin_, mark_bitmap.heap_begin_);
@@ -174,13 +142,10 @@
   }
 }
 
-static void WalkFieldsInOrder(SpaceBitmap* visited, ObjectCallback* callback, mirror::Object* obj,
-                              void* arg);
-
-// Walk instance fields of the given Class. Separate function to allow recursion on the super
-// class.
-static void WalkInstanceFields(SpaceBitmap* visited, ObjectCallback* callback, mirror::Object* obj,
-                               mirror::Class* klass, void* arg)
+template<size_t kAlignment>
+void SpaceBitmap<kAlignment>::WalkInstanceFields(SpaceBitmap<kAlignment>* visited,
+                                                 ObjectCallback* callback, mirror::Object* obj,
+                                                 mirror::Class* klass, void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Visit fields of parent classes first.
   mirror::Class* super = klass->GetSuperClass();
@@ -203,10 +168,10 @@
   }
 }
 
-// For an unvisited object, visit it then all its children found via fields.
-static void WalkFieldsInOrder(SpaceBitmap* visited, ObjectCallback* callback, mirror::Object* obj,
-                              void* arg)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+template<size_t kAlignment>
+void SpaceBitmap<kAlignment>::WalkFieldsInOrder(SpaceBitmap<kAlignment>* visited,
+                                                ObjectCallback* callback,
+                                                mirror::Object* obj, void* arg) {
   if (visited->Test(obj)) {
     return;
   }
@@ -244,14 +209,13 @@
   }
 }
 
-// Visits set bits with an in order traversal.  The callback is not permitted to change the bitmap
-// bits or max during the traversal.
-void SpaceBitmap::InOrderWalk(ObjectCallback* callback, void* arg) {
-  UniquePtr<SpaceBitmap> visited(Create("bitmap for in-order walk",
-                                       reinterpret_cast<byte*>(heap_begin_),
-                                       IndexToOffset(bitmap_size_ / kWordSize)));
-  CHECK(bitmap_begin_ != NULL);
-  CHECK(callback != NULL);
+template<size_t kAlignment>
+void SpaceBitmap<kAlignment>::InOrderWalk(ObjectCallback* callback, void* arg) {
+  UniquePtr<SpaceBitmap<kAlignment>> visited(
+      Create("bitmap for in-order walk", reinterpret_cast<byte*>(heap_begin_),
+             IndexToOffset(bitmap_size_ / kWordSize)));
+  CHECK(bitmap_begin_ != nullptr);
+  CHECK(callback != nullptr);
   uintptr_t end = Size() / kWordSize;
   for (uintptr_t i = 0; i < end; ++i) {
     // Need uint for unsigned shift.
@@ -268,14 +232,15 @@
   }
 }
 
-std::ostream& operator << (std::ostream& stream, const SpaceBitmap& bitmap) {
-  return stream
-    << bitmap.GetName() << "["
-    << "begin=" << reinterpret_cast<const void*>(bitmap.HeapBegin())
-    << ",end=" << reinterpret_cast<const void*>(bitmap.HeapLimit())
-    << "]";
+void ObjectSet::Walk(ObjectCallback* callback, void* arg) {
+  for (const mirror::Object* obj : contained_) {
+    callback(const_cast<mirror::Object*>(obj), arg);
+  }
 }
 
+template class SpaceBitmap<kObjectAlignment>;
+template class SpaceBitmap<kPageSize>;
+
 }  // namespace accounting
 }  // namespace gc
 }  // namespace art
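The two explicit instantiations above are what let these member definitions move out of the header: only the instantiated alignments get out-of-line code. A self-contained illustration of the pattern (toy names, not ART code):

  #include <cstddef>

  template <size_t kAlignment>
  struct ToyBitmap {
    static size_t OffsetToIndex(size_t offset);  // Declared here, defined below.
  };

  template <size_t kAlignment>
  size_t ToyBitmap<kAlignment>::OffsetToIndex(size_t offset) {
    return offset / kAlignment / (sizeof(size_t) * 8);
  }

  // Without these lines, callers in other translation units would hit
  // link-time errors for any alignment they use.
  template struct ToyBitmap<8>;     // plays the role of SpaceBitmap<kObjectAlignment>
  template struct ToyBitmap<4096>;  // plays the role of SpaceBitmap<kPageSize>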
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index a88f3e4..b90a799 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -38,11 +38,9 @@
 namespace gc {
 namespace accounting {
 
+template<size_t kAlignment>
 class SpaceBitmap {
  public:
-  // Alignment of objects within spaces.
-  static const size_t kAlignment = 8;
-
   typedef void ScanCallback(mirror::Object* obj, void* finger, void* arg);
 
   typedef void SweepCallback(size_t ptr_count, mirror::Object** ptrs, void* arg);
@@ -57,30 +55,31 @@
   static SpaceBitmap* CreateFromMemMap(const std::string& name, MemMap* mem_map,
                                        byte* heap_begin, size_t heap_capacity);
 
-  ~SpaceBitmap();
+  ~SpaceBitmap() {
+  }
 
   // <offset> is the difference from .base to a pointer address.
   // <index> is the index of .bits that contains the bit representing
   //         <offset>.
-  static size_t OffsetToIndex(size_t offset) {
+  static size_t OffsetToIndex(size_t offset) ALWAYS_INLINE {
     return offset / kAlignment / kBitsPerWord;
   }
 
-  static uintptr_t IndexToOffset(size_t index) {
+  static uintptr_t IndexToOffset(size_t index) ALWAYS_INLINE {
     return static_cast<uintptr_t>(index * kAlignment * kBitsPerWord);
   }
 
   // Bits are packed in the obvious way.
-  static uword OffsetToMask(uintptr_t offset) {
+  static uword OffsetToMask(uintptr_t offset) ALWAYS_INLINE {
     return (static_cast<size_t>(1)) << ((offset / kAlignment) % kBitsPerWord);
   }
 
-  inline bool Set(const mirror::Object* obj) {
-    return Modify(obj, true);
+  bool Set(const mirror::Object* obj) ALWAYS_INLINE {
+    return Modify<true>(obj);
   }
 
-  inline bool Clear(const mirror::Object* obj) {
-    return Modify(obj, false);
+  bool Clear(const mirror::Object* obj) ALWAYS_INLINE {
+    return Modify<false>(obj);
   }
 
   // Returns true if the object was previously marked.
@@ -123,20 +122,26 @@
     }
   }
 
-  /**
-   * Visit the live objects in the range [visit_begin, visit_end).
-   */
+  // Visit the live objects in the range [visit_begin, visit_end).
+  // TODO: Use lock annotations when clang is fixed.
+  // EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   template <typename Visitor>
   void VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end, const Visitor& visitor) const
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      NO_THREAD_SAFETY_ANALYSIS;
 
+  // Visits set bits in address order.  The callback is not permitted to change the bitmap bits or
+  // max during the traversal.
   void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  // Visits set bits with an in order traversal.  The callback is not permitted to change the bitmap
+  // bits or max during the traversal.
   void InOrderWalk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
+  // Walk through the bitmaps in increasing address order, and find the object pointers that
+  // correspond to garbage objects.  Call <callback> zero or more times with lists of these object
+  // pointers. The callback is not permitted to increase the max of either bitmap.
   static void SweepWalk(const SpaceBitmap& live, const SpaceBitmap& mark, uintptr_t base,
                         uintptr_t max, SweepCallback* thunk, void* arg);
 
@@ -169,10 +174,18 @@
   // Set the max address which can covered by the bitmap.
   void SetHeapLimit(uintptr_t new_end);
 
-  std::string GetName() const;
-  void SetName(const std::string& name);
+  std::string GetName() const {
+    return name_;
+  }
 
-  std::string Dump() const;
+  void SetName(const std::string& name) {
+    name_ = name;
+  }
+
+  std::string Dump() const {
+    return StringPrintf("%s: %p-%p", name_.c_str(), reinterpret_cast<void*>(HeapBegin()),
+                        reinterpret_cast<void*>(HeapLimit()));
+  }
 
   const void* GetObjectWordAddress(const mirror::Object* obj) const {
     uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
@@ -190,7 +203,17 @@
         heap_begin_(reinterpret_cast<uintptr_t>(heap_begin)),
         name_(name) {}
 
-  bool Modify(const mirror::Object* obj, bool do_set);
+  template<bool kSetBit>
+  bool Modify(const mirror::Object* obj);
+
+  // For an unvisited object, visit it then all its children found via fields.
+  static void WalkFieldsInOrder(SpaceBitmap* visited, ObjectCallback* callback, mirror::Object* obj,
+                                void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Walk instance fields of the given Class. Separate function to allow recursion on the super
+  // class.
+  static void WalkInstanceFields(SpaceBitmap<kAlignment>* visited, ObjectCallback* callback,
+                                 mirror::Object* obj, mirror::Class* klass, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Backing storage for bitmap.
   UniquePtr<MemMap> mem_map_;
@@ -272,7 +295,12 @@
   Objects contained_;
 };
 
-std::ostream& operator << (std::ostream& stream, const SpaceBitmap& bitmap);
+typedef SpaceBitmap<kObjectAlignment> ContinuousSpaceBitmap;
+// TODO: Replace usage of ObjectSet with LargeObjectBitmap.
+typedef SpaceBitmap<kLargeObjectAlignment> LargeObjectBitmap;
+
+template<size_t kAlignment>
+std::ostream& operator << (std::ostream& stream, const SpaceBitmap<kAlignment>& bitmap);
 
 }  // namespace accounting
 }  // namespace gc
diff --git a/runtime/gc/accounting/space_bitmap_test.cc b/runtime/gc/accounting/space_bitmap_test.cc
index 68994a8..7c18052 100644
--- a/runtime/gc/accounting/space_bitmap_test.cc
+++ b/runtime/gc/accounting/space_bitmap_test.cc
@@ -32,14 +32,15 @@
 TEST_F(SpaceBitmapTest, Init) {
   byte* heap_begin = reinterpret_cast<byte*>(0x10000000);
   size_t heap_capacity = 16 * MB;
-  UniquePtr<SpaceBitmap> space_bitmap(SpaceBitmap::Create("test bitmap",
-                                                          heap_begin, heap_capacity));
+  UniquePtr<ContinuousSpaceBitmap> space_bitmap(
+      ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
   EXPECT_TRUE(space_bitmap.get() != NULL);
 }
 
 class BitmapVerify {
  public:
-  BitmapVerify(SpaceBitmap* bitmap, const mirror::Object* begin, const mirror::Object* end)
+  BitmapVerify(ContinuousSpaceBitmap* bitmap, const mirror::Object* begin,
+               const mirror::Object* end)
     : bitmap_(bitmap),
       begin_(begin),
       end_(end) {}
@@ -50,7 +51,7 @@
     EXPECT_EQ(bitmap_->Test(obj), ((reinterpret_cast<uintptr_t>(obj) & 0xF) != 0));
   }
 
-  SpaceBitmap* bitmap_;
+  ContinuousSpaceBitmap* bitmap_;
   const mirror::Object* begin_;
   const mirror::Object* end_;
 };
@@ -59,14 +60,14 @@
   byte* heap_begin = reinterpret_cast<byte*>(0x10000000);
   size_t heap_capacity = 16 * MB;
 
-  UniquePtr<SpaceBitmap> space_bitmap(SpaceBitmap::Create("test bitmap",
-                                                          heap_begin, heap_capacity));
+  UniquePtr<ContinuousSpaceBitmap> space_bitmap(
+      ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
   EXPECT_TRUE(space_bitmap.get() != NULL);
 
   // Set all the odd bits in the first BitsPerWord * 3 to one.
   for (size_t j = 0; j < kBitsPerWord * 3; ++j) {
     const mirror::Object* obj =
-        reinterpret_cast<mirror::Object*>(heap_begin + j * SpaceBitmap::kAlignment);
+        reinterpret_cast<mirror::Object*>(heap_begin + j * kObjectAlignment);
     if (reinterpret_cast<uintptr_t>(obj) & 0xF) {
       space_bitmap->Set(obj);
     }
@@ -77,10 +78,10 @@
   // words.
   for (size_t i = 0; i < static_cast<size_t>(kBitsPerWord); ++i) {
     mirror::Object* start =
-        reinterpret_cast<mirror::Object*>(heap_begin + i * SpaceBitmap::kAlignment);
+        reinterpret_cast<mirror::Object*>(heap_begin + i * kObjectAlignment);
     for (size_t j = 0; j < static_cast<size_t>(kBitsPerWord * 2); ++j) {
       mirror::Object* end =
-          reinterpret_cast<mirror::Object*>(heap_begin + (i + j) * SpaceBitmap::kAlignment);
+          reinterpret_cast<mirror::Object*>(heap_begin + (i + j) * kObjectAlignment);
       BitmapVerify(space_bitmap.get(), start, end);
     }
   }
@@ -118,8 +119,8 @@
 
 
   for (int i = 0; i < 5 ; ++i) {
-    UniquePtr<SpaceBitmap> space_bitmap(SpaceBitmap::Create("test bitmap",
-                                                            heap_begin, heap_capacity));
+    UniquePtr<ContinuousSpaceBitmap> space_bitmap(
+        ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
 
     for (int j = 0; j < 10000; ++j) {
       size_t offset = (r.next() % heap_capacity) & ~(0x7);
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 82340f5..d99136a 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -174,8 +174,8 @@
     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect ||
         (gc_type == kGcTypeFull &&
          space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect)) {
-      accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-      accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+      accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+      accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
       if (live_bitmap != nullptr && live_bitmap != mark_bitmap) {
         heap_->GetLiveBitmap()->ReplaceBitmap(live_bitmap, mark_bitmap);
         heap_->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
@@ -204,6 +204,14 @@
   return (static_cast<uint64_t>(freed_bytes_) * 1000) / (NsToMs(GetDurationNs()) + 1);
 }
 
+void GarbageCollector::ResetMeasurements() {
+  cumulative_timings_.Reset();
+  pause_histogram_.Reset();
+  total_time_ns_ = 0;
+  total_freed_objects_ = 0;
+  total_freed_bytes_ = 0;
+}
+
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 5b7b8a2..b19ac3f 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -110,6 +110,9 @@
     return pause_histogram_;
   }
 
+  // Reset the cumulative timings and pause histogram.
+  void ResetMeasurements();
+
   // Returns the estimated throughput in bytes / second.
   uint64_t GetEstimatedMeanThroughput() const;
 
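No caller of ResetMeasurements appears in this change; a plausible heap-level reset would simply iterate the registered collectors. A sketch under that assumption (the method name and the garbage_collectors_ container are hypothetical here):

  void Heap::ResetGcPerformanceInfo() {  // Hypothetical caller; not in this diff.
    for (auto& collector : garbage_collectors_) {
      collector->ResetMeasurements();
    }
  }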
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 944ef8d..f07e6f1 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -123,13 +123,15 @@
   mark_immune_count_ = 0;
   mark_fastpath_count_ = 0;
   mark_slowpath_count_ = 0;
-  FindDefaultSpaceBitmap();
   {
     // TODO: I don't think we should need heap bitmap lock to get the mark bitmap.
     ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
     mark_bitmap_ = heap_->GetMarkBitmap();
   }
-
+  if (!clear_soft_references_) {
+    // Always clear soft references for non-sticky collections.
+    clear_soft_references_ = GetGcType() != collector::kGcTypeSticky;
+  }
   // Do any pre GC verification.
   timings_.NewSplit("PreGcVerification");
   heap_->PreGcVerification(this);
@@ -290,7 +292,7 @@
 void MarkSweep::FindDefaultSpaceBitmap() {
   TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* bitmap = space->GetMarkBitmap();
     if (bitmap != nullptr &&
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
       current_space_bitmap_ = bitmap;
@@ -356,7 +358,7 @@
   }
   // Try to take advantage of locality of references within a space, failing this find the space
   // the hard way.
-  accounting::SpaceBitmap* object_bitmap = current_space_bitmap_;
+  accounting::ContinuousSpaceBitmap* object_bitmap = current_space_bitmap_;
   if (UNLIKELY(!object_bitmap->HasAddress(obj))) {
     object_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
     if (kCountMarkedObjects) {
@@ -425,9 +427,9 @@
   }
   // Try to take advantage of locality of references within a space, failing this find the space
   // the hard way.
-  accounting::SpaceBitmap* object_bitmap = current_space_bitmap_;
+  accounting::ContinuousSpaceBitmap* object_bitmap = current_space_bitmap_;
   if (UNLIKELY(!object_bitmap->HasAddress(obj))) {
-    accounting::SpaceBitmap* new_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
+    accounting::ContinuousSpaceBitmap* new_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
     if (new_bitmap != NULL) {
       object_bitmap = new_bitmap;
     } else {
@@ -473,7 +475,7 @@
 void MarkSweep::VerifyRoot(const Object* root, size_t vreg, const StackVisitor* visitor,
                            RootType root_type) {
   // See if the root is on any space bitmap.
-  if (GetHeap()->GetLiveBitmap()->GetContinuousSpaceBitmap(root) == nullptr) {
+  if (heap_->GetLiveBitmap()->GetContinuousSpaceBitmap(root) == nullptr) {
     space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
     if (!large_object_space->Contains(root)) {
       LOG(ERROR) << "Found invalid root: " << root << " with type " << root_type;
@@ -683,7 +685,8 @@
 
 class CardScanTask : public MarkStackTask<false> {
  public:
-  CardScanTask(ThreadPool* thread_pool, MarkSweep* mark_sweep, accounting::SpaceBitmap* bitmap,
+  CardScanTask(ThreadPool* thread_pool, MarkSweep* mark_sweep,
+               accounting::ContinuousSpaceBitmap* bitmap,
                byte* begin, byte* end, byte minimum_age, size_t mark_stack_size,
                Object** mark_stack_obj)
       : MarkStackTask<false>(thread_pool, mark_sweep, mark_stack_size, mark_stack_obj),
@@ -694,7 +697,7 @@
   }
 
  protected:
-  accounting::SpaceBitmap* const bitmap_;
+  accounting::ContinuousSpaceBitmap* const bitmap_;
   byte* const begin_;
   byte* const end_;
   const byte minimum_age_;
@@ -817,7 +820,7 @@
 class RecursiveMarkTask : public MarkStackTask<false> {
  public:
   RecursiveMarkTask(ThreadPool* thread_pool, MarkSweep* mark_sweep,
-                    accounting::SpaceBitmap* bitmap, uintptr_t begin, uintptr_t end)
+                    accounting::ContinuousSpaceBitmap* bitmap, uintptr_t begin, uintptr_t end)
       : MarkStackTask<false>(thread_pool, mark_sweep, 0, NULL),
         bitmap_(bitmap),
         begin_(begin),
@@ -825,7 +828,7 @@
   }
 
  protected:
-  accounting::SpaceBitmap* const bitmap_;
+  accounting::ContinuousSpaceBitmap* const bitmap_;
   const uintptr_t begin_;
   const uintptr_t end_;
 
@@ -1042,8 +1045,8 @@
   // Start by sweeping the continuous spaces.
   for (space::ContinuousSpace* space : sweep_spaces) {
     space::AllocSpace* alloc_space = space->AsAllocSpace();
-    accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-    accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
     if (swap_bitmaps) {
       std::swap(live_bitmap, mark_bitmap);
     }
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index d49e427..6dbb270 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -22,6 +22,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "garbage_collector.h"
+#include "gc/accounting/space_bitmap.h"
 #include "immune_region.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -45,7 +46,6 @@
 namespace accounting {
   template<typename T> class AtomicStack;
   typedef AtomicStack<mirror::Object*> ObjectStack;
-  class SpaceBitmap;
 }  // namespace accounting
 
 namespace collector {
@@ -283,7 +283,7 @@
 
   // Current space, we check this space first to avoid searching for the appropriate space for an
   // object.
-  accounting::SpaceBitmap* current_space_bitmap_;
+  accounting::ContinuousSpaceBitmap* current_space_bitmap_;
   // Cache the heap's mark bitmap to prevent having to do 2 loads during slow path marking.
   accounting::HeapBitmap* mark_bitmap_;
 
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
index df731ff..8a9611f 100644
--- a/runtime/gc/collector/semi_space-inl.h
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -65,7 +65,7 @@
       }
       obj_ptr->Assign(forward_address);
     } else {
-      accounting::SpaceBitmap* object_bitmap =
+      accounting::ContinuousSpaceBitmap* object_bitmap =
           heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj);
       if (LIKELY(object_bitmap != nullptr)) {
         if (generational_) {
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 1366858..c0e172e 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -63,8 +63,9 @@
 namespace collector {
 
 static constexpr bool kProtectFromSpace = true;
-static constexpr bool kClearFromSpace = true;
 static constexpr bool kStoreStackTraces = false;
+static constexpr bool kUseBytesPromoted = true;
+static constexpr size_t kBytesPromotedThreshold = 4 * MB;
 
 void SemiSpace::BindBitmaps() {
   timings_.StartSplit("BindBitmaps");
@@ -102,8 +103,10 @@
       generational_(generational),
       last_gc_to_space_end_(nullptr),
       bytes_promoted_(0),
+      bytes_promoted_since_last_whole_heap_collection_(0),
       whole_heap_collection_(true),
-      whole_heap_collection_interval_counter_(0) {
+      whole_heap_collection_interval_counter_(0),
+      collector_name_(name_) {
 }
 
 void SemiSpace::InitializePhase() {
@@ -118,6 +121,7 @@
   // Do any pre GC verification.
   timings_.NewSplit("PreGcVerification");
   heap_->PreGcVerification(this);
+  CHECK(from_space_->CanMoveObjects()) << "Attempting to move from " << *from_space_;
   // Set the initial bitmap.
   to_space_live_bitmap_ = to_space_->GetLiveBitmap();
 }
@@ -150,20 +154,34 @@
       // collection, collect the whole heap (and reset the interval
       // counter to be consistent.)
       whole_heap_collection_ = true;
-      whole_heap_collection_interval_counter_ = 0;
+      if (!kUseBytesPromoted) {
+        whole_heap_collection_interval_counter_ = 0;
+      }
     }
     if (whole_heap_collection_) {
       VLOG(heap) << "Whole heap collection";
+      name_ = collector_name_ + " whole";
     } else {
       VLOG(heap) << "Bump pointer space only collection";
+      name_ = collector_name_ + " bps";
     }
   }
+
+  if (!clear_soft_references_) {
+    // Always clear soft references if non-generational; if generational, clear
+    // soft references only for whole heap collections.
+    clear_soft_references_ = !generational_ || whole_heap_collection_;
+  }
+
   Locks::mutator_lock_->AssertExclusiveHeld(self_);
 
   TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
-  // Need to do this with mutators paused so that somebody doesn't accidentally allocate into the
-  // wrong space.
-  heap_->SwapSemiSpaces();
   if (generational_) {
     // If last_gc_to_space_end_ is out of the bounds of the from-space
     // (the to-space from last GC), then point it to the beginning of
@@ -315,7 +333,7 @@
           // remain in the space, that is, the remembered set (and the
           // card table) didn't miss any from-space references in the
           // space.
-          accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
+          accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
           SemiSpaceVerifyNoFromSpaceReferencesObjectVisitor visitor(this);
           live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
                                         reinterpret_cast<uintptr_t>(space->End()),
@@ -323,7 +341,7 @@
         }
       } else {
         DCHECK(rem_set == nullptr);
-        accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
+        accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
         SemiSpaceScanObjectVisitor visitor(this);
         live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
                                       reinterpret_cast<uintptr_t>(space->End()),
@@ -375,10 +393,10 @@
   // Note: Freed bytes can be negative if we copy form a compacted space to a free-list backed
   // space.
   heap_->RecordFree(freed_objects, freed_bytes);
+
   timings_.StartSplit("PreSweepingGcVerification");
   heap_->PreSweepingGcVerification(this);
   timings_.EndSplit();
-
   {
     WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     // Reclaim unmarked objects.
@@ -393,11 +411,9 @@
     TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
     GetHeap()->UnBindBitmaps();
   }
-  if (kClearFromSpace) {
-    // Release the memory used by the from space.
-    from_space_->Clear();
-  }
-  from_space_->Reset();
+  // TODO: Do this before doing verification since the from space may have objects which weren't
+  // moved and point to dead objects.
+  from_space_->Clear();
   // Protect the from space.
   VLOG(heap) << "Protecting space " << *from_space_;
   if (kProtectFromSpace) {
@@ -519,9 +535,9 @@
       // space.
       GetHeap()->WriteBarrierEveryFieldOf(forward_address);
       // Handle the bitmaps marking.
-      accounting::SpaceBitmap* live_bitmap = promo_dest_space->GetLiveBitmap();
+      accounting::ContinuousSpaceBitmap* live_bitmap = promo_dest_space->GetLiveBitmap();
       DCHECK(live_bitmap != nullptr);
-      accounting::SpaceBitmap* mark_bitmap = promo_dest_space->GetMarkBitmap();
+      accounting::ContinuousSpaceBitmap* mark_bitmap = promo_dest_space->GetMarkBitmap();
       DCHECK(mark_bitmap != nullptr);
       DCHECK(!live_bitmap->Test(forward_address));
       if (!whole_heap_collection_) {
@@ -694,8 +710,8 @@
 
 // Scan anything that's on the mark stack.
 void SemiSpace::ProcessMarkStack() {
-  space::MallocSpace* promo_dest_space = NULL;
-  accounting::SpaceBitmap* live_bitmap = NULL;
+  space::MallocSpace* promo_dest_space = nullptr;
+  accounting::ContinuousSpaceBitmap* live_bitmap = nullptr;
   if (generational_ && !whole_heap_collection_) {
     // If a bump pointer space only collection (and the promotion is
     // enabled,) we delay the live-bitmap marking of promoted objects
@@ -703,7 +719,7 @@
     promo_dest_space = GetHeap()->GetPrimaryFreeListSpace();
     live_bitmap = promo_dest_space->GetLiveBitmap();
     DCHECK(live_bitmap != nullptr);
-    accounting::SpaceBitmap* mark_bitmap = promo_dest_space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = promo_dest_space->GetMarkBitmap();
     DCHECK(mark_bitmap != nullptr);
     DCHECK_EQ(live_bitmap, mark_bitmap);
   }
@@ -762,18 +778,34 @@
   if (generational_) {
     // Decide whether to do a whole heap collection or a bump pointer
     // only space collection at the next collection by updating
-    // whole_heap_collection. Enable whole_heap_collection once every
-    // kDefaultWholeHeapCollectionInterval collections.
+    // whole_heap_collection.
     if (!whole_heap_collection_) {
-      --whole_heap_collection_interval_counter_;
-      DCHECK_GE(whole_heap_collection_interval_counter_, 0);
-      if (whole_heap_collection_interval_counter_ == 0) {
-        whole_heap_collection_ = true;
+      if (!kUseBytesPromoted) {
+        // Enable whole_heap_collection once every
+        // kDefaultWholeHeapCollectionInterval collections.
+        --whole_heap_collection_interval_counter_;
+        DCHECK_GE(whole_heap_collection_interval_counter_, 0);
+        if (whole_heap_collection_interval_counter_ == 0) {
+          whole_heap_collection_ = true;
+        }
+      } else {
+        // Enable whole_heap_collection if the bytes promoted since
+        // the last whole heap collection exceeds a threshold.
+        bytes_promoted_since_last_whole_heap_collection_ += bytes_promoted_;
+        if (bytes_promoted_since_last_whole_heap_collection_ >= kBytesPromotedThreshold) {
+          whole_heap_collection_ = true;
+        }
       }
     } else {
-      DCHECK_EQ(whole_heap_collection_interval_counter_, 0);
-      whole_heap_collection_interval_counter_ = kDefaultWholeHeapCollectionInterval;
-      whole_heap_collection_ = false;
+      if (!kUseBytesPromoted) {
+        DCHECK_EQ(whole_heap_collection_interval_counter_, 0);
+        whole_heap_collection_interval_counter_ = kDefaultWholeHeapCollectionInterval;
+        whole_heap_collection_ = false;
+      } else {
+        // Reset the bytes-promoted counter.
+        bytes_promoted_since_last_whole_heap_collection_ = bytes_promoted_;
+        whole_heap_collection_ = false;
+      }
     }
   }
   // Clear all of the spaces' mark bitmaps.
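
A minimal sketch of the two whole-heap triggers used above, under assumed placeholder constants (the real ones are kDefaultWholeHeapCollectionInterval and kBytesPromotedThreshold in semi_space.h):

#include <cstdint>

struct WholeHeapTrigger {
  static constexpr bool kUseBytesPromoted = true;            // Assumed default.
  static constexpr int kInterval = 5;                        // Placeholder.
  static constexpr uint64_t kThreshold = 4u * 1024 * 1024;   // Placeholder.
  int counter = kInterval;
  uint64_t promoted_since_whole = 0;

  // After a bump-pointer-space-only GC: returns true if the next collection
  // should cover the whole heap.
  bool Update(uint64_t bytes_promoted_this_gc) {
    if (kUseBytesPromoted) {
      promoted_since_whole += bytes_promoted_this_gc;
      return promoted_since_whole >= kThreshold;
    }
    return --counter == 0;
  }

  // After a whole-heap GC: restart either policy's bookkeeping.
  void Reset(uint64_t bytes_promoted_this_gc) {
    counter = kInterval;
    promoted_since_whole = bytes_promoted_this_gc;
  }
};
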
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index f067cb2..4169ca9 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -21,6 +21,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "garbage_collector.h"
+#include "gc/accounting/space_bitmap.h"
 #include "immune_region.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -42,7 +43,6 @@
 namespace accounting {
   template <typename T> class AtomicStack;
   typedef AtomicStack<mirror::Object*> ObjectStack;
-  class SpaceBitmap;
 }  // namespace accounting
 
 namespace space {
@@ -198,7 +198,8 @@
   // Destination and source spaces (can be any type of ContinuousMemMapAllocSpace which either has
   // a live bitmap or doesn't).
   space::ContinuousMemMapAllocSpace* to_space_;
-  accounting::SpaceBitmap* to_space_live_bitmap_;  // Cached live bitmap as an optimization.
+  // Cached live bitmap as an optimization.
+  accounting::ContinuousSpaceBitmap* to_space_live_bitmap_;
   space::ContinuousMemMapAllocSpace* from_space_;
 
   Thread* self_;
@@ -217,6 +218,11 @@
   // bump pointer space to the non-moving space.
   uint64_t bytes_promoted_;
 
+  // Used for the generational mode. Keeps track of how many bytes of
+  // objects have been copied so far from the bump pointer space to
+  // the non-moving space, since the last whole heap collection.
+  uint64_t bytes_promoted_since_last_whole_heap_collection_;
+
   // Used for the generational mode. When true, collect the whole
   // heap. When false, collect only the bump pointer spaces.
   bool whole_heap_collection_;
@@ -228,6 +234,9 @@
   // How many bytes we avoided dirtying.
   size_t saved_bytes_;
 
+  // The name of the collector.
+  std::string collector_name_;
+
   // Used for the generational mode. The default interval of the whole
   // heap collection. If N, the whole heap collection occurs every N
   // collections.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index e3fa834..07d0455 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -77,10 +77,19 @@
 // Minimum amount of remaining bytes before a concurrent GC is triggered.
 static constexpr size_t kMinConcurrentRemainingBytes = 128 * KB;
 static constexpr size_t kMaxConcurrentRemainingBytes = 512 * KB;
+// Sticky GC throughput adjustment, divided by 4. Increasing this causes sticky GC to occur more
+// relative to partial/full GC. This is desirable since sticky GCs interfere less with mutator
+// threads (lower pauses, use less memory bandwidth).
+static constexpr double kStickyGcThroughputAdjustment = 1.25;
+// Whether or not we use the free list large object space.
+static constexpr bool kUseFreeListSpaceForLOS = false;
+// Whether or not we compact the zygote in PreZygoteFork.
+static constexpr bool kCompactZygote = kMovingCollector;
+static constexpr size_t kNonMovingSpaceCapacity = 64 * MB;
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, size_t capacity, const std::string& image_file_name,
-           CollectorType post_zygote_collector_type, CollectorType background_collector_type,
+           CollectorType foreground_collector_type, CollectorType background_collector_type,
            size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
            size_t long_pause_log_threshold, size_t long_gc_log_threshold,
            bool ignore_max_footprint, bool use_tlab, bool verify_pre_gc_heap,
@@ -91,9 +100,9 @@
       dlmalloc_space_(nullptr),
       main_space_(nullptr),
       collector_type_(kCollectorTypeNone),
-      post_zygote_collector_type_(post_zygote_collector_type),
+      foreground_collector_type_(foreground_collector_type),
       background_collector_type_(background_collector_type),
-      desired_collector_type_(collector_type_),
+      desired_collector_type_(foreground_collector_type_),
       heap_trim_request_lock_(nullptr),
       last_trim_time_(0),
       heap_transition_target_time_(0),
@@ -158,15 +167,11 @@
   // If we aren't the zygote, switch to the default non zygote allocator. This may update the
   // entrypoints.
   if (!is_zygote) {
-    desired_collector_type_ = post_zygote_collector_type_;
     large_object_threshold_ = kDefaultLargeObjectThreshold;
-  } else {
-    if (kMovingCollector) {
-      // We are the zygote, use bump pointer allocation + semi space collector.
-      bool generational = post_zygote_collector_type_ == kCollectorTypeGSS;
-      desired_collector_type_ = generational ? kCollectorTypeGSS : kCollectorTypeSS;
-    } else {
-      desired_collector_type_ = post_zygote_collector_type_;
+    // Background compaction is currently not supported for command line runs.
+    if (background_collector_type_ != foreground_collector_type_) {
+      LOG(WARNING) << "Disabling background compaction for non zygote";
+      background_collector_type_ = foreground_collector_type_;
     }
   }
   ChangeCollector(desired_collector_type_);
@@ -183,73 +188,61 @@
     // isn't going to get in the middle
     byte* oat_file_end_addr = image_space->GetImageHeader().GetOatFileEnd();
     CHECK_GT(oat_file_end_addr, image_space->End());
-    if (oat_file_end_addr > requested_alloc_space_begin) {
-      requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
-    }
+    requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
   }
-  MemMap* malloc_space_mem_map = nullptr;
-  const char* malloc_space_name = is_zygote ? "zygote space" : "alloc space";
   if (is_zygote) {
-    // Allocate a single mem map that is split into the malloc space
-    // and the post zygote non-moving space to put them adjacent.
-    size_t post_zygote_non_moving_space_size = 64 * MB;
-    size_t non_moving_spaces_size = capacity + post_zygote_non_moving_space_size;
+    // Reserve the address range before we create the non moving space to make sure bitmaps don't
+    // take it.
     std::string error_str;
-    malloc_space_mem_map = MemMap::MapAnonymous(malloc_space_name, requested_alloc_space_begin,
-                                                non_moving_spaces_size, PROT_READ | PROT_WRITE,
-                                                true, &error_str);
-    CHECK(malloc_space_mem_map != nullptr) << error_str;
-    post_zygote_non_moving_space_mem_map_.reset(malloc_space_mem_map->RemapAtEnd(
-        malloc_space_mem_map->Begin() + capacity, "post zygote non-moving space",
-        PROT_READ | PROT_WRITE, &error_str));
-    CHECK(post_zygote_non_moving_space_mem_map_.get() != nullptr) << error_str;
-    VLOG(heap) << "malloc space mem map : " << malloc_space_mem_map;
-    VLOG(heap) << "post zygote non-moving space mem map : "
-               << post_zygote_non_moving_space_mem_map_.get();
+    MemMap* mem_map = MemMap::MapAnonymous(
+        "main space", requested_alloc_space_begin + kNonMovingSpaceCapacity, capacity,
+        PROT_READ | PROT_WRITE, true, &error_str);
+    CHECK(mem_map != nullptr) << error_str;
+    // Non moving space is always dlmalloc since we currently don't have support for multiple
+    // rosalloc spaces.
+    non_moving_space_ = space::DlMallocSpace::Create(
+        "zygote / non moving space", initial_size, kNonMovingSpaceCapacity, kNonMovingSpaceCapacity,
+        requested_alloc_space_begin, false);
+    non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
+    CreateMainMallocSpace(mem_map, initial_size, growth_limit, capacity);
   } else {
-    // Allocate a mem map for the malloc space.
     std::string error_str;
-    malloc_space_mem_map = MemMap::MapAnonymous(malloc_space_name, requested_alloc_space_begin,
-                                                capacity, PROT_READ | PROT_WRITE, true, &error_str);
-    CHECK(malloc_space_mem_map != nullptr) << error_str;
-    VLOG(heap) << "malloc space mem map : " << malloc_space_mem_map;
+    MemMap* mem_map = MemMap::MapAnonymous("main/non-moving space", requested_alloc_space_begin,
+                                           capacity, PROT_READ | PROT_WRITE, true, &error_str);
+    CHECK(mem_map != nullptr) << error_str;
+    // Create the main free list space, which doubles as the non moving space. We can do this since
+    // non zygote means that we won't have any background compaction.
+    CreateMainMallocSpace(mem_map, initial_size, growth_limit, capacity);
+    non_moving_space_ = main_space_;
   }
-  CHECK(malloc_space_mem_map != nullptr);
-  space::MallocSpace* malloc_space;
-  if (kUseRosAlloc) {
-    malloc_space = space::RosAllocSpace::CreateFromMemMap(malloc_space_mem_map, malloc_space_name,
-                                                          kDefaultStartingSize, initial_size,
-                                                          growth_limit, capacity, low_memory_mode_);
-    CHECK(malloc_space != nullptr) << "Failed to create rosalloc space";
-  } else {
-    malloc_space = space::DlMallocSpace::CreateFromMemMap(malloc_space_mem_map, malloc_space_name,
-                                                          kDefaultStartingSize, initial_size,
-                                                          growth_limit, capacity);
-    CHECK(malloc_space != nullptr) << "Failed to create dlmalloc space";
-  }
-  VLOG(heap) << "malloc_space : " << malloc_space;
+  CHECK(non_moving_space_ != nullptr);
+
+  // We need to create the bump pointer space if the foreground collector is a compacting GC. We only
+  // create the bump pointer space if we are not a moving foreground collector but have a moving
+  // background collector since the heap transition code will create the temp space by recycling
+  // the bitmap from the main space.
   if (kMovingCollector) {
     // TODO: Place bump-pointer spaces somewhere to minimize size of card table.
-    // TODO: Having 3+ spaces as big as the large heap size can cause virtual memory fragmentation
-    // issues.
-    const size_t bump_pointer_space_size = std::min(malloc_space->Capacity(), 128 * MB);
+    // TODO: Don't create all the bump pointer spaces if not necessary (currently only GSS needs
+    // both bump pointer spaces + the main space) b/14059466. Divide capacity by 2 as a temporary fix.
+    const size_t bump_pointer_space_capacity = capacity / 2;
     bump_pointer_space_ = space::BumpPointerSpace::Create("Bump pointer space",
-                                                          bump_pointer_space_size, nullptr);
+                                                          bump_pointer_space_capacity, nullptr);
     CHECK(bump_pointer_space_ != nullptr) << "Failed to create bump pointer space";
     AddSpace(bump_pointer_space_);
-    temp_space_ = space::BumpPointerSpace::Create("Bump pointer space 2", bump_pointer_space_size,
-                                                  nullptr);
+    temp_space_ = space::BumpPointerSpace::Create("Bump pointer space 2",
+                                                  bump_pointer_space_capacity, nullptr);
     CHECK(temp_space_ != nullptr) << "Failed to create bump pointer space";
     AddSpace(temp_space_);
-    VLOG(heap) << "bump_pointer_space : " << bump_pointer_space_;
-    VLOG(heap) << "temp_space : " << temp_space_;
   }
-  non_moving_space_ = malloc_space;
-  malloc_space->SetFootprintLimit(malloc_space->Capacity());
-  AddSpace(malloc_space);
+  if (non_moving_space_ != main_space_) {
+    AddSpace(non_moving_space_);
+  }
+  if (main_space_ != nullptr) {
+    AddSpace(main_space_);
+  }
 
   // Allocate the large object space.
-  constexpr bool kUseFreeListSpaceForLOS = false;
   if (kUseFreeListSpaceForLOS) {
     large_object_space_ = space::FreeListSpace::Create("large object space", nullptr, capacity);
   } else {
@@ -264,11 +257,6 @@
   // Relies on the spaces being sorted.
   byte* heap_begin = continuous_spaces_.front()->Begin();
   byte* heap_end = continuous_spaces_.back()->Limit();
-  if (is_zygote) {
-    CHECK(post_zygote_non_moving_space_mem_map_.get() != nullptr);
-    heap_begin = std::min(post_zygote_non_moving_space_mem_map_->Begin(), heap_begin);
-    heap_end = std::max(post_zygote_non_moving_space_mem_map_->End(), heap_end);
-  }
   size_t heap_capacity = heap_end - heap_begin;
 
   // Allocate the card table.
@@ -288,6 +276,12 @@
         new accounting::RememberedSet("Non-moving space remembered set", this, non_moving_space_);
     CHECK(non_moving_space_rem_set != nullptr) << "Failed to create non-moving space remembered set";
     AddRememberedSet(non_moving_space_rem_set);
+    if (main_space_ != nullptr && main_space_ != non_moving_space_) {
+      accounting::RememberedSet* main_space_rem_set =
+          new accounting::RememberedSet("Main space remembered set", this, main_space_);
+      CHECK(main_space_rem_set != nullptr) << "Failed to create main space remembered set";
+      AddRememberedSet(main_space_rem_set);
+    }
   }
 
   // TODO: Count objects in the image space here.
@@ -325,8 +319,9 @@
   }
   if (kMovingCollector) {
     // TODO: Clean this up.
-    bool generational = post_zygote_collector_type_ == kCollectorTypeGSS;
-    semi_space_collector_ = new collector::SemiSpace(this, generational);
+    bool generational = foreground_collector_type_ == kCollectorTypeGSS;
+    semi_space_collector_ = new collector::SemiSpace(this, generational,
+                                                     generational ? "generational" : "");
     garbage_collectors_.push_back(semi_space_collector_);
 
     concurrent_copying_collector_ = new collector::ConcurrentCopying(this);
@@ -342,6 +337,37 @@
   }
 }
 
+void Heap::CreateMainMallocSpace(MemMap* mem_map, size_t initial_size, size_t growth_limit,
+                                 size_t capacity) {
+  // Is background compaction enabled?
+  bool can_move_objects = IsMovingGc(background_collector_type_) !=
+      IsMovingGc(foreground_collector_type_);
+  // If we are the zygote and don't yet have a zygote space, it means that the zygote fork will
+  // happen in the future. If this happens and we have kCompactZygote enabled we wish to compact
+  // from the main space to the zygote space. If background compaction is enabled, always pass in
+  // that we can move objects.
+  if (kCompactZygote && Runtime::Current()->IsZygote() && !can_move_objects) {
+    // After the zygote we want this to be false if we don't have background compaction enabled so
+    // that getting primitive array elements is faster.
+    can_move_objects = !have_zygote_space_;
+  }
+  if (kUseRosAlloc) {
+    main_space_ = space::RosAllocSpace::CreateFromMemMap(mem_map, "main rosalloc space",
+                                                          kDefaultStartingSize, initial_size,
+                                                          growth_limit, capacity, low_memory_mode_,
+                                                          can_move_objects);
+    CHECK(main_space_ != nullptr) << "Failed to create rosalloc space";
+  } else {
+    main_space_ = space::DlMallocSpace::CreateFromMemMap(mem_map, "main dlmalloc space",
+                                                          kDefaultStartingSize, initial_size,
+                                                          growth_limit, capacity,
+                                                          can_move_objects);
+    CHECK(main_space_ != nullptr) << "Failed to create dlmalloc space";
+  }
+  main_space_->SetFootprintLimit(main_space_->Capacity());
+  VLOG(heap) << "Created main space " << main_space_;
+}
+
 void Heap::ChangeAllocator(AllocatorType allocator) {
   if (current_allocator_ != allocator) {
     // These two allocators are only used internally and don't have any entrypoints.
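
The can_move_objects decision in CreateMainMallocSpace above, condensed into a sketch (the parameters stand in for the heap's members):

// A main-space object may move either because foreground/background
// transitions compact it, or because the zygote compaction still lies ahead.
static bool MainSpaceCanMoveObjects(bool fg_is_moving_gc, bool bg_is_moving_gc,
                                    bool is_zygote, bool have_zygote_space) {
  bool can_move = fg_is_moving_gc != bg_is_moving_gc;
  if (is_zygote && !can_move) {  // Assumes kCompactZygote.
    // Movable until the zygote space exists; pinned afterwards so that
    // GetPrimitiveArrayCritical stays fast.
    can_move = !have_zygote_space;
  }
  return can_move;
}
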
@@ -355,13 +381,13 @@
 }
 
 void Heap::DisableCompaction() {
-  if (IsCompactingGC(post_zygote_collector_type_)) {
-    post_zygote_collector_type_ = kCollectorTypeCMS;
+  if (IsMovingGc(foreground_collector_type_)) {
+    foreground_collector_type_ = kCollectorTypeCMS;
   }
-  if (IsCompactingGC(background_collector_type_)) {
-    background_collector_type_ = post_zygote_collector_type_;
+  if (IsMovingGc(background_collector_type_)) {
+    background_collector_type_ = foreground_collector_type_;
   }
-  TransitionCollector(post_zygote_collector_type_);
+  TransitionCollector(foreground_collector_type_);
 }
 
 std::string Heap::SafeGetClassDescriptor(mirror::Class* klass) {
@@ -423,14 +449,6 @@
         break;
       }
     }
-    if (space == nullptr) {
-      if (allocator_mem_map_.get() == nullptr || !allocator_mem_map_->HasAddress(obj)) {
-        stream << "obj " << obj << " not a valid heap address";
-        return;
-      } else if (allocator_mem_map_.get() != nullptr) {
-        allocator_mem_map_->Protect(PROT_READ | PROT_WRITE);
-      }
-    }
     // Unprotect all the spaces.
     for (const auto& space : continuous_spaces_) {
       mprotect(space->Begin(), space->Capacity(), PROT_READ | PROT_WRITE);
@@ -473,7 +491,7 @@
   ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
   MutexLock mu(self, *gc_complete_lock_);
   ++disable_moving_gc_count_;
-  if (IsCompactingGC(collector_type_running_)) {
+  if (IsMovingGc(collector_type_running_)) {
     WaitForGcToCompleteLocked(self);
   }
 }
@@ -491,12 +509,12 @@
       // Start at index 1 to avoid "is always false" warning.
       // Have iteration 1 always transition the collector.
       TransitionCollector((((i & 1) == 1) == (process_state_ == kProcessStateJankPerceptible))
-                          ? post_zygote_collector_type_ : background_collector_type_);
+                          ? foreground_collector_type_ : background_collector_type_);
       usleep(kCollectorTransitionStressWait);
     }
     if (process_state_ == kProcessStateJankPerceptible) {
       // Transition back to foreground right away to prevent jank.
-      RequestCollectorTransition(post_zygote_collector_type_, 0);
+      RequestCollectorTransition(foreground_collector_type_, 0);
     } else {
       // Don't delay for debug builds since we may want to stress test the GC.
       RequestCollectorTransition(background_collector_type_, kIsDebugBuild ? 0 :
@@ -563,8 +581,8 @@
     DCHECK(!space->IsDiscontinuousSpace());
     space::ContinuousSpace* continuous_space = space->AsContinuousSpace();
     // Continuous spaces don't necessarily have bitmaps.
-    accounting::SpaceBitmap* live_bitmap = continuous_space->GetLiveBitmap();
-    accounting::SpaceBitmap* mark_bitmap = continuous_space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* live_bitmap = continuous_space->GetLiveBitmap();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = continuous_space->GetMarkBitmap();
     if (live_bitmap != nullptr) {
       DCHECK(mark_bitmap != nullptr);
       live_bitmap_->AddContinuousSpaceBitmap(live_bitmap);
@@ -604,8 +622,8 @@
     DCHECK(!space->IsDiscontinuousSpace());
     space::ContinuousSpace* continuous_space = space->AsContinuousSpace();
     // Continuous spaces don't necessarily have bitmaps.
-    accounting::SpaceBitmap* live_bitmap = continuous_space->GetLiveBitmap();
-    accounting::SpaceBitmap* mark_bitmap = continuous_space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* live_bitmap = continuous_space->GetLiveBitmap();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = continuous_space->GetMarkBitmap();
     if (live_bitmap != nullptr) {
       DCHECK(mark_bitmap != nullptr);
       live_bitmap_->RemoveContinuousSpaceBitmap(live_bitmap);
@@ -621,6 +639,10 @@
     }
     if (continuous_space == main_space_) {
       main_space_ = nullptr;
+    } else if (continuous_space == bump_pointer_space_) {
+      bump_pointer_space_ = nullptr;
+    } else if (continuous_space == temp_space_) {
+      temp_space_ = nullptr;
     }
   } else {
     DCHECK(space->IsDiscontinuousSpace());
@@ -657,12 +679,13 @@
   // Dump cumulative timings.
   os << "Dumping cumulative Gc timings\n";
   uint64_t total_duration = 0;
-
   // Dump cumulative loggers for each GC type.
   uint64_t total_paused_time = 0;
-  for (const auto& collector : garbage_collectors_) {
+  for (auto& collector : garbage_collectors_) {
     const CumulativeLogger& logger = collector->GetCumulativeTimings();
-    if (logger.GetTotalNs() != 0) {
+    const size_t iterations = logger.GetIterations();
+    const Histogram<uint64_t>& pause_histogram = collector->GetPauseHistogram();
+    if (iterations != 0 && pause_histogram.SampleSize() != 0) {
       os << ConstDumpable<CumulativeLogger>(logger);
       const uint64_t total_ns = logger.GetTotalNs();
       const uint64_t total_pause_ns = collector->GetTotalPausedTimeNs();
@@ -670,9 +693,10 @@
       const uint64_t freed_bytes = collector->GetTotalFreedBytes();
       const uint64_t freed_objects = collector->GetTotalFreedObjects();
       Histogram<uint64_t>::CumulativeData cumulative_data;
-      collector->GetPauseHistogram().CreateHistogram(&cumulative_data);
-      collector->GetPauseHistogram().PrintConfidenceIntervals(os, 0.99, cumulative_data);
-      os << collector->GetName() << " total time: " << PrettyDuration(total_ns) << "\n"
+      pause_histogram.CreateHistogram(&cumulative_data);
+      pause_histogram.PrintConfidenceIntervals(os, 0.99, cumulative_data);
+      os << collector->GetName() << " total time: " << PrettyDuration(total_ns)
+         << " mean time: " << PrettyDuration(total_ns / iterations) << "\n"
          << collector->GetName() << " freed: " << freed_objects
          << " objects with total size " << PrettySize(freed_bytes) << "\n"
          << collector->GetName() << " throughput: " << freed_objects / seconds << "/s / "
@@ -680,6 +704,7 @@
       total_duration += total_ns;
       total_paused_time += total_pause_ns;
     }
+    collector->ResetMeasurements();
   }
   uint64_t allocation_time = static_cast<uint64_t>(total_allocation_time_) * kTimeAdjust;
   if (total_duration != 0) {
@@ -959,8 +984,10 @@
       managed_reclaimed += alloc_space->Trim();
     }
   }
-  total_alloc_space_allocated = GetBytesAllocated() - large_object_space_->GetBytesAllocated() -
-      bump_pointer_space_->Size();
+  total_alloc_space_allocated = GetBytesAllocated() - large_object_space_->GetBytesAllocated();
+  if (bump_pointer_space_ != nullptr) {
+    total_alloc_space_allocated -= bump_pointer_space_->Size();
+  }
   const float managed_utilization = static_cast<float>(total_alloc_space_allocated) /
       static_cast<float>(total_alloc_space_size);
   uint64_t gc_heap_end_ns = NanoTime();
@@ -1076,8 +1103,8 @@
 
 void Heap::DumpSpaces(std::ostream& stream) {
   for (const auto& space : continuous_spaces_) {
-    accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-    accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
     stream << space << " " << *space << "\n";
     if (live_bitmap != nullptr) {
       stream << live_bitmap << " " << *live_bitmap << "\n";
@@ -1385,14 +1412,13 @@
   VLOG(heap) << "TransitionCollector: " << static_cast<int>(collector_type_)
              << " -> " << static_cast<int>(collector_type);
   uint64_t start_time = NanoTime();
-  uint32_t before_size  = GetTotalMemory();
   uint32_t before_allocated = num_bytes_allocated_.Load();
   ThreadList* tl = Runtime::Current()->GetThreadList();
   Thread* self = Thread::Current();
   ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
   Locks::mutator_lock_->AssertNotHeld(self);
   const bool copying_transition =
-      IsCompactingGC(background_collector_type_) || IsCompactingGC(post_zygote_collector_type_);
+      IsMovingGc(background_collector_type_) || IsMovingGc(foreground_collector_type_);
   // Busy wait until we can GC (StartGC can fail if we have a non-zero
   // compacting_gc_disable_count_, this should rarely occur).
   for (;;) {
@@ -1401,6 +1427,13 @@
       MutexLock mu(self, *gc_complete_lock_);
       // Ensure there is only one GC at a time.
       WaitForGcToCompleteLocked(self);
+      // If someone else beat us to it and changed the collector before we could, exit.
+      // This is safe to do before the suspend all since we set the collector_type_running_ before
+      // we exit the loop. If another thread attempts to do the heap transition before we exit,
+      // then it would get blocked on WaitForGcToCompleteLocked.
+      if (collector_type == collector_type_) {
+        return;
+      }
       // GC can be disabled if someone has used GetPrimitiveArrayCritical but not yet released it.
       if (!copying_transition || disable_moving_gc_count_ == 0) {
         // TODO: Not hard code in semi-space collector?
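
The early return added above is the usual double-checked pattern: re-test the condition after reacquiring the lock and waiting out any running GC. A generic sketch, with std::mutex standing in for gc_complete_lock_:

#include <mutex>

enum class CollectorType { kCMS, kSS, kGSS };

struct TransitionGuard {
  std::mutex gc_complete_lock;
  CollectorType current = CollectorType::kCMS;

  // Returns false if another thread already performed this transition.
  bool ShouldTransition(CollectorType target) {
    std::lock_guard<std::mutex> lock(gc_complete_lock);
    // WaitForGcToCompleteLocked(self) would block here until no GC is running.
    return target != current;
  }
};
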
@@ -1416,42 +1449,20 @@
     case kCollectorTypeSS:
       // Fall-through.
     case kCollectorTypeGSS: {
-      mprotect(temp_space_->Begin(), temp_space_->Capacity(), PROT_READ | PROT_WRITE);
-      CHECK(main_space_ != nullptr);
-      Compact(temp_space_, main_space_);
-      DCHECK(allocator_mem_map_.get() == nullptr);
-      allocator_mem_map_.reset(main_space_->ReleaseMemMap());
-      madvise(main_space_->Begin(), main_space_->Size(), MADV_DONTNEED);
-      // RemoveSpace does not delete the removed space.
-      space::Space* old_space = main_space_;
-      RemoveSpace(old_space);
-      delete old_space;
+      if (!IsMovingGc(collector_type_)) {
+        // We are transitioning from a non-moving GC to a moving GC. Since we compacted from the
+        // bump pointer space during the last transition, it is currently protected.
+        bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+        Compact(bump_pointer_space_, main_space_);
+      }
       break;
     }
     case kCollectorTypeMS:
       // Fall through.
     case kCollectorTypeCMS: {
-      if (IsCompactingGC(collector_type_)) {
-        // TODO: Use mem-map from temp space?
-        MemMap* mem_map = allocator_mem_map_.release();
-        CHECK(mem_map != nullptr);
-        size_t starting_size = kDefaultStartingSize;
-        size_t initial_size = kDefaultInitialSize;
-        mprotect(mem_map->Begin(), initial_size, PROT_READ | PROT_WRITE);
-        CHECK(main_space_ == nullptr);
-        if (kUseRosAlloc) {
-          main_space_ =
-              space::RosAllocSpace::CreateFromMemMap(mem_map, "alloc space", starting_size,
-                                                     initial_size, mem_map->Size(),
-                                                     mem_map->Size(), low_memory_mode_);
-        } else {
-          main_space_ =
-              space::DlMallocSpace::CreateFromMemMap(mem_map, "alloc space", starting_size,
-                                                     initial_size, mem_map->Size(),
-                                                     mem_map->Size());
-        }
-        main_space_->SetFootprintLimit(main_space_->Capacity());
-        AddSpace(main_space_);
+      if (IsMovingGc(collector_type_)) {
+        // Compact to the main space from the bump pointer space; no need to swap the semi-spaces.
+        main_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
         Compact(main_space_, bump_pointer_space_);
       }
       break;
@@ -1470,16 +1481,10 @@
   uint64_t duration = NanoTime() - start_time;
   GrowForUtilization(semi_space_collector_);
   FinishGC(self, collector::kGcTypeFull);
-  int32_t after_size = GetTotalMemory();
-  int32_t delta_size = before_size - after_size;
   int32_t after_allocated = num_bytes_allocated_.Load();
   int32_t delta_allocated = before_allocated - after_allocated;
-  const std::string saved_bytes_str =
-      delta_size < 0 ? "-" + PrettySize(-delta_size) : PrettySize(delta_size);
   LOG(INFO) << "Heap transition to " << process_state_ << " took "
-      << PrettyDuration(duration) << " " << PrettySize(before_size) << "->"
-      << PrettySize(after_size) << " from " << PrettySize(delta_allocated) << " to "
-      << PrettySize(delta_size) << " saved";
+      << PrettyDuration(duration) << " saved at least " << PrettySize(delta_allocated);
 }
 
 void Heap::ChangeCollector(CollectorType collector_type) {
@@ -1554,9 +1559,9 @@
   // Maps from bin sizes to locations.
   std::multimap<size_t, uintptr_t> bins_;
   // Live bitmap of the space which contains the bins.
-  accounting::SpaceBitmap* bin_live_bitmap_;
+  accounting::ContinuousSpaceBitmap* bin_live_bitmap_;
   // Mark bitmap of the space which contains the bins.
-  accounting::SpaceBitmap* bin_mark_bitmap_;
+  accounting::ContinuousSpaceBitmap* bin_mark_bitmap_;
 
   static void Callback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -1647,11 +1652,12 @@
   VLOG(heap) << "Starting PreZygoteFork";
   // Trim the pages at the end of the non moving space.
   non_moving_space_->Trim();
+  // The end of the non-moving space may be protected; unprotect it so that we can copy the zygote
+  // there.
   non_moving_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
   // Change the collector to the post zygote one.
-  ChangeCollector(post_zygote_collector_type_);
-  // TODO: Delete bump_pointer_space_ and temp_pointer_space_?
-  if (semi_space_collector_ != nullptr) {
+  if (kCompactZygote) {
+    DCHECK(semi_space_collector_ != nullptr);
     // Temporarily disable rosalloc verification because the zygote
     // compaction will mess up the rosalloc internal metadata.
     ScopedDisableRosAllocVerification disable_rosalloc_verif(this);
@@ -1661,18 +1667,47 @@
     space::BumpPointerSpace target_space("zygote bump space", non_moving_space_->End(),
                                          non_moving_space_->Limit());
     // Compact the bump pointer space to a new zygote bump pointer space.
-    temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
-    zygote_collector.SetFromSpace(bump_pointer_space_);
+    bool reset_main_space = false;
+    if (IsMovingGc(collector_type_)) {
+      zygote_collector.SetFromSpace(bump_pointer_space_);
+    } else {
+      CHECK(main_space_ != nullptr);
+      // Copy from the main space.
+      zygote_collector.SetFromSpace(main_space_);
+      reset_main_space = true;
+    }
     zygote_collector.SetToSpace(&target_space);
+
+    Runtime::Current()->GetThreadList()->SuspendAll();
     zygote_collector.Run(kGcCauseCollectorTransition, false);
-    CHECK(temp_space_->IsEmpty());
+    if (IsMovingGc(collector_type_)) {
+      SwapSemiSpaces();
+    }
+    Runtime::Current()->GetThreadList()->ResumeAll();
+
+    if (reset_main_space) {
+      main_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+      madvise(main_space_->Begin(), main_space_->Capacity(), MADV_DONTNEED);
+      MemMap* mem_map = main_space_->ReleaseMemMap();
+      RemoveSpace(main_space_);
+      delete main_space_;
+      main_space_ = nullptr;
+      CreateMainMallocSpace(mem_map, kDefaultInitialSize, mem_map->Size(), mem_map->Size());
+      AddSpace(main_space_);
+    } else {
+      bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+    }
+    if (temp_space_ != nullptr) {
+      CHECK(temp_space_->IsEmpty());
+    }
     total_objects_freed_ever_ += semi_space_collector_->GetFreedObjects();
     total_bytes_freed_ever_ += semi_space_collector_->GetFreedBytes();
     // Update the end and write out image.
     non_moving_space_->SetEnd(target_space.End());
     non_moving_space_->SetLimit(target_space.Limit());
-    VLOG(heap) << "Zygote size " << non_moving_space_->Size() << " bytes";
+    VLOG(heap) << "Zygote space size " << non_moving_space_->Size() << " bytes";
   }
+  ChangeCollector(foreground_collector_type_);
   // Save the old space so that we can remove it after we complete creating the zygote space.
   space::MallocSpace* old_alloc_space = non_moving_space_;
   // Turn the current alloc space into a zygote space and obtain the new alloc space composed of
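
A hedged sketch of the from-space selection added above (Space is a stand-in type; the flag mirrors reset_main_space):

struct Space {};

// A moving collector keeps live objects in the bump pointer space; a
// non-moving one keeps them in the main space, which must be rebuilt after
// the copy empties it.
static Space* PickZygoteFromSpace(bool current_gc_is_moving, Space* bump_pointer,
                                  Space* main, bool* reset_main_space) {
  *reset_main_space = !current_gc_is_moving;
  return current_gc_is_moving ? bump_pointer : main;
}
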
@@ -1692,18 +1727,12 @@
   }
   space::ZygoteSpace* zygote_space = old_alloc_space->CreateZygoteSpace("alloc space",
                                                                         low_memory_mode_,
-                                                                        &main_space_);
+                                                                        &non_moving_space_);
   delete old_alloc_space;
   CHECK(zygote_space != nullptr) << "Failed creating zygote space";
   AddSpace(zygote_space, false);
-  CHECK(main_space_ != nullptr);
-  if (main_space_->IsRosAllocSpace()) {
-    rosalloc_space_ = main_space_->AsRosAllocSpace();
-  } else if (main_space_->IsDlMallocSpace()) {
-    dlmalloc_space_ = main_space_->AsDlMallocSpace();
-  }
-  main_space_->SetFootprintLimit(main_space_->Capacity());
-  AddSpace(main_space_);
+  non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
+  AddSpace(non_moving_space_);
   have_zygote_space_ = true;
   // Enable large object space allocations.
   large_object_threshold_ = kDefaultLargeObjectThreshold;
@@ -1713,23 +1742,6 @@
   CHECK(mod_union_table != nullptr) << "Failed to create zygote space mod-union table";
   AddModUnionTable(mod_union_table);
   if (collector::SemiSpace::kUseRememberedSet) {
-    // Add a new remembered set for the new main space.
-    accounting::RememberedSet* main_space_rem_set =
-        new accounting::RememberedSet("Main space remembered set", this, main_space_);
-    CHECK(main_space_rem_set != nullptr) << "Failed to create main space remembered set";
-    AddRememberedSet(main_space_rem_set);
-  }
-  // Can't use RosAlloc for non moving space due to thread local buffers.
-  // TODO: Non limited space for non-movable objects?
-  MemMap* mem_map = post_zygote_non_moving_space_mem_map_.release();
-  space::MallocSpace* new_non_moving_space =
-      space::DlMallocSpace::CreateFromMemMap(mem_map, "Non moving dlmalloc space", kPageSize,
-                                             2 * MB, mem_map->Size(), mem_map->Size());
-  AddSpace(new_non_moving_space, false);
-  CHECK(new_non_moving_space != nullptr) << "Failed to create new non-moving space";
-  new_non_moving_space->SetFootprintLimit(new_non_moving_space->Capacity());
-  non_moving_space_ = new_non_moving_space;
-  if (collector::SemiSpace::kUseRememberedSet) {
     // Add a new remembered set for the post-zygote non-moving space.
     accounting::RememberedSet* post_zygote_non_moving_space_rem_set =
         new accounting::RememberedSet("Post-zygote non-moving space remembered set", this,
@@ -1745,8 +1757,8 @@
   allocation_stack_->Reset();
 }
 
-void Heap::MarkAllocStack(accounting::SpaceBitmap* bitmap1,
-                          accounting::SpaceBitmap* bitmap2,
+void Heap::MarkAllocStack(accounting::ContinuousSpaceBitmap* bitmap1,
+                          accounting::ContinuousSpaceBitmap* bitmap2,
                           accounting::ObjectSet* large_objects,
                           accounting::ObjectStack* stack) {
   DCHECK(bitmap1 != nullptr);
@@ -1767,9 +1779,9 @@
 }
 
 void Heap::SwapSemiSpaces() {
-  // Swap the spaces so we allocate into the space which we just evacuated.
+  CHECK(bump_pointer_space_ != nullptr);
+  CHECK(temp_space_ != nullptr);
   std::swap(bump_pointer_space_, temp_space_);
-  bump_pointer_space_->Clear();
 }
 
 void Heap::Compact(space::ContinuousMemMapAllocSpace* target_space,
@@ -1812,7 +1824,7 @@
     MutexLock mu(self, *gc_complete_lock_);
     // Ensure there is only one GC at a time.
     WaitForGcToCompleteLocked(self);
-    compacting_gc = IsCompactingGC(collector_type_);
+    compacting_gc = IsMovingGc(collector_type_);
     // GC can be disabled if someone has used GetPrimitiveArrayCritical.
     if (compacting_gc && disable_moving_gc_count_ != 0) {
       LOG(WARNING) << "Skipping GC due to disable moving GC count " << disable_moving_gc_count_;
@@ -1867,10 +1879,14 @@
       << "Could not find garbage collector with collector_type="
       << static_cast<size_t>(collector_type_) << " and gc_type=" << gc_type;
   ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), collector->GetName()).c_str());
-  if (!clear_soft_references) {
-    clear_soft_references = gc_type != collector::kGcTypeSticky;  // TODO: GSS?
+  if (compacting_gc) {
+    runtime->GetThreadList()->SuspendAll();
+    collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
+    SwapSemiSpaces();
+    runtime->GetThreadList()->ResumeAll();
+  } else {
+    collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
   }
-  collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
   total_objects_freed_ever_ += collector->GetFreedObjects();
   total_bytes_freed_ever_ += collector->GetFreedBytes();
   RequestHeapTrim();
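
For clarity, the suspend-run-swap sequence above as a sketch; ThreadList and the collector hook are simplified stand-ins for the ART types:

struct ThreadList {
  void SuspendAll() {}  // Stop-the-world: no mutator can allocate.
  void ResumeAll() {}
};

template <typename Collector, typename SwapFn>
void RunMovingGc(ThreadList* tl, Collector* collector, bool clear_soft,
                 SwapFn swap_semi_spaces) {
  tl->SuspendAll();
  collector->Run(clear_soft);  // Evacuate from-space into to-space.
  swap_semi_spaces();          // New allocations target the evacuated space.
  tl->ResumeAll();
}
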
@@ -2015,7 +2031,8 @@
           accounting::CardTable::kCardSize);
       LOG(ERROR) << "Card " << reinterpret_cast<void*>(card_addr) << " covers " << cover_begin
           << "-" << cover_end;
-      accounting::SpaceBitmap* bitmap = heap_->GetLiveBitmap()->GetContinuousSpaceBitmap(obj);
+      accounting::ContinuousSpaceBitmap* bitmap =
+          heap_->GetLiveBitmap()->GetContinuousSpaceBitmap(obj);
 
       if (bitmap == nullptr) {
         LOG(ERROR) << "Object " << obj << " has no bitmap";
@@ -2385,9 +2402,11 @@
       WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
       // Swapping bound bitmaps does nothing.
       gc->SwapBitmaps();
+      SwapSemiSpaces();
       if (!VerifyHeapReferences()) {
         LOG(FATAL) << "Pre sweeping " << gc->GetName() << " GC verification failed";
       }
+      SwapSemiSpaces();
       gc->SwapBitmaps();
     }
   }
@@ -2473,25 +2492,11 @@
 
 bool Heap::IsMovableObject(const mirror::Object* obj) const {
   if (kMovingCollector) {
-    DCHECK(!IsInTempSpace(obj));
-    if (bump_pointer_space_->HasAddress(obj)) {
-      return true;
+    space::Space* space = FindContinuousSpaceFromObject(obj, true);
+    if (space != nullptr) {
+      // TODO: Check large object?
+      return space->CanMoveObjects();
     }
-    // TODO: Refactor this logic into the space itself?
-    // Objects in the main space are only copied during background -> foreground transitions or
-    // visa versa.
-    if (main_space_ != nullptr && main_space_->HasAddress(obj) &&
-        (IsCompactingGC(background_collector_type_) ||
-            IsCompactingGC(post_zygote_collector_type_))) {
-      return true;
-    }
-  }
-  return false;
-}
-
-bool Heap::IsInTempSpace(const mirror::Object* obj) const {
-  if (temp_space_->HasAddress(obj) && !temp_space_->Contains(obj)) {
-    return true;
   }
   return false;
 }
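
After this refactor, movability is a per-space property instead of a hard-coded list of space checks. A minimal sketch of the shape (Space here is illustrative):

struct Space {
  virtual ~Space() {}
  virtual bool CanMoveObjects() const = 0;
};

static bool IsMovableObject(const Space* containing_space) {
  // Null: the object is in no known continuous space (e.g. a large object).
  return containing_space != nullptr && containing_space->CanMoveObjects();
}
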
@@ -2547,7 +2552,7 @@
     // We also check that the bytes allocated aren't over the footprint limit in order to prevent a
     // pathological case where dead objects which aren't reclaimed by sticky could get accumulated
     // if the sticky GC throughput always remained >= the full/partial throughput.
-    if (collector_ran->GetEstimatedLastIterationThroughput() >=
+    if (collector_ran->GetEstimatedLastIterationThroughput() * kStickyGcThroughputAdjustment >=
         non_sticky_collector->GetEstimatedMeanThroughput() &&
         non_sticky_collector->GetIterations() > 0 &&
         bytes_allocated <= max_allowed_footprint_) {
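
The adjusted comparison above, isolated as a predicate (a sketch; the parameter names are descriptive stand-ins for the collector accessors):

#include <cstddef>
#include <cstdint>

// Keep scheduling sticky GCs while their throughput, boosted by the 1.25x
// adjustment, keeps up with the non-sticky mean and the heap is within its
// allowed footprint.
static bool StickyGcStillPaysOff(double sticky_last_throughput,
                                 double non_sticky_mean_throughput,
                                 uint64_t non_sticky_iterations,
                                 size_t bytes_allocated,
                                 size_t max_allowed_footprint) {
  constexpr double kStickyGcThroughputAdjustment = 1.25;
  return sticky_last_throughput * kStickyGcThroughputAdjustment >=
             non_sticky_mean_throughput &&
         non_sticky_iterations > 0 &&
         bytes_allocated <= max_allowed_footprint;
}
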
@@ -2793,7 +2798,7 @@
       if (IsGcConcurrent()) {
         RequestConcurrentGC(self);
       } else {
-        CollectGarbageInternal(gc_type, kGcCauseForAlloc, false);
+        CollectGarbageInternal(gc_type, kGcCauseForNativeAlloc, false);
       }
     }
   }
@@ -2862,7 +2867,7 @@
 void Heap::ClearMarkedObjects() {
   // Clear all of the spaces' mark bitmaps.
   for (const auto& space : GetContinuousSpaces()) {
-    accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
     if (space->GetLiveBitmap() != mark_bitmap) {
       mark_bitmap->Clear();
     }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index a8989ec..874357f 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -150,7 +150,7 @@
   explicit Heap(size_t initial_size, size_t growth_limit, size_t min_free,
                 size_t max_free, double target_utilization, size_t capacity,
                 const std::string& original_image_file_name,
-                CollectorType post_zygote_collector_type, CollectorType background_collector_type,
+                CollectorType foreground_collector_type, CollectorType background_collector_type,
                 size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
                 size_t long_pause_threshold, size_t long_gc_threshold,
                 bool ignore_max_footprint, bool use_tlab, bool verify_pre_gc_heap,
@@ -196,8 +196,6 @@
   void VisitObjects(ObjectCallback callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  void SwapSemiSpaces() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   void CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ThrowOutOfMemoryError(size_t byte_count, bool large_object_allocation);
@@ -249,10 +247,6 @@
   // Returns true if there is any chance that the object (obj) will move.
   bool IsMovableObject(const mirror::Object* obj) const;
 
-  // Returns true if an object is in the temp space, if this happens its usually indicative of
-  // compaction related errors.
-  bool IsInTempSpace(const mirror::Object* obj) const;
-
   // Enables us to disable moving GC until objects are released.
   void IncrementDisableMovingGC(Thread* self);
   void DecrementDisableMovingGC(Thread* self);
@@ -476,7 +470,9 @@
       LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_);
 
   // Mark all the objects in the allocation stack in the specified bitmap.
-  void MarkAllocStack(accounting::SpaceBitmap* bitmap1, accounting::SpaceBitmap* bitmap2,
+  // TODO: Refactor?
+  void MarkAllocStack(accounting::SpaceBitmap<kObjectAlignment>* bitmap1,
+                      accounting::SpaceBitmap<kObjectAlignment>* bitmap2,
                       accounting::ObjectSet* large_objects, accounting::ObjectStack* stack)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -568,7 +564,8 @@
 
  private:
   void Compact(space::ContinuousMemMapAllocSpace* target_space,
-               space::ContinuousMemMapAllocSpace* source_space);
+               space::ContinuousMemMapAllocSpace* source_space)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void FinishGC(Thread* self, collector::GcType gc_type) LOCKS_EXCLUDED(gc_complete_lock_);
 
@@ -580,7 +577,7 @@
   static ALWAYS_INLINE bool AllocatorMayHaveConcurrentGC(AllocatorType allocator_type) {
     return AllocatorHasAllocationStack(allocator_type);
   }
-  static bool IsCompactingGC(CollectorType collector_type) {
+  static bool IsMovingGc(CollectorType collector_type) {
     return collector_type == kCollectorTypeSS || collector_type == kCollectorTypeGSS ||
         collector_type == kCollectorTypeCC;
   }
@@ -609,6 +606,10 @@
                                size_t bytes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Need to do this with mutators paused so that somebody doesn't accidentally allocate into the
+  // wrong space.
+  void SwapSemiSpaces() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Try to allocate a number of bytes, this function never does any GCs. Needs to be inlined so
   // that the switch statement is constant optimized in the entrypoints.
   template <const bool kInstrumented, const bool kGrow>
@@ -668,6 +669,10 @@
   // Find a collector based on GC type.
   collector::GarbageCollector* FindCollectorByGcType(collector::GcType gc_type);
 
+  // Create the main free list space, typically either a RosAlloc space or DlMalloc space.
+  void CreateMainMallocSpace(MemMap* mem_map, size_t initial_size, size_t growth_limit,
+                             size_t capacity);
+
   // Given the current contents of the alloc space, increase the allowed heap footprint to match
   // the target utilization ratio.  This should only be called immediately after a full garbage
   // collection.
@@ -737,17 +742,10 @@
   // A remembered set remembers all of the references from its space to the target space.
   SafeMap<space::Space*, accounting::RememberedSet*> remembered_sets_;
 
-  // Keep the free list allocator mem map lying around when we transition to background so that we
-  // don't have to worry about virtual address space fragmentation.
-  UniquePtr<MemMap> allocator_mem_map_;
-
-  // The mem-map which we will use for the non-moving space after the zygote is done forking:
-  UniquePtr<MemMap> post_zygote_non_moving_space_mem_map_;
-
   // The current collector type.
   CollectorType collector_type_;
-  // Which collector we will switch to after zygote fork.
-  CollectorType post_zygote_collector_type_;
+  // Which collector we use when the app is in the foreground.
+  CollectorType foreground_collector_type_;
   // Which collector we will use when the app is notified of a transition to background.
   CollectorType background_collector_type_;
   // Desired collector type, heap trimming daemon transitions the heap if it is != collector_type_.
diff --git a/runtime/gc/heap_test.cc b/runtime/gc/heap_test.cc
index 07e5088..a85ad4d 100644
--- a/runtime/gc/heap_test.cc
+++ b/runtime/gc/heap_test.cc
@@ -60,13 +60,11 @@
 
 TEST_F(HeapTest, HeapBitmapCapacityTest) {
   byte* heap_begin = reinterpret_cast<byte*>(0x1000);
-  const size_t heap_capacity = accounting::SpaceBitmap::kAlignment * (sizeof(intptr_t) * 8 + 1);
-  UniquePtr<accounting::SpaceBitmap> bitmap(accounting::SpaceBitmap::Create("test bitmap",
-                                                                            heap_begin,
-                                                                            heap_capacity));
+  const size_t heap_capacity = kObjectAlignment * (sizeof(intptr_t) * 8 + 1);
+  UniquePtr<accounting::ContinuousSpaceBitmap> bitmap(
+      accounting::ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
   mirror::Object* fake_end_of_heap_object =
-      reinterpret_cast<mirror::Object*>(&heap_begin[heap_capacity -
-                                                    accounting::SpaceBitmap::kAlignment]);
+      reinterpret_cast<mirror::Object*>(&heap_begin[heap_capacity - kObjectAlignment]);
   bitmap->Set(fake_end_of_heap_object);
 }
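
Side note on the test above: heap_capacity is kObjectAlignment * (bits-per-word + 1), so the final alignable object maps to bit 0 of a second bitmap word; Set() on it would read out of bounds if the bitmap's storage were sized one word too small.
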
 
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index a955cc8..90ffe59 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -38,6 +38,10 @@
   return new BumpPointerSpace(name, mem_map.release());
 }
 
+BumpPointerSpace* BumpPointerSpace::CreateFromMemMap(const std::string& name, MemMap* mem_map) {
+  return new BumpPointerSpace(name, mem_map);
+}
+
 BumpPointerSpace::BumpPointerSpace(const std::string& name, byte* begin, byte* limit)
     : ContinuousMemMapAllocSpace(name, nullptr, begin, begin, limit,
                                  kGcRetentionPolicyAlwaysCollect),
@@ -61,9 +65,6 @@
 void BumpPointerSpace::Clear() {
   // Release the pages back to the operating system.
   CHECK_NE(madvise(Begin(), Limit() - Begin(), MADV_DONTNEED), -1) << "madvise failed";
-}
-
-void BumpPointerSpace::Reset() {
   // Reset the end of the space back to the beginning, we move the end forward as we allocate
   // objects.
   SetEnd(Begin());
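
With Reset() folded into Clear() above, one call both returns the pages and empties the space. A self-contained sketch of the idea (not the real class):

#include <sys/mman.h>
#include <cstddef>

struct BumpRegion {
  char* begin;
  char* end;    // Next free byte; allocation bumps this forward.
  char* limit;

  void* Alloc(size_t num_bytes) {
    if (end + num_bytes > limit) {
      return nullptr;  // Space full.
    }
    void* result = end;
    end += num_bytes;
    return result;
  }

  void Clear() {
    // Hand the physical pages back to the kernel...
    madvise(begin, static_cast<size_t>(limit - begin), MADV_DONTNEED);
    // ...and logically empty the space in the same step.
    end = begin;
  }
};
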
@@ -196,7 +197,7 @@
   }
 }
 
-accounting::SpaceBitmap::SweepCallback* BumpPointerSpace::GetSweepCallback() {
+accounting::ContinuousSpaceBitmap::SweepCallback* BumpPointerSpace::GetSweepCallback() {
   LOG(FATAL) << "Unimplemented";
   return nullptr;
 }
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index 3ab5df4..e52a9a3 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -43,6 +43,7 @@
   // guaranteed to be granted, if it is required, the caller should call Begin on the returned
   // space to confirm the request was granted.
   static BumpPointerSpace* Create(const std::string& name, size_t capacity, byte* requested_begin);
+  static BumpPointerSpace* CreateFromMemMap(const std::string& name, MemMap* mem_map);
 
   // Allocate num_bytes, returns nullptr if the space is full.
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
@@ -84,19 +85,16 @@
     return GetMemMap()->Size();
   }
 
-  accounting::SpaceBitmap* GetLiveBitmap() const OVERRIDE {
+  accounting::ContinuousSpaceBitmap* GetLiveBitmap() const OVERRIDE {
     return nullptr;
   }
 
-  accounting::SpaceBitmap* GetMarkBitmap() const OVERRIDE {
+  accounting::ContinuousSpaceBitmap* GetMarkBitmap() const OVERRIDE {
     return nullptr;
   }
 
-  // Madvise the memory back to the OS.
-  void Clear() OVERRIDE;
-
-  // Reset the pointer to the start of the space.
-  void Reset() OVERRIDE LOCKS_EXCLUDED(block_lock_);
+  // Reset the space to empty.
+  void Clear() OVERRIDE LOCKS_EXCLUDED(block_lock_);
 
   void Dump(std::ostream& os) const;
 
@@ -113,6 +111,9 @@
     return Begin() == End();
   }
 
+  bool CanMoveObjects() const OVERRIDE {
+    return true;
+  }
 
   bool Contains(const mirror::Object* obj) const {
     const byte* byte_obj = reinterpret_cast<const byte*>(obj);
@@ -137,7 +138,7 @@
   void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  accounting::SpaceBitmap::SweepCallback* GetSweepCallback() OVERRIDE;
+  accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() OVERRIDE;
 
   // Object alignment within the space.
   static constexpr size_t kAlignment = 8;
diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h
index 02d8b54..4c8a35e 100644
--- a/runtime/gc/space/dlmalloc_space-inl.h
+++ b/runtime/gc/space/dlmalloc_space-inl.h
@@ -52,7 +52,7 @@
 inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/, size_t num_bytes,
                                                                size_t* bytes_allocated,
                                                                size_t* usable_size) {
-  mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_for_alloc_, num_bytes));
+  mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_, num_bytes));
   if (LIKELY(result != NULL)) {
     if (kDebugSpaces) {
       CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 60f566c..41a0458 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
 
-#include "dlmalloc_space.h"
-
 #include "dlmalloc_space-inl.h"
+
 #include "gc/accounting/card_table.h"
+#include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -36,15 +36,19 @@
 template class ValgrindMallocSpace<DlMallocSpace, void*>;
 
 DlMallocSpace::DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin,
-                             byte* end, byte* limit, size_t growth_limit)
-    : MallocSpace(name, mem_map, begin, end, limit, growth_limit),
-      mspace_(mspace), mspace_for_alloc_(mspace) {
+                             byte* end, byte* limit, size_t growth_limit,
+                             bool can_move_objects, size_t starting_size,
+                             size_t initial_size)
+    : MallocSpace(name, mem_map, begin, end, limit, growth_limit, true, can_move_objects,
+                  starting_size, initial_size),
+      mspace_(mspace) {
   CHECK(mspace != NULL);
 }
 
 DlMallocSpace* DlMallocSpace::CreateFromMemMap(MemMap* mem_map, const std::string& name,
                                                size_t starting_size, size_t initial_size,
-                                               size_t growth_limit, size_t capacity) {
+                                               size_t growth_limit, size_t capacity,
+                                               bool can_move_objects) {
   DCHECK(mem_map != nullptr);
   void* mspace = CreateMspace(mem_map->Begin(), starting_size, initial_size);
   if (mspace == nullptr) {
@@ -62,14 +66,17 @@
   byte* begin = mem_map->Begin();
   if (Runtime::Current()->RunningOnValgrind()) {
     return new ValgrindMallocSpace<DlMallocSpace, void*>(
-        name, mem_map, mspace, begin, end, begin + capacity, growth_limit, initial_size);
+        name, mem_map, mspace, begin, end, begin + capacity, growth_limit, initial_size,
+        can_move_objects, starting_size);
   } else {
-    return new DlMallocSpace(name, mem_map, mspace, begin, end, begin + capacity, growth_limit);
+    return new DlMallocSpace(name, mem_map, mspace, begin, end, begin + capacity, growth_limit,
+                             can_move_objects, starting_size, initial_size);
   }
 }
 
-DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
-                                     size_t capacity, byte* requested_begin) {
+DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size,
+                                     size_t growth_limit, size_t capacity, byte* requested_begin,
+                                     bool can_move_objects) {
   uint64_t start_time = 0;
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     start_time = NanoTime();
@@ -93,7 +100,7 @@
     return nullptr;
   }
   DlMallocSpace* space = CreateFromMemMap(mem_map, name, starting_size, initial_size,
-                                          growth_limit, capacity);
+                                          growth_limit, capacity, can_move_objects);
   // We start out with only the initial size possibly containing objects.
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "DlMallocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
@@ -143,8 +150,10 @@
 
 MallocSpace* DlMallocSpace::CreateInstance(const std::string& name, MemMap* mem_map,
                                            void* allocator, byte* begin, byte* end,
-                                           byte* limit, size_t growth_limit) {
-  return new DlMallocSpace(name, mem_map, allocator, begin, end, limit, growth_limit);
+                                           byte* limit, size_t growth_limit,
+                                           bool can_move_objects) {
+  return new DlMallocSpace(name, mem_map, allocator, begin, end, limit, growth_limit,
+                           can_move_objects, starting_size_, initial_size_);
 }
 
 size_t DlMallocSpace::Free(Thread* self, mirror::Object* ptr) {
@@ -280,13 +289,13 @@
 }
 
 void DlMallocSpace::Clear() {
+  size_t footprint_limit = GetFootprintLimit();
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
-  GetLiveBitmap()->Clear();
-  GetMarkBitmap()->Clear();
-}
-
-void DlMallocSpace::Reset() {
-  // TODO: Delete and create new mspace here.
+  live_bitmap_->Clear();
+  mark_bitmap_->Clear();
+  end_ = Begin() + starting_size_;
+  mspace_ = CreateMspace(mem_map_->Begin(), starting_size_, initial_size_);
+  SetFootprintLimit(footprint_limit);
 }
 
 #ifndef NDEBUG
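
Clear() now absorbs the old Reset(): return the pages to the kernel, wipe both bitmaps, rewind end_ to the starting size, rebuild the mspace in place, and restore the saved footprint limit. The key primitive is madvise(MADV_DONTNEED), which drops page contents without invalidating the mapping; a runnable sketch of that semantics (Linux, anonymous private mappings):

#include <cassert>
#include <cstring>
#include <sys/mman.h>

int main() {
  const size_t len = 4096;
  char* p = static_cast<char*>(mmap(nullptr, len, PROT_READ | PROT_WRITE,
                                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
  memset(p, 0xAB, len);            // dirty the page
  madvise(p, len, MADV_DONTNEED);  // give the page back; the mapping survives
  assert(p[0] == 0);               // anonymous pages read back zero-filled
  munmap(p, len);
  return 0;
}

Because the mspace's own bookkeeping lived inside the madvised region, it must be recreated afterwards, which is why Clear() calls CreateMspace again instead of reusing the old handle.
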
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index 76c4489..accd26b 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -36,14 +36,15 @@
   // Create a DlMallocSpace from an existing mem_map.
   static DlMallocSpace* CreateFromMemMap(MemMap* mem_map, const std::string& name,
                                          size_t starting_size, size_t initial_size,
-                                         size_t growth_limit, size_t capacity);
+                                         size_t growth_limit, size_t capacity,
+                                         bool can_move_objects);
 
   // Create a DlMallocSpace with the requested sizes. The requested
   // base address is not guaranteed to be granted, if it is required,
   // the caller should call Begin on the returned space to confirm the
   // request was granted.
   static DlMallocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
-                               size_t capacity, byte* requested_begin);
+                               size_t capacity, byte* requested_begin, bool can_move_objects);
 
   // Virtual to allow ValgrindMallocSpace to intercept.
   virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated,
@@ -107,13 +108,13 @@
   void SetFootprintLimit(size_t limit) OVERRIDE;
 
   MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
-                              byte* begin, byte* end, byte* limit, size_t growth_limit);
+                              byte* begin, byte* end, byte* limit, size_t growth_limit,
+                              bool can_move_objects);
 
   uint64_t GetBytesAllocated() OVERRIDE;
   uint64_t GetObjectsAllocated() OVERRIDE;
 
-  void Clear() OVERRIDE;
-  void Reset() OVERRIDE;
+  virtual void Clear() OVERRIDE;
 
   bool IsDlMallocSpace() const OVERRIDE {
     return true;
@@ -125,7 +126,8 @@
 
  protected:
   DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin, byte* end,
-                byte* limit, size_t growth_limit);
+                byte* limit, size_t growth_limit, bool can_move_objects, size_t starting_size,
+                size_t initial_size);
 
  private:
   mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated,
@@ -142,11 +144,7 @@
   static const size_t kChunkOverhead = kWordSize;
 
   // Underlying malloc space.
-  void* const mspace_;
-
-  // An mspace pointer used for allocation. Equals  mspace_ or nullptr after InvalidateAllocator()
-  // is called.
-  void* mspace_for_alloc_;
+  void* mspace_;
 
   friend class collector::MarkSweep;
 
diff --git a/runtime/gc/space/dlmalloc_space_base_test.cc b/runtime/gc/space/dlmalloc_space_base_test.cc
index 508d869..129eace 100644
--- a/runtime/gc/space/dlmalloc_space_base_test.cc
+++ b/runtime/gc/space/dlmalloc_space_base_test.cc
@@ -23,7 +23,7 @@
 
 MallocSpace* CreateDlMallocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                  size_t capacity, byte* requested_begin) {
-  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin);
+  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin, false);
 }
 
 TEST_SPACE_CREATE_FN_BASE(DlMallocSpace, CreateDlMallocSpace)
diff --git a/runtime/gc/space/dlmalloc_space_random_test.cc b/runtime/gc/space/dlmalloc_space_random_test.cc
index 43a1bf0..c4f8bae 100644
--- a/runtime/gc/space/dlmalloc_space_random_test.cc
+++ b/runtime/gc/space/dlmalloc_space_random_test.cc
@@ -23,7 +23,7 @@
 
 MallocSpace* CreateDlMallocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                  size_t capacity, byte* requested_begin) {
-  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin);
+  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin, false);
 }
 
 TEST_SPACE_CREATE_FN_RANDOM(DlMallocSpace, CreateDlMallocSpace)
diff --git a/runtime/gc/space/dlmalloc_space_static_test.cc b/runtime/gc/space/dlmalloc_space_static_test.cc
index 4fbc81e..edaa198 100644
--- a/runtime/gc/space/dlmalloc_space_static_test.cc
+++ b/runtime/gc/space/dlmalloc_space_static_test.cc
@@ -23,7 +23,7 @@
 
 MallocSpace* CreateDlMallocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                  size_t capacity, byte* requested_begin) {
-  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin);
+  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin, false);
 }
 
 TEST_SPACE_CREATE_FN_STATIC(DlMallocSpace, CreateDlMallocSpace)
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index faa539f..91d8820 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -35,7 +35,7 @@
 Atomic<uint32_t> ImageSpace::bitmap_index_(0);
 
 ImageSpace::ImageSpace(const std::string& name, MemMap* mem_map,
-                       accounting::SpaceBitmap* live_bitmap)
+                       accounting::ContinuousSpaceBitmap* live_bitmap)
     : MemMapSpace(name, mem_map, mem_map->Begin(), mem_map->End(), mem_map->End(),
                   kGcRetentionPolicyNeverCollect) {
   DCHECK(live_bitmap != nullptr);
@@ -197,10 +197,10 @@
   uint32_t bitmap_index = bitmap_index_.FetchAndAdd(1);
   std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_file_name,
                                        bitmap_index));
-  UniquePtr<accounting::SpaceBitmap> bitmap(
-      accounting::SpaceBitmap::CreateFromMemMap(bitmap_name, image_map.release(),
-                                                reinterpret_cast<byte*>(map->Begin()),
-                                                map->Size()));
+  UniquePtr<accounting::ContinuousSpaceBitmap> bitmap(
+      accounting::ContinuousSpaceBitmap::CreateFromMemMap(bitmap_name, image_map.release(),
+                                                          reinterpret_cast<byte*>(map->Begin()),
+                                                          map->Size()));
   if (bitmap.get() == nullptr) {
     *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str());
     return nullptr;
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 116c498..f6daf89 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_GC_SPACE_IMAGE_SPACE_H_
 #define ART_RUNTIME_GC_SPACE_IMAGE_SPACE_H_
 
+#include "gc/accounting/space_bitmap.h"
 #include "space.h"
 
 namespace art {
@@ -59,11 +60,11 @@
     return GetName();
   }
 
-  accounting::SpaceBitmap* GetLiveBitmap() const {
+  accounting::ContinuousSpaceBitmap* GetLiveBitmap() const OVERRIDE {
     return live_bitmap_.get();
   }
 
-  accounting::SpaceBitmap* GetMarkBitmap() const {
+  accounting::ContinuousSpaceBitmap* GetMarkBitmap() const OVERRIDE {
     // ImageSpaces have the same bitmap for both live and marked. This helps reduce the number of
     // special cases to test against.
     return live_bitmap_.get();
@@ -75,6 +76,10 @@
   void Sweep(bool /* swap_bitmaps */, size_t* /* freed_objects */, size_t* /* freed_bytes */) {
   }
 
+  bool CanMoveObjects() const OVERRIDE {
+    return false;
+  }
+
  private:
   // Tries to initialize an ImageSpace from the given image path,
   // returning NULL on error.
@@ -96,9 +101,10 @@
 
   static Atomic<uint32_t> bitmap_index_;
 
-  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
+  UniquePtr<accounting::ContinuousSpaceBitmap> live_bitmap_;
 
-  ImageSpace(const std::string& name, MemMap* mem_map, accounting::SpaceBitmap* live_bitmap);
+  ImageSpace(const std::string& name, MemMap* mem_map,
+             accounting::ContinuousSpaceBitmap* live_bitmap);
 
   // The OatFile associated with the image during early startup to
   // reserve space contiguous to the image. It is later released to
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index eb01325..18e518f 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -75,6 +75,10 @@
 
   void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
 
+  virtual bool CanMoveObjects() const OVERRIDE {
+    return false;
+  }
+
  protected:
   explicit LargeObjectSpace(const std::string& name);
 
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index dac043e..eaf14fb 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -37,24 +37,26 @@
 
 MallocSpace::MallocSpace(const std::string& name, MemMap* mem_map,
                          byte* begin, byte* end, byte* limit, size_t growth_limit,
-                         bool create_bitmaps)
+                         bool create_bitmaps, bool can_move_objects, size_t starting_size,
+                         size_t initial_size)
     : ContinuousMemMapAllocSpace(name, mem_map, begin, end, limit, kGcRetentionPolicyAlwaysCollect),
       recent_free_pos_(0), lock_("allocation space lock", kAllocSpaceLock),
-      growth_limit_(growth_limit) {
+      growth_limit_(growth_limit), can_move_objects_(can_move_objects),
+      starting_size_(starting_size), initial_size_(initial_size) {
   if (create_bitmaps) {
     size_t bitmap_index = bitmap_index_++;
     static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
     CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
     CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
-    live_bitmap_.reset(accounting::SpaceBitmap::Create(
+    live_bitmap_.reset(accounting::ContinuousSpaceBitmap::Create(
         StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
         Begin(), Capacity()));
-    DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #"
+    DCHECK(live_bitmap_.get() != nullptr) << "could not create allocspace live bitmap #"
         << bitmap_index;
-    mark_bitmap_.reset(accounting::SpaceBitmap::Create(
+    mark_bitmap_.reset(accounting::ContinuousSpaceBitmap::Create(
         StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
         Begin(), Capacity()));
-    DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #"
+    DCHECK(live_bitmap_.get() != nullptr) << "could not create allocspace mark bitmap #"
         << bitmap_index;
   }
   for (auto& freed : recent_freed_objects_) {
@@ -178,11 +180,6 @@
              << "GrowthLimit " << growth_limit_ << "\n"
              << "Capacity " << Capacity();
   SetGrowthLimit(RoundUp(size, kPageSize));
-  SetFootprintLimit(RoundUp(size, kPageSize));
-
-  // TODO: Not hardcode these in?
-  const size_t starting_size = kPageSize;
-  const size_t initial_size = 2 * MB;
   // FIXME: Do we need reference counted pointers here?
   // Make the two spaces share the same mark bitmaps since the bitmaps span both of the spaces.
   VLOG(heap) << "Creating new AllocSpace: ";
@@ -194,14 +191,14 @@
   UniquePtr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
                                                     PROT_READ | PROT_WRITE, &error_msg));
   CHECK(mem_map.get() != nullptr) << error_msg;
-  void* allocator = CreateAllocator(end_, starting_size, initial_size, capacity, low_memory_mode);
+  void* allocator = CreateAllocator(end_, starting_size_, initial_size_, capacity, low_memory_mode);
   // Protect memory beyond the initial size.
-  byte* end = mem_map->Begin() + starting_size;
-  if (capacity - initial_size > 0) {
-    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), alloc_space_name);
+  byte* end = mem_map->Begin() + starting_size_;
+  if (capacity > initial_size_) {
+    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size_, PROT_NONE), alloc_space_name);
   }
   *out_malloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator, end_, end,
-                                     limit_, growth_limit);
+                                     limit_, growth_limit, CanMoveObjects());
   SetLimit(End());
   live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
   CHECK_EQ(live_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
@@ -236,7 +233,7 @@
   // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
   // the bitmaps as an optimization.
   if (!context->swap_bitmaps) {
-    accounting::SpaceBitmap* bitmap = space->GetLiveBitmap();
+    accounting::ContinuousSpaceBitmap* bitmap = space->GetLiveBitmap();
     for (size_t i = 0; i < num_ptrs; ++i) {
       bitmap->Clear(ptrs[i]);
     }
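
Note the rewritten guard above: with size_t operands, the old form capacity - initial_size > 0 could never go negative, it wrapped, so a capacity smaller than the initial size would pass the check and feed mprotect a near-SIZE_MAX length. Comparing before subtracting avoids that:

#include <cstddef>
#include <cstdio>

int main() {
  size_t capacity = 1 * 1024 * 1024;
  size_t initial_size = 2 * 1024 * 1024;
  printf("%zu\n", capacity - initial_size);  // wraps to a huge value, not -1M
  if (capacity > initial_size) {             // the safe form: compare first
    printf("mprotect %zu bytes\n", capacity - initial_size);
  }
  return 0;
}
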
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index fbcee5f..d24016c 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -114,7 +114,8 @@
   void SetGrowthLimit(size_t growth_limit);
 
   virtual MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
-                                      byte* begin, byte* end, byte* limit, size_t growth_limit) = 0;
+                                      byte* begin, byte* end, byte* limit, size_t growth_limit,
+                                      bool can_move_objects) = 0;
 
   // Splits ourself into a zygote space and new malloc space which has our unused memory. When true,
   // the low memory mode argument specifies that the heap wishes the created space to be more
@@ -127,9 +128,14 @@
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
+  bool CanMoveObjects() const OVERRIDE {
+    return can_move_objects_;
+  }
+
  protected:
   MallocSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end,
-              byte* limit, size_t growth_limit, bool create_bitmaps = true);
+              byte* limit, size_t growth_limit, bool create_bitmaps, bool can_move_objects,
+              size_t starting_size, size_t initial_size);
 
   static MemMap* CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
                               size_t* growth_limit, size_t* capacity, byte* requested_begin);
@@ -143,7 +149,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
-  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
+  virtual accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() {
     return &SweepCallback;
   }
 
@@ -167,6 +173,13 @@
   // one time by a call to ClearGrowthLimit.
   size_t growth_limit_;
 
+  // True if objects in the space are movable.
+  const bool can_move_objects_;
+
+  // Starting and initial sizes, used when the space is reset.
+  const size_t starting_size_;
+  const size_t initial_size_;
+
  private:
   static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 5c5e7f8..5a7d941 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -15,10 +15,10 @@
  * limitations under the License.
  */
 
-#include "rosalloc_space.h"
-
 #include "rosalloc_space-inl.h"
+
 #include "gc/accounting/card_table.h"
+#include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -34,19 +34,23 @@
 
 static constexpr bool kPrefetchDuringRosAllocFreeList = true;
 
-template class ValgrindMallocSpace<RosAllocSpace, allocator::RosAlloc*>;
+// TODO: Fix
+// template class ValgrindMallocSpace<RosAllocSpace, allocator::RosAlloc*>;
 
 RosAllocSpace::RosAllocSpace(const std::string& name, MemMap* mem_map,
                              art::gc::allocator::RosAlloc* rosalloc, byte* begin, byte* end,
-                             byte* limit, size_t growth_limit)
-    : MallocSpace(name, mem_map, begin, end, limit, growth_limit), rosalloc_(rosalloc) {
-  CHECK(rosalloc != NULL);
+                             byte* limit, size_t growth_limit, bool can_move_objects,
+                             size_t starting_size, size_t initial_size, bool low_memory_mode)
+    : MallocSpace(name, mem_map, begin, end, limit, growth_limit, true, can_move_objects,
+                  starting_size, initial_size),
+      rosalloc_(rosalloc), low_memory_mode_(low_memory_mode) {
+  CHECK(rosalloc != nullptr);
 }
 
 RosAllocSpace* RosAllocSpace::CreateFromMemMap(MemMap* mem_map, const std::string& name,
                                                size_t starting_size, size_t initial_size,
                                                size_t growth_limit, size_t capacity,
-                                               bool low_memory_mode) {
+                                               bool low_memory_mode, bool can_move_objects) {
   DCHECK(mem_map != nullptr);
   allocator::RosAlloc* rosalloc = CreateRosAlloc(mem_map->Begin(), starting_size, initial_size,
                                                  capacity, low_memory_mode);
@@ -66,10 +70,10 @@
   // TODO: Fix RosAllocSpace to support valgrind. There is currently some issues with
   // AllocationSize caused by redzones. b/12944686
   if (false && Runtime::Current()->GetHeap()->RunningOnValgrind()) {
-    return new ValgrindMallocSpace<RosAllocSpace, allocator::RosAlloc*>(
-        name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit, initial_size);
+    LOG(FATAL) << "Unimplemented";
   } else {
-    return new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit);
+    return new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit,
+                             can_move_objects, starting_size, initial_size, low_memory_mode);
   }
 }
 
@@ -79,7 +83,7 @@
 
 RosAllocSpace* RosAllocSpace::Create(const std::string& name, size_t initial_size,
                                      size_t growth_limit, size_t capacity, byte* requested_begin,
-                                     bool low_memory_mode) {
+                                     bool low_memory_mode, bool can_move_objects) {
   uint64_t start_time = 0;
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     start_time = NanoTime();
@@ -104,7 +108,8 @@
   }
 
   RosAllocSpace* space = CreateFromMemMap(mem_map, name, starting_size, initial_size,
-                                          growth_limit, capacity, low_memory_mode);
+                                          growth_limit, capacity, low_memory_mode,
+                                          can_move_objects);
   // We start out with only the initial size possibly containing objects.
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "RosAllocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
@@ -113,7 +118,8 @@
   return space;
 }
 
-allocator::RosAlloc* RosAllocSpace::CreateRosAlloc(void* begin, size_t morecore_start, size_t initial_size,
+allocator::RosAlloc* RosAllocSpace::CreateRosAlloc(void* begin, size_t morecore_start,
+                                                   size_t initial_size,
                                                    size_t maximum_size, bool low_memory_mode) {
   // clear errno to allow PLOG on error
   errno = 0;
@@ -154,9 +160,11 @@
 }
 
 MallocSpace* RosAllocSpace::CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
-                                           byte* begin, byte* end, byte* limit, size_t growth_limit) {
+                                           byte* begin, byte* end, byte* limit, size_t growth_limit,
+                                           bool can_move_objects) {
   return new RosAllocSpace(name, mem_map, reinterpret_cast<allocator::RosAlloc*>(allocator),
-                           begin, end, limit, growth_limit);
+                           begin, end, limit, growth_limit, can_move_objects, starting_size_,
+                           initial_size_, low_memory_mode_);
 }
 
 size_t RosAllocSpace::Free(Thread* self, mirror::Object* ptr) {
@@ -333,13 +341,15 @@
 }
 
 void RosAllocSpace::Clear() {
+  size_t footprint_limit = GetFootprintLimit();
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
-  GetLiveBitmap()->Clear();
-  GetMarkBitmap()->Clear();
-}
-
-void RosAllocSpace::Reset() {
-  // TODO: Delete and create new mspace here.
+  live_bitmap_->Clear();
+  mark_bitmap_->Clear();
+  end_ = begin_ + starting_size_;
+  delete rosalloc_;
+  rosalloc_ = CreateRosAlloc(mem_map_->Begin(), starting_size_, initial_size_, Capacity(),
+                             low_memory_mode_);
+  SetFootprintLimit(footprint_limit);
 }
 
 }  // namespace space
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index 900e7a9..a156738 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -39,11 +39,12 @@
   // the caller should call Begin on the returned space to confirm the
   // request was granted.
   static RosAllocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
-                               size_t capacity, byte* requested_begin, bool low_memory_mode);
+                               size_t capacity, byte* requested_begin, bool low_memory_mode,
+                               bool can_move_objects);
   static RosAllocSpace* CreateFromMemMap(MemMap* mem_map, const std::string& name,
                                          size_t starting_size, size_t initial_size,
                                          size_t growth_limit, size_t capacity,
-                                         bool low_memory_mode);
+                                         bool low_memory_mode, bool can_move_objects);
 
   mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated,
                                   size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_);
@@ -80,9 +81,10 @@
   void SetFootprintLimit(size_t limit) OVERRIDE;
 
   void Clear() OVERRIDE;
-  void Reset() OVERRIDE;
+
   MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
-                              byte* begin, byte* end, byte* limit, size_t growth_limit);
+                              byte* begin, byte* end, byte* limit, size_t growth_limit,
+                              bool can_move_objects) OVERRIDE;
 
   uint64_t GetBytesAllocated() OVERRIDE;
   uint64_t GetObjectsAllocated() OVERRIDE;
@@ -110,7 +112,8 @@
 
  protected:
   RosAllocSpace(const std::string& name, MemMap* mem_map, allocator::RosAlloc* rosalloc,
-                byte* begin, byte* end, byte* limit, size_t growth_limit);
+                byte* begin, byte* end, byte* limit, size_t growth_limit, bool can_move_objects,
+                size_t starting_size, size_t initial_size, bool low_memory_mode);
 
  private:
   mirror::Object* AllocCommon(Thread* self, size_t num_bytes, size_t* bytes_allocated,
@@ -132,7 +135,9 @@
       LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_);
 
   // Underlying rosalloc.
-  allocator::RosAlloc* const rosalloc_;
+  allocator::RosAlloc* rosalloc_;
+
+  const bool low_memory_mode_;
 
   friend class collector::MarkSweep;
 
diff --git a/runtime/gc/space/rosalloc_space_base_test.cc b/runtime/gc/space/rosalloc_space_base_test.cc
index df42076..c3157fa 100644
--- a/runtime/gc/space/rosalloc_space_base_test.cc
+++ b/runtime/gc/space/rosalloc_space_base_test.cc
@@ -23,7 +23,7 @@
 MallocSpace* CreateRosAllocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                  size_t capacity, byte* requested_begin) {
   return RosAllocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin,
-                               Runtime::Current()->GetHeap()->IsLowMemoryMode());
+                               Runtime::Current()->GetHeap()->IsLowMemoryMode(), false);
 }
 
 TEST_SPACE_CREATE_FN_BASE(RosAllocSpace, CreateRosAllocSpace)
diff --git a/runtime/gc/space/rosalloc_space_random_test.cc b/runtime/gc/space/rosalloc_space_random_test.cc
index 4d37c9e..864bbc9 100644
--- a/runtime/gc/space/rosalloc_space_random_test.cc
+++ b/runtime/gc/space/rosalloc_space_random_test.cc
@@ -23,7 +23,7 @@
 MallocSpace* CreateRosAllocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                  size_t capacity, byte* requested_begin) {
   return RosAllocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin,
-                               Runtime::Current()->GetHeap()->IsLowMemoryMode());
+                               Runtime::Current()->GetHeap()->IsLowMemoryMode(), false);
 }
 
 TEST_SPACE_CREATE_FN_RANDOM(RosAllocSpace, CreateRosAllocSpace)
diff --git a/runtime/gc/space/rosalloc_space_static_test.cc b/runtime/gc/space/rosalloc_space_static_test.cc
index 9f11fd0..c0e2ac8 100644
--- a/runtime/gc/space/rosalloc_space_static_test.cc
+++ b/runtime/gc/space/rosalloc_space_static_test.cc
@@ -23,7 +23,7 @@
 MallocSpace* CreateRosAllocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                  size_t capacity, byte* requested_begin) {
   return RosAllocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin,
-                               Runtime::Current()->GetHeap()->IsLowMemoryMode());
+                               Runtime::Current()->GetHeap()->IsLowMemoryMode(), false);
 }
 
 TEST_SPACE_CREATE_FN_STATIC(RosAllocSpace, CreateRosAllocSpace)
diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc
index 4af65a9..01e8b04 100644
--- a/runtime/gc/space/space.cc
+++ b/runtime/gc/space/space.cc
@@ -18,6 +18,7 @@
 
 #include "base/logging.h"
 #include "gc/accounting/heap_bitmap.h"
+#include "gc/accounting/space_bitmap-inl.h"
 #include "runtime.h"
 #include "thread-inl.h"
 
@@ -77,8 +78,8 @@
 void ContinuousMemMapAllocSpace::Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes) {
   DCHECK(freed_objects != nullptr);
   DCHECK(freed_bytes != nullptr);
-  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = GetMarkBitmap();
+  accounting::ContinuousSpaceBitmap* live_bitmap = GetLiveBitmap();
+  accounting::ContinuousSpaceBitmap* mark_bitmap = GetMarkBitmap();
   // If the bitmaps are bound then sweeping this space clearly won't do anything.
   if (live_bitmap == mark_bitmap) {
     return;
@@ -94,11 +95,9 @@
     std::swap(live_bitmap, mark_bitmap);
   }
   // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
-  accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap,
-                                     reinterpret_cast<uintptr_t>(Begin()),
-                                     reinterpret_cast<uintptr_t>(End()),
-                                     GetSweepCallback(),
-                                     reinterpret_cast<void*>(&scc));
+  accounting::ContinuousSpaceBitmap::SweepWalk(
+      *live_bitmap, *mark_bitmap, reinterpret_cast<uintptr_t>(Begin()),
+      reinterpret_cast<uintptr_t>(End()), GetSweepCallback(), reinterpret_cast<void*>(&scc));
   *freed_objects += scc.freed_objects;
   *freed_bytes += scc.freed_bytes;
 }
@@ -106,9 +105,9 @@
 // Returns the old mark bitmap.
 void ContinuousMemMapAllocSpace::BindLiveToMarkBitmap() {
   CHECK(!HasBoundBitmaps());
-  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
+  accounting::ContinuousSpaceBitmap* live_bitmap = GetLiveBitmap();
   if (live_bitmap != mark_bitmap_.get()) {
-    accounting::SpaceBitmap* mark_bitmap = mark_bitmap_.release();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = mark_bitmap_.release();
     Runtime::Current()->GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
     temp_bitmap_.reset(mark_bitmap);
     mark_bitmap_.reset(live_bitmap);
@@ -122,7 +121,7 @@
 void ContinuousMemMapAllocSpace::UnBindBitmaps() {
   CHECK(HasBoundBitmaps());
   // At this point, the temp_bitmap holds our old mark bitmap.
-  accounting::SpaceBitmap* new_bitmap = temp_bitmap_.release();
+  accounting::ContinuousSpaceBitmap* new_bitmap = temp_bitmap_.release();
   Runtime::Current()->GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap_.get(), new_bitmap);
   CHECK_EQ(mark_bitmap_.release(), live_bitmap_.get());
   mark_bitmap_.reset(new_bitmap);
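
The sweep itself is unchanged in spirit: after the GC pre-swaps the bitmaps, anything set in the (old) live bitmap but clear in the (old) mark bitmap is garbage, and SweepWalk hands those addresses to the space's callback. A simplified standalone model of that contract (SweepWalk, live, and mark here are illustrative stand-ins, not the ART types):

#include <cstddef>
#include <cstdio>
#include <vector>

using SweepCallback = void (*)(std::size_t slot);

// Visit every slot that is live-but-unmarked.
void SweepWalk(const std::vector<bool>& live, const std::vector<bool>& mark,
               SweepCallback callback) {
  for (std::size_t i = 0; i < live.size(); ++i) {
    if (live[i] && !mark[i]) {
      callback(i);  // dead object: freed by the space's sweep callback
    }
  }
}

int main() {
  std::vector<bool> live = {true, true, false, true};
  std::vector<bool> mark = {true, false, false, true};
  SweepWalk(live, mark, [](std::size_t i) { printf("free slot %zu\n", i); });
  return 0;
}
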
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 37d7c80..2b27f87 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -34,10 +34,6 @@
 
 namespace gc {
 
-namespace accounting {
-  class SpaceBitmap;
-}  // namespace accounting
-
 class Heap;
 
 namespace space {
@@ -160,6 +156,9 @@
   }
   virtual ContinuousMemMapAllocSpace* AsContinuousMemMapAllocSpace();
 
+  // Returns true if objects in the space are movable.
+  virtual bool CanMoveObjects() const = 0;
+
   virtual ~Space() {}
 
  protected:
@@ -265,8 +264,8 @@
     return End() - Begin();
   }
 
-  virtual accounting::SpaceBitmap* GetLiveBitmap() const = 0;
-  virtual accounting::SpaceBitmap* GetMarkBitmap() const = 0;
+  virtual accounting::ContinuousSpaceBitmap* GetLiveBitmap() const = 0;
+  virtual accounting::ContinuousSpaceBitmap* GetMarkBitmap() const = 0;
 
   // Maximum which the mapped space can grow to.
   virtual size_t Capacity() const {
@@ -396,27 +395,24 @@
   // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
   void SwapBitmaps();
 
-  // Free all memory associated with this space.
+  // Clear the space back to an empty state.
   virtual void Clear() = 0;
 
-  // Reset the space back to an empty space.
-  virtual void Reset() = 0;
-
-  accounting::SpaceBitmap* GetLiveBitmap() const {
+  accounting::ContinuousSpaceBitmap* GetLiveBitmap() const {
     return live_bitmap_.get();
   }
 
-  accounting::SpaceBitmap* GetMarkBitmap() const {
+  accounting::ContinuousSpaceBitmap* GetMarkBitmap() const {
     return mark_bitmap_.get();
   }
 
   void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
-  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() = 0;
+  virtual accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() = 0;
 
  protected:
-  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
+  UniquePtr<accounting::ContinuousSpaceBitmap> live_bitmap_;
+  UniquePtr<accounting::ContinuousSpaceBitmap> mark_bitmap_;
+  UniquePtr<accounting::ContinuousSpaceBitmap> temp_bitmap_;
 
   ContinuousMemMapAllocSpace(const std::string& name, MemMap* mem_map, byte* begin,
                              byte* end, byte* limit, GcRetentionPolicy gc_retention_policy)
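
CanMoveObjects() becomes part of the core Space contract: image, zygote, and large-object spaces pin their objects, while malloc spaces report whatever they were created with. A compacting collector can then filter spaces up front. Sketch with simplified stand-in types (not the real ART interfaces):

#include <vector>

struct Space {
  virtual ~Space() {}
  virtual bool CanMoveObjects() const = 0;
};

struct PinnedSpace : Space {   // image/zygote/large-object flavor
  bool CanMoveObjects() const override { return false; }
};

struct MovableSpace : Space {  // a malloc space created with can_move_objects
  bool CanMoveObjects() const override { return true; }
};

// A compacting phase only considers spaces whose objects may move.
std::vector<Space*> FilterMovable(const std::vector<Space*>& spaces) {
  std::vector<Space*> movable;
  for (Space* space : spaces) {
    if (space->CanMoveObjects()) {
      movable.push_back(space);
    }
  }
  return movable;
}
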
diff --git a/runtime/gc/space/valgrind_malloc_space-inl.h b/runtime/gc/space/valgrind_malloc_space-inl.h
index ed97e60..966c276 100644
--- a/runtime/gc/space/valgrind_malloc_space-inl.h
+++ b/runtime/gc/space/valgrind_malloc_space-inl.h
@@ -95,8 +95,10 @@
 ValgrindMallocSpace<S, A>::ValgrindMallocSpace(const std::string& name, MemMap* mem_map,
                                                A allocator, byte* begin,
                                                byte* end, byte* limit, size_t growth_limit,
-                                               size_t initial_size) :
-    S(name, mem_map, allocator, begin, end, limit, growth_limit) {
+                                               size_t initial_size,
+                                               bool can_move_objects, size_t starting_size) :
+    S(name, mem_map, allocator, begin, end, limit, growth_limit, can_move_objects, starting_size,
+      initial_size) {
   VALGRIND_MAKE_MEM_UNDEFINED(mem_map->Begin() + initial_size, mem_map->Size() - initial_size);
 }
 
diff --git a/runtime/gc/space/valgrind_malloc_space.h b/runtime/gc/space/valgrind_malloc_space.h
index 6b755c4..200ad83 100644
--- a/runtime/gc/space/valgrind_malloc_space.h
+++ b/runtime/gc/space/valgrind_malloc_space.h
@@ -48,7 +48,7 @@
 
   ValgrindMallocSpace(const std::string& name, MemMap* mem_map, AllocatorType allocator,
                       byte* begin, byte* end, byte* limit, size_t growth_limit,
-                      size_t initial_size);
+                      size_t initial_size, bool can_move_objects, size_t starting_size);
   virtual ~ValgrindMallocSpace() {}
 
  private:
diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc
index d1c3d03..1b06b63 100644
--- a/runtime/gc/space/zygote_space.cc
+++ b/runtime/gc/space/zygote_space.cc
@@ -40,8 +40,8 @@
 };
 
 ZygoteSpace* ZygoteSpace::Create(const std::string& name, MemMap* mem_map,
-                                 accounting::SpaceBitmap* live_bitmap,
-                                 accounting::SpaceBitmap* mark_bitmap) {
+                                 accounting::ContinuousSpaceBitmap* live_bitmap,
+                                 accounting::ContinuousSpaceBitmap* mark_bitmap) {
   DCHECK(live_bitmap != nullptr);
   DCHECK(mark_bitmap != nullptr);
   size_t objects_allocated = 0;
@@ -61,10 +61,6 @@
   LOG(FATAL) << "Unimplemented";
 }
 
-void ZygoteSpace::Reset() {
-  LOG(FATAL) << "Unimplemented";
-}
-
 ZygoteSpace::ZygoteSpace(const std::string& name, MemMap* mem_map, size_t objects_allocated)
     : ContinuousMemMapAllocSpace(name, mem_map, mem_map->Begin(), mem_map->End(), mem_map->End(),
                                  kGcRetentionPolicyFullCollect),
@@ -109,7 +105,7 @@
   // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
   // the bitmaps as an optimization.
   if (!context->swap_bitmaps) {
-    accounting::SpaceBitmap* bitmap = zygote_space->GetLiveBitmap();
+    accounting::ContinuousSpaceBitmap* bitmap = zygote_space->GetLiveBitmap();
     for (size_t i = 0; i < num_ptrs; ++i) {
       bitmap->Clear(ptrs[i]);
     }

diff --git a/runtime/gc/space/zygote_space.h b/runtime/gc/space/zygote_space.h
index 8880548..50fc62b 100644
--- a/runtime/gc/space/zygote_space.h
+++ b/runtime/gc/space/zygote_space.h
@@ -17,16 +17,13 @@
 #ifndef ART_RUNTIME_GC_SPACE_ZYGOTE_SPACE_H_
 #define ART_RUNTIME_GC_SPACE_ZYGOTE_SPACE_H_
 
+#include "gc/accounting/space_bitmap.h"
 #include "malloc_space.h"
 #include "mem_map.h"
 
 namespace art {
 namespace gc {
 
-namespace accounting {
-class SpaceBitmap;
-}
-
 namespace space {
 
 // A zygote space is a space which you cannot allocate into or free from.
@@ -34,8 +31,8 @@
  public:
   // Returns the remaining storage in the out_map field.
   static ZygoteSpace* Create(const std::string& name, MemMap* mem_map,
-                             accounting::SpaceBitmap* live_bitmap,
-                             accounting::SpaceBitmap* mark_bitmap)
+                             accounting::ContinuousSpaceBitmap* live_bitmap,
+                             accounting::ContinuousSpaceBitmap* mark_bitmap)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void Dump(std::ostream& os) const;
@@ -72,10 +69,13 @@
   }
 
   void Clear() OVERRIDE;
-  void Reset() OVERRIDE;
+
+  bool CanMoveObjects() const OVERRIDE {
+    return false;
+  }
 
  protected:
-  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
+  virtual accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() {
     return &SweepCallback;
   }
 
diff --git a/runtime/globals.h b/runtime/globals.h
index 7e85231..e3c54b8 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -50,9 +50,6 @@
 // Required stack alignment
 static constexpr size_t kStackAlignment = 16;
 
-// Required object alignment
-static constexpr size_t kObjectAlignment = 8;
-
 // ARM instruction alignment. ARM processors require code to be 4-byte aligned,
 // but ARM ELF requires 8.
 static constexpr size_t kArmAlignment = 8;
@@ -72,6 +69,10 @@
 // compile-time constant so the compiler can generate better code.
 static constexpr int kPageSize = 4096;
 
+// Required object alignment
+static constexpr size_t kObjectAlignment = 8;
+static constexpr size_t kLargeObjectAlignment = kPageSize;
+
 // Whether or not this is a debug build. Useful in conditionals where NDEBUG isn't.
 #if defined(NDEBUG)
 static constexpr bool kIsDebugBuild = false;
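
Moving kObjectAlignment below the kPageSize definition lets the new kLargeObjectAlignment be expressed in terms of it: small objects stay 8-byte aligned, large objects become page aligned. A self-contained check of what those constants imply (RoundUp is a local stand-in for the runtime's helper):

#include <cstddef>

static constexpr int kPageSize = 4096;
static constexpr size_t kObjectAlignment = 8;
static constexpr size_t kLargeObjectAlignment = kPageSize;

// Round up to a power-of-two alignment.
constexpr size_t RoundUp(size_t x, size_t alignment) {
  return (x + alignment - 1) & ~(alignment - 1);
}

static_assert(RoundUp(1, kObjectAlignment) == 8, "objects are 8-byte aligned");
static_assert(RoundUp(kPageSize + 1, kLargeObjectAlignment) == 2 * kPageSize,
              "large objects are page aligned");

int main() { return 0; }
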
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index f4eecfc..a08becf 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -81,6 +81,10 @@
     return mask_ != peer.mask_;
   }
 
+  bool operator<=(const InstructionSetFeatures &peer) const {
+    return (mask_ & peer.mask_) == mask_;
+  }
+
  private:
   uint32_t mask_;
 };
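
operator<= treats the feature mask as a set: every feature bit set on the left must also be set on the right, so a binary built assuming fewer CPU features is accepted on a CPU that reports more. A tiny standalone illustration (the bit values are made up for the example):

#include <cassert>
#include <cstdint>

struct Features {
  uint32_t mask;
  bool operator<=(const Features& other) const {
    return (mask & other.mask) == mask;  // subset test
  }
};

int main() {
  Features build_time{0x3};  // e.g. div + thumb2, as assumed at compile time
  Features run_time{0x7};    // the CPU additionally reports a third feature
  assert(build_time <= run_time);     // subset: OK to run this binary
  assert(!(run_time <= build_time));  // a strict superset is not a subset
  return 0;
}
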
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 525e2b3..bcde9e5 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -63,6 +63,7 @@
       interpret_only_(false), forced_interpret_only_(false),
       have_method_entry_listeners_(false), have_method_exit_listeners_(false),
       have_method_unwind_listeners_(false), have_dex_pc_listeners_(false),
+      have_field_read_listeners_(false), have_field_write_listeners_(false),
       have_exception_caught_listeners_(false),
       deoptimized_methods_lock_("deoptimized methods lock"),
       deoptimization_enabled_(false),
@@ -373,6 +374,14 @@
     dex_pc_listeners_.push_back(listener);
     have_dex_pc_listeners_ = true;
   }
+  if ((events & kFieldRead) != 0) {
+    field_read_listeners_.push_back(listener);
+    have_field_read_listeners_ = true;
+  }
+  if ((events & kFieldWritten) != 0) {
+    field_write_listeners_.push_back(listener);
+    have_field_write_listeners_ = true;
+  }
   if ((events & kExceptionCaught) != 0) {
     exception_caught_listeners_.push_back(listener);
     have_exception_caught_listeners_ = true;
@@ -410,6 +419,22 @@
     }
     have_dex_pc_listeners_ = dex_pc_listeners_.size() > 0;
   }
+  if ((events & kFieldRead) != 0) {
+    bool contains = std::find(field_read_listeners_.begin(), field_read_listeners_.end(),
+                              listener) != field_read_listeners_.end();
+    if (contains) {
+      field_read_listeners_.remove(listener);
+    }
+    have_field_read_listeners_ = field_read_listeners_.size() > 0;
+  }
+  if ((events & kFieldWritten) != 0) {
+    bool contains = std::find(field_write_listeners_.begin(), field_write_listeners_.end(),
+                              listener) != field_write_listeners_.end();
+    if (contains) {
+      field_write_listeners_.remove(listener);
+    }
+    have_field_write_listeners_ = field_write_listeners_.size() > 0;
+  }
   if ((events & kExceptionCaught) != 0) {
     exception_caught_listeners_.remove(listener);
     have_exception_caught_listeners_ = exception_caught_listeners_.size() > 0;
@@ -743,6 +768,30 @@
   }
 }
 
+void Instrumentation::FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
+                                         mirror::ArtMethod* method, uint32_t dex_pc,
+                                         mirror::ArtField* field) const {
+  if (have_field_read_listeners_) {
+    // TODO: same comment as DexPcMovedEventImpl.
+    std::list<InstrumentationListener*> copy(field_read_listeners_);
+    for (InstrumentationListener* listener : copy) {
+      listener->FieldRead(thread, this_object, method, dex_pc, field);
+    }
+  }
+}
+
+void Instrumentation::FieldWriteEventImpl(Thread* thread, mirror::Object* this_object,
+                                         mirror::ArtMethod* method, uint32_t dex_pc,
+                                         mirror::ArtField* field, const JValue& field_value) const {
+  if (have_field_write_listeners_) {
+    // TODO: same comment as DexPcMovedEventImpl.
+    std::list<InstrumentationListener*> copy(field_write_listeners_);
+    for (InstrumentationListener* listener : copy) {
+      listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
+    }
+  }
+}
+
 void Instrumentation::ExceptionCaughtEvent(Thread* thread, const ThrowLocation& throw_location,
                                            mirror::ArtMethod* catch_method,
                                            uint32_t catch_dex_pc,
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 2a9c35f..3de0728 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -28,6 +28,7 @@
 
 namespace art {
 namespace mirror {
+  class ArtField;
   class ArtMethod;
   class Class;
   class Object;
@@ -78,6 +79,14 @@
                           mirror::ArtMethod* method, uint32_t new_dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
+  // Call-back for when we read from a field.
+  virtual void FieldRead(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                         uint32_t dex_pc, mirror::ArtField* field) = 0;
+
+  // Call-back for when we write into a field.
+  virtual void FieldWritten(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                            uint32_t dex_pc, mirror::ArtField* field, const JValue& field_value) = 0;
+
   // Call-back when an exception is caught.
   virtual void ExceptionCaught(Thread* thread, const ThrowLocation& throw_location,
                                mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
@@ -92,11 +101,13 @@
 class Instrumentation {
  public:
   enum InstrumentationEvent {
-    kMethodEntered = 1,
-    kMethodExited = 2,
-    kMethodUnwind = 4,
-    kDexPcMoved = 8,
-    kExceptionCaught = 16
+    kMethodEntered =   1 << 0,
+    kMethodExited =    1 << 1,
+    kMethodUnwind =    1 << 2,
+    kDexPcMoved =      1 << 3,
+    kFieldRead =       1 << 4,
+    kFieldWritten =    1 << 5,
+    kExceptionCaught = 1 << 6,
   };
 
   Instrumentation();
@@ -217,6 +228,14 @@
     return have_dex_pc_listeners_;
   }
 
+  bool HasFieldReadListeners() const {
+    return have_field_read_listeners_;
+  }
+
+  bool HasFieldWriteListeners() const {
+    return have_field_write_listeners_;
+  }
+
   bool IsActive() const {
     return have_dex_pc_listeners_ || have_method_entry_listeners_ || have_method_exit_listeners_ ||
         have_exception_caught_listeners_ || have_method_unwind_listeners_;
@@ -256,6 +275,26 @@
     }
   }
 
+  // Inform listeners that we read a field (only supported by the interpreter).
+  void FieldReadEvent(Thread* thread, mirror::Object* this_object,
+                      mirror::ArtMethod* method, uint32_t dex_pc,
+                      mirror::ArtField* field) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (UNLIKELY(HasFieldReadListeners())) {
+      FieldReadEventImpl(thread, this_object, method, dex_pc, field);
+    }
+  }
+
+  // Inform listeners that we write a field (only supported by the interpreter).
+  void FieldWriteEvent(Thread* thread, mirror::Object* this_object,
+                       mirror::ArtMethod* method, uint32_t dex_pc,
+                       mirror::ArtField* field, const JValue& field_value) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (UNLIKELY(HasFieldWriteListeners())) {
+      FieldWriteEventImpl(thread, this_object, method, dex_pc, field, field_value);
+    }
+  }
+
   // Inform listeners that an exception was caught.
   void ExceptionCaughtEvent(Thread* thread, const ThrowLocation& throw_location,
                             mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
@@ -313,6 +352,14 @@
   void DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
                            mirror::ArtMethod* method, uint32_t dex_pc) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
+                           mirror::ArtMethod* method, uint32_t dex_pc,
+                           mirror::ArtField* field) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FieldWriteEventImpl(Thread* thread, mirror::Object* this_object,
+                           mirror::ArtMethod* method, uint32_t dex_pc,
+                           mirror::ArtField* field, const JValue& field_value) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Have we hijacked ArtMethod::code_ so that it calls instrumentation/interpreter code?
   bool instrumentation_stubs_installed_;
@@ -345,6 +392,14 @@
   // instrumentation_lock_.
   bool have_dex_pc_listeners_;
 
+  // Do we have any listeners for field read events? Short-cut to avoid taking the
+  // instrumentation_lock_.
+  bool have_field_read_listeners_;
+
+  // Do we have any listeners for field write events? Short-cut to avoid taking the
+  // instrumentation_lock_.
+  bool have_field_write_listeners_;
+
   // Do we have any exception caught listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_exception_caught_listeners_;
 
@@ -353,6 +408,8 @@
   std::list<InstrumentationListener*> method_exit_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_unwind_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> dex_pc_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> field_read_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> field_write_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> exception_caught_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
   // The set of methods being deoptimized (by the debugger) which must be executed with interpreter
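
Rewriting the event kinds as 1 << n makes the bitmask intent explicit and gives kFieldRead/kFieldWritten their own bits; AddListener and RemoveListener then test membership with plain mask arithmetic. A compact model of that pattern:

#include <cstdint>
#include <cstdio>

enum Event : uint32_t {
  kMethodEntered = 1 << 0,
  kMethodExited  = 1 << 1,
  kFieldRead     = 1 << 4,
  kFieldWritten  = 1 << 5,
};

int main() {
  uint32_t events = kFieldRead | kFieldWritten;  // caller subscribes to both
  if ((events & kFieldRead) != 0) {
    printf("append to field_read_listeners_\n");
  }
  if ((events & kMethodEntered) == 0) {
    printf("method-entry listeners untouched\n");
  }
  return 0;
}
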
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index 66ebb96..1477324 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -197,6 +197,17 @@
      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
+   * A field of interest has been accessed or modified. This is used for field access and field
+   * modification events.
+   *
+   * "fieldValue" is non-null for field modification events only.
+   * "is_modification" is true for field modification, false for field access.
+   */
+  bool PostFieldEvent(const JdwpLocation* pLoc, RefTypeId typeId, FieldId fieldId,
+                      ObjectId thisPtr, const JValue* fieldValue, bool is_modification)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  /*
    * An exception has been thrown.
    *
    * Pass in a zeroed-out "*pCatchLoc" if the exception wasn't caught.
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 9b3ea2e..6908047 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -121,26 +121,14 @@
   /* nothing for StepOnly -- handled differently */
 };
 
-/*
- * Dump an event to the log file.
- */
-static void dumpEvent(const JdwpEvent* pEvent) {
-  LOG(INFO) << StringPrintf("Event id=0x%4x %p (prev=%p next=%p):", pEvent->requestId, pEvent, pEvent->prev, pEvent->next);
-  LOG(INFO) << "  kind=" << pEvent->eventKind << " susp=" << pEvent->suspend_policy << " modCount=" << pEvent->modCount;
-
-  for (int i = 0; i < pEvent->modCount; i++) {
-    const JdwpEventMod* pMod = &pEvent->mods[i];
-    LOG(INFO) << "  " << pMod->modKind;
-    /* TODO - show details */
-  }
-}
-
 static bool NeedsFullDeoptimization(JdwpEventKind eventKind) {
   switch (eventKind) {
       case EK_METHOD_ENTRY:
       case EK_METHOD_EXIT:
       case EK_METHOD_EXIT_WITH_RETURN_VALUE:
       case EK_SINGLE_STEP:
+      case EK_FIELD_ACCESS:
+      case EK_FIELD_MODIFICATION:
         return true;
       default:
         return false;
@@ -177,9 +165,6 @@
       if (status != ERR_NONE) {
         return status;
       }
-    } else if (pMod->modKind == MK_FIELD_ONLY) {
-      /* should be for EK_FIELD_ACCESS or EK_FIELD_MODIFICATION */
-      dumpEvent(pEvent);  /* TODO - need for field watches */
     }
   }
   if (NeedsFullDeoptimization(pEvent->eventKind)) {
@@ -422,6 +407,9 @@
     case MK_COUNT:
       CHECK_GT(pMod->count.count, 0);
       pMod->count.count--;
+      if (pMod->count.count > 0) {
+        return false;
+      }
       break;
     case MK_CONDITIONAL:
       CHECK(false);  // should not be getting these
@@ -843,6 +831,86 @@
   return match_count != 0;
 }
 
+bool JdwpState::PostFieldEvent(const JdwpLocation* pLoc, RefTypeId typeId, FieldId fieldId,
+                               ObjectId thisPtr, const JValue* fieldValue, bool is_modification) {
+  ModBasket basket;
+  basket.pLoc = pLoc;
+  basket.classId = pLoc->class_id;
+  basket.thisPtr = thisPtr;
+  basket.threadId = Dbg::GetThreadSelfId();
+  basket.className = Dbg::GetClassName(pLoc->class_id);
+  basket.field = fieldId;
+
+  if (InvokeInProgress()) {
+    VLOG(jdwp) << "Not posting field event during invoke";
+    return false;
+  }
+
+  // Get field's reference type tag.
+  JDWP::JdwpTypeTag type_tag;
+  uint32_t class_status;  // unused here.
+  JdwpError error = Dbg::GetClassInfo(typeId, &type_tag, &class_status, NULL);
+  if (error != ERR_NONE) {
+    return false;
+  }
+
+  // Get instance type tag.
+  uint8_t tag;
+  error = Dbg::GetObjectTag(thisPtr, tag);
+  if (error != ERR_NONE) {
+    return false;
+  }
+
+  int match_count = 0;
+  ExpandBuf* pReq = NULL;
+  JdwpSuspendPolicy suspend_policy = SP_NONE;
+  {
+    MutexLock mu(Thread::Current(), event_list_lock_);
+    JdwpEvent** match_list = AllocMatchList(event_list_size_);
+
+    if (is_modification) {
+      FindMatchingEvents(EK_FIELD_MODIFICATION, &basket, match_list, &match_count);
+    } else {
+      FindMatchingEvents(EK_FIELD_ACCESS, &basket, match_list, &match_count);
+    }
+    if (match_count != 0) {
+      VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
+                 << basket.className << "." << Dbg::GetMethodName(pLoc->method_id)
+                 << StringPrintf(" thread=%#" PRIx64 "  dex_pc=%#" PRIx64 ")",
+                                 basket.threadId, pLoc->dex_pc);
+
+      suspend_policy = scanSuspendPolicy(match_list, match_count);
+      VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
+
+      pReq = eventPrep();
+      expandBufAdd1(pReq, suspend_policy);
+      expandBufAdd4BE(pReq, match_count);
+
+      for (int i = 0; i < match_count; i++) {
+        expandBufAdd1(pReq, match_list[i]->eventKind);
+        expandBufAdd4BE(pReq, match_list[i]->requestId);
+        expandBufAdd8BE(pReq, basket.threadId);
+        expandBufAddLocation(pReq, *pLoc);
+        expandBufAdd1(pReq, type_tag);
+        expandBufAddRefTypeId(pReq, typeId);
+        expandBufAddFieldId(pReq, fieldId);
+        expandBufAdd1(pReq, tag);
+        expandBufAddObjectId(pReq, thisPtr);
+        if (is_modification) {
+          Dbg::OutputFieldValue(fieldId, fieldValue, pReq);
+        }
+      }
+    }
+
+    CleanupMatchList(match_list, match_count);
+  }
+
+  Dbg::ManageDeoptimization();
+
+  SendRequestAndPossiblySuspend(pReq, suspend_policy, basket.threadId);
+  return match_count != 0;
+}
+
 /*
  * A thread is starting or stopping.
  *
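
The MK_COUNT change above is easy to miss: previously the countdown was decremented but the event still fired on every match; now a count modifier of N suppresses the first N-1 matches and fires when the counter reaches zero, as JDWP's count modifier specifies. In miniature:

#include <cstdio>

struct CountMod {
  int count;
};

// Mirrors the fixed MK_COUNT arm of the match logic.
bool CountModMatches(CountMod* mod) {
  --mod->count;
  if (mod->count > 0) {
    return false;  // the early-out the old code was missing
  }
  return true;
}

int main() {
  CountMod mod{3};
  for (int i = 1; i <= 3; ++i) {
    printf("match %d -> %s\n", i, CountModMatches(&mod) ? "fire" : "skip");
  }
  return 0;
}
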
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index c2a2b54..5ffe753 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -372,7 +372,7 @@
   expandBufAdd1(reply, false);   // canAddMethod
   expandBufAdd1(reply, false);   // canUnrestrictedlyRedefineClasses
   expandBufAdd1(reply, false);   // canPopFrames
-  expandBufAdd1(reply, false);   // canUseInstanceFilters
+  expandBufAdd1(reply, true);    // canUseInstanceFilters
   expandBufAdd1(reply, false);   // canGetSourceDebugExtension
   expandBufAdd1(reply, false);   // canRequestVMDeathEvent
   expandBufAdd1(reply, false);   // canSetDefaultStratum
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 5fc0228..8e22c1d 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -237,55 +237,41 @@
   Locks::mutator_lock_->AssertNotHeld(self);
   UniquePtr<JdwpState> state(new JdwpState(options));
   switch (options->transport) {
-  case kJdwpTransportSocket:
-    InitSocketTransport(state.get(), options);
-    break;
+    case kJdwpTransportSocket:
+      InitSocketTransport(state.get(), options);
+      break;
 #ifdef HAVE_ANDROID_OS
-  case kJdwpTransportAndroidAdb:
-    InitAdbTransport(state.get(), options);
-    break;
+    case kJdwpTransportAndroidAdb:
+      InitAdbTransport(state.get(), options);
+      break;
 #endif
-  default:
-    LOG(FATAL) << "Unknown transport: " << options->transport;
+    default:
+      LOG(FATAL) << "Unknown transport: " << options->transport;
   }
 
-  if (!options->suspend) {
+  {
     /*
      * Grab a mutex before starting the thread.  This ensures they
      * won't signal the cond var before we're waiting.
      */
     MutexLock thread_start_locker(self, state->thread_start_lock_);
+
     /*
      * We have bound to a port, or are trying to connect outbound to a
      * debugger.  Create the JDWP thread and let it continue the mission.
      */
-    CHECK_PTHREAD_CALL(pthread_create, (&state->pthread_, NULL, StartJdwpThread, state.get()), "JDWP thread");
+    CHECK_PTHREAD_CALL(pthread_create, (&state->pthread_, nullptr, StartJdwpThread, state.get()),
+                       "JDWP thread");
 
     /*
      * Wait until the thread finishes basic initialization.
-     * TODO: cond vars should be waited upon in a loop
      */
-    state->thread_start_cond_.Wait(self);
-  } else {
-    {
-      /*
-       * Grab a mutex before starting the thread.  This ensures they
-       * won't signal the cond var before we're waiting.
-       */
-      MutexLock thread_start_locker(self, state->thread_start_lock_);
-      /*
-       * We have bound to a port, or are trying to connect outbound to a
-       * debugger.  Create the JDWP thread and let it continue the mission.
-       */
-      CHECK_PTHREAD_CALL(pthread_create, (&state->pthread_, NULL, StartJdwpThread, state.get()), "JDWP thread");
-
-      /*
-       * Wait until the thread finishes basic initialization.
-       * TODO: cond vars should be waited upon in a loop
-       */
+    while (!state->debug_thread_started_) {
       state->thread_start_cond_.Wait(self);
     }
+  }
 
+  if (options->suspend) {
     /*
      * For suspend=y, wait for the debugger to connect to us or for us to
      * connect to the debugger.
@@ -481,11 +467,8 @@
     /* process requests until the debugger drops */
     bool first = true;
     while (!Dbg::IsDisposed()) {
-      {
-        // sanity check -- shouldn't happen?
-        MutexLock mu(thread_, *Locks::thread_suspend_count_lock_);
-        CHECK_EQ(thread_->GetState(), kWaitingInMainDebuggerLoop);
-      }
+      // sanity check -- shouldn't happen?
+      CHECK_EQ(thread_->GetState(), kWaitingInMainDebuggerLoop);
 
       if (!netState->ProcessIncoming()) {
         /* blocking read */
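
Note: the restructured startup above now waits on the condition variable in a loop guarded by debug_thread_started_, resolving the removed "cond vars should be waited upon in a loop" TODO: re-checking the predicate makes both spurious wakeups and a signal that fires before the waiter blocks harmless. A minimal standalone sketch of the same pattern using standard C++ primitives (all names illustrative):

    #include <condition_variable>
    #include <mutex>
    #include <thread>

    std::mutex start_lock;
    std::condition_variable start_cond;
    bool thread_started = false;  // The predicate, guarded by start_lock.

    void Worker() {
      {
        std::lock_guard<std::mutex> lock(start_lock);
        thread_started = true;  // Set the predicate before signaling.
      }
      start_cond.notify_one();
    }

    int main() {
      // Grab the lock before starting the thread so it cannot signal early.
      std::unique_lock<std::mutex> lock(start_lock);
      std::thread worker(Worker);
      while (!thread_started) {  // Re-check the predicate on every wakeup.
        start_cond.wait(lock);   // Atomically unlocks while waiting.
      }
      lock.unlock();
      worker.join();
      return 0;
    }
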
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index d9155f5..be7e9f2 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -39,6 +39,32 @@
 namespace art {
 namespace mirror {
 
+class CopyReferenceFieldsWithReadBarrierVisitor {
+ public:
+  explicit CopyReferenceFieldsWithReadBarrierVisitor(Object* dest_obj)
+      : dest_obj_(dest_obj) {}
+
+  void operator()(Object* obj, MemberOffset offset, bool /* is_static */) const
+      ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // GetFieldObject() contains a RB.
+    Object* ref = obj->GetFieldObject<Object>(offset, false);
+    // No WB here as a large object space does not have card table
+    // coverage. Instead, cards will be marked separately.
+    dest_obj_->SetFieldObjectWithoutWriteBarrier<false, false>(offset, ref, false);
+  }
+
+  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+      ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Copy java.lang.ref.Reference.referent which isn't visited in
+    // Object::VisitReferences().
+    DCHECK(klass->IsReferenceClass());
+    this->operator()(ref, mirror::Reference::ReferentOffset(), false);
+  }
+
+ private:
+  Object* const dest_obj_;
+};
+
 static Object* CopyObject(Thread* self, mirror::Object* dest, mirror::Object* src, size_t num_bytes)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Copy instance data.  We assume memcpy copies by words.
@@ -47,6 +73,13 @@
   byte* dst_bytes = reinterpret_cast<byte*>(dest);
   size_t offset = sizeof(Object);
   memcpy(dst_bytes + offset, src_bytes + offset, num_bytes - offset);
+  if (kUseBakerOrBrooksReadBarrier) {
+    // We need a RB here. After the memcpy that covers the whole
+    // object above, copy the reference fields one by one again with a
+    // RB. TODO: Optimize this later?
+    CopyReferenceFieldsWithReadBarrierVisitor visitor(dest);
+    src->VisitReferences<true>(visitor, visitor);
+  }
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // Perform write barriers on copied object references.
   Class* c = src->GetClass();
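
Note: the visitor above exists because the bulk memcpy copies reference fields as plain bits, bypassing the read barrier that Baker/Brooks collectors rely on to hand out forwarded (to-space) pointers. A simplified standalone sketch of the idea, assuming a hypothetical ReadBarrierMark() and a flat table of reference offsets (ART's real visitor walks the class's reference-offset bitmap instead):

    #include <cstddef>

    struct Obj;

    // Hypothetical barrier: a real concurrent-copying GC may return a
    // forwarded to-space pointer here instead of the original reference.
    Obj* ReadBarrierMark(Obj* ref) { return ref; }

    // Re-copy each reference field through the barrier after the bulk memcpy.
    void CopyRefsWithReadBarrier(Obj* dest, Obj* src,
                                 const size_t* ref_offsets, size_t num_refs) {
      char* src_bytes = reinterpret_cast<char*>(src);
      char* dest_bytes = reinterpret_cast<char*>(dest);
      for (size_t i = 0; i < num_refs; ++i) {
        Obj** src_slot = reinterpret_cast<Obj**>(src_bytes + ref_offsets[i]);
        Obj** dest_slot = reinterpret_cast<Obj**>(dest_bytes + ref_offsets[i]);
        // The barriered read may see a pointer the raw byte copy missed.
        *dest_slot = ReadBarrierMark(*src_slot);
      }
    }
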
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 8032cc3..e0c14c3 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -128,7 +128,27 @@
   CHECK_EQ(sizeof(HeapReference<T>), sizeof(uint32_t));
   IntArray* dstAsIntArray = reinterpret_cast<IntArray*>(this);
   IntArray* srcAsIntArray = reinterpret_cast<IntArray*>(src);
-  dstAsIntArray->Memmove(dst_pos, srcAsIntArray, src_pos, count);
+  if (kUseBakerOrBrooksReadBarrier) {
+    // TODO: Optimize this later?
+    const bool copy_forward = (src != this) || (dst_pos < src_pos) || (dst_pos - src_pos >= count);
+    if (copy_forward) {
+      // Forward copy.
+      for (int i = 0; i < count; ++i) {
+        // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
+        Object* obj = src->GetWithoutChecks(src_pos + i);
+        SetWithoutChecks<false>(dst_pos + i, obj);
+      }
+    } else {
+      // Backward copy.
+      for (int i = count - 1; i >= 0; --i) {
+        // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
+        Object* obj = src->GetWithoutChecks(src_pos + i);
+        SetWithoutChecks<false>(dst_pos + i, obj);
+      }
+    }
+  } else {
+    dstAsIntArray->Memmove(dst_pos, srcAsIntArray, src_pos, count);
+  }
   Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
   if (kIsDebugBuild) {
     for (int i = 0; i < count; ++i) {
@@ -151,7 +171,16 @@
   CHECK_EQ(sizeof(HeapReference<T>), sizeof(uint32_t));
   IntArray* dstAsIntArray = reinterpret_cast<IntArray*>(this);
   IntArray* srcAsIntArray = reinterpret_cast<IntArray*>(src);
-  dstAsIntArray->Memcpy(dst_pos, srcAsIntArray, src_pos, count);
+  if (kUseBakerOrBrooksReadBarrier) {
+    // TODO: Optimize this later?
+    for (int i = 0; i < count; ++i) {
+      // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
+      T* obj = src->GetWithoutChecks(src_pos + i);
+      SetWithoutChecks<false>(dst_pos + i, obj);
+    }
+  } else {
+    dstAsIntArray->Memcpy(dst_pos, srcAsIntArray, src_pos, count);
+  }
   Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
   if (kIsDebugBuild) {
     for (int i = 0; i < count; ++i) {
@@ -176,6 +205,7 @@
   int i = 0;
   for (; i < count; ++i) {
    // The following get operations force the objects to be verified.
+    // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
     o = src->GetWithoutChecks(src_pos + i);
     if (o == nullptr) {
       // Null is always assignable.
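
Note: the copy_forward test above reimplements memmove's overlap rule for the element-wise read-barrier path: a front-to-back copy is safe unless the destination begins inside the not-yet-read tail of the source, in which case the copy must run back-to-front. A standalone illustration with a plain int array:

    #include <cassert>

    // Copies count elements from src_pos to dst_pos within one array without
    // clobbering unread source elements, mirroring the test above.
    void OverlapSafeCopy(int* data, int dst_pos, int src_pos, int count) {
      bool copy_forward = (dst_pos < src_pos) || (dst_pos - src_pos >= count);
      if (copy_forward) {
        for (int i = 0; i < count; ++i) {
          data[dst_pos + i] = data[src_pos + i];
        }
      } else {
        // dst overlaps the tail of src: copy backwards so every read happens
        // before the write that would overwrite it.
        for (int i = count - 1; i >= 0; --i) {
          data[dst_pos + i] = data[src_pos + i];
        }
      }
    }

    int main() {
      int a[] = {1, 2, 3, 4, 5};
      OverlapSafeCopy(a, 1, 0, 4);  // Shift right by one: takes the backward path.
      assert(a[1] == 1 && a[2] == 2 && a[3] == 3 && a[4] == 4);
      return 0;
    }
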
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index bab0604..6af16f4 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -14,8 +14,10 @@
  * limitations under the License.
  */
 
-#include <unistd.h>
+#include <algorithm>
 #include <fcntl.h>
+#include <set>
+#include <unistd.h>
 
 #include "base/logging.h"
 #include "class_linker.h"
@@ -30,6 +32,7 @@
 #include "mirror/string.h"
 #include "oat.h"
 #include "os.h"
+#include "profiler.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -101,6 +104,7 @@
 
   uint32_t dex_location_checksum;
   uint32_t* dex_location_checksum_pointer = &dex_location_checksum;
+  std::vector<std::string> error_msgs;
   std::string error_msg;
   if (!DexFile::GetChecksum(sourceName.c_str(), dex_location_checksum_pointer, &error_msg)) {
     dex_location_checksum_pointer = NULL;
@@ -110,9 +114,8 @@
   const DexFile* dex_file;
   if (outputName.c_str() == nullptr) {
     // FindOrCreateOatFileForDexLocation can tolerate a missing dex_location_checksum
-    error_msg.clear();
     dex_file = linker->FindDexFileInOatFileFromDexLocation(sourceName.c_str(),
-                                                           dex_location_checksum_pointer, &error_msg);
+                                                           dex_location_checksum_pointer, &error_msgs);
   } else {
     // FindOrCreateOatFileForDexLocation requires the dex_location_checksum
     if (dex_location_checksum_pointer == NULL) {
@@ -122,12 +125,19 @@
       return 0;
     }
     dex_file = linker->FindOrCreateOatFileForDexLocation(sourceName.c_str(), dex_location_checksum,
-                                                         outputName.c_str(), &error_msg);
+                                                         outputName.c_str(), &error_msgs);
   }
   if (dex_file == nullptr) {
     ScopedObjectAccess soa(env);
-    CHECK(!error_msg.empty());
-    ThrowIOException("%s", error_msg.c_str());
+    CHECK(!error_msgs.empty());
+    // The most important message is at the end. So set up nesting by going forward, which will
+    // wrap the existing exception as a cause for the following one.
+    auto it = error_msgs.begin();
+    auto itEnd = error_msgs.end();
+    for ( ; it != itEnd; ++it) {
+      ThrowWrappedIOException("%s", it->c_str());
+    }
+
     return 0;
   }
   return static_cast<jlong>(reinterpret_cast<uintptr_t>(dex_file));
@@ -230,13 +240,31 @@
   close(fd2);
 }
 
+static double GetDoubleProperty(const char* property, double minValue, double maxValue, double defaultValue) {
+#ifndef HAVE_ANDROID_OS
+  return defaultValue;
+#else
+  char buf[PROP_VALUE_MAX];
+  char* endptr;
+
+  property_get(property, buf, "");
+  double value = strtod(buf, &endptr);
+
+  if (value == 0 && endptr == buf) {
+    value = defaultValue;
+  } else if (value < minValue || value > maxValue) {
+    value = defaultValue;
+  }
+  return value;
+#endif
+}
+
 static jboolean DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename,
     jstring javaPkgname, jboolean defer) {
   const bool kVerboseLogging = false;  // Spammy logging.
   const bool kDebugLogging = true;  // Logging useful for debugging.
 
   ScopedUtfChars filename(env, javaFilename);
-
   if ((filename.c_str() == nullptr) || !OS::FileExists(filename.c_str())) {
     LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename.c_str() << "' does not exist";
     ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException"));
@@ -282,7 +310,6 @@
     struct stat profstat, prevstat;
     int e1 = stat(profile_file.c_str(), &profstat);
     int e2 = stat(prev_profile_file.c_str(), &prevstat);
-
     if (e1 < 0) {
       // No profile file, need to run dex2oat
       if (kDebugLogging) {
@@ -290,48 +317,47 @@
       }
       return JNI_TRUE;
     }
+
     if (e2 == 0) {
       // There is a previous profile file.  Check if the profile has changed significantly.
-      // Let's use the file size as a proxy for significance.  If the new profile is 10%
-      // different in size than the the old profile then we run dex2oat.
-      double newsize = profstat.st_size;
-      double oldsize = prevstat.st_size;
-      bool need_profile = false;
+      // A change in profile is considered significant if X% (change_thr property) of the top K%
+      // (compile_thr property) samples have changed.
 
-      double ratio = 0;     // If the old file was empty and the new one not
-      if (oldsize > 0 && newsize > 0) {
-        ratio = newsize / oldsize;
-      } else if (oldsize == 0 && newsize > 0) {
-        need_profile = true;
-      } else if (oldsize > 0 && newsize == 0) {
-        // Unlikely to happen, but cover all the bases.
-        need_profile = true;
+      double topKThreshold = GetDoubleProperty("dalvik.vm.profiler.dex2oat.compile_thr", 10.0, 90.0, 90.0);
+      double changeThreshold = GetDoubleProperty("dalvik.vm.profiler.dex2oat.change_thr", 1.0, 90.0, 10.0);
+      double changePercent = 0.0;
+      std::set<std::string> newTopK, oldTopK;
+      bool newOk = ProfileHelper::LoadTopKSamples(newTopK, profile_file, topKThreshold);
+      bool oldOk = ProfileHelper::LoadTopKSamples(oldTopK, prev_profile_file, topKThreshold);
+      if (!newOk || !oldOk) {
+        if (kDebugLogging) {
+          LOG(INFO) << "DexFile_isDexOptNeeded Ignoring invalid profiles: "
+                    << (newOk ?  "" : profile_file) << " " << (oldOk ? "" : prev_profile_file);
+        }
+      } else if (newTopK.empty()) {
+        if (kDebugLogging && kVerboseLogging) {
+          LOG(INFO) << "DexFile_isDexOptNeeded empty profile: " << profile_file;
+        }
+        // If the new topK is empty, we shouldn't optimize, so we leave changePercent at 0.0.
+      } else {
+        std::set<std::string> diff;
+        std::set_difference(newTopK.begin(), newTopK.end(), oldTopK.begin(), oldTopK.end(),
+          std::inserter(diff, diff.end()));
+        // TODO: consider using the usedPercentage instead of the plain diff count.
+        changePercent = 100.0 * static_cast<double>(diff.size()) / static_cast<double>(newTopK.size());
+        if (kDebugLogging && kVerboseLogging) {
+          std::set<std::string>::iterator end = diff.end();
+          for (std::set<std::string>::iterator it = diff.begin(); it != end; it++) {
+            LOG(INFO) << "DexFile_isDexOptNeeded new in topK: " << *it;
+          }
+        }
       }
 
-      double significant_difference = 10.0;
-#ifdef HAVE_ANDROID_OS
-      // Switch off profiler if the dalvik.vm.profiler property has value 0.
-      char buf[PROP_VALUE_MAX];
-      property_get("dalvik.vm.profiler.dex2oat.threshold", buf, "10.0");
-      significant_difference = strtod(buf, nullptr);
-
-      // Something reasonable?
-      if (significant_difference < 1.0 || significant_difference > 90.0) {
-        significant_difference = 10.0;
-      }
-#endif      // The percentage difference that we consider as being significant.
-      double diff_hwm = 1.0 + significant_difference/10.0;
-      double diff_lwm = 1.0 - significant_difference/10.0;
-
-      if (ratio > diff_hwm || ratio < diff_lwm) {
-        need_profile = true;
-      }
-
-      if (need_profile) {
+      if (changePercent > changeThreshold) {
         if (kDebugLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded size of new profile file " << profile_file <<
-          " is significantly different from old profile file " << prev_profile_file << " (new: " <<
-          newsize << ", old: " << oldsize << ", ratio: " << ratio << ")";
+          " is significantly different from old profile file " << prev_profile_file << " (top "
+          << topKThreshold << "% samples changed in proportion of " << changePercent << "%)";
         }
         if (!defer) {
           CopyProfileFile(profile_file.c_str(), prev_profile_file.c_str());
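
Note: the new significance test is set-based rather than size-based: it measures what fraction of the current top-K methods was absent from the previous top-K. A standalone sketch of the same computation with made-up method names:

    #include <algorithm>
    #include <iostream>
    #include <iterator>
    #include <set>
    #include <string>

    int main() {
      // Hypothetical top-K method sets from two profile snapshots.
      std::set<std::string> new_top_k = {"A.run", "B.loop", "C.parse", "D.draw"};
      std::set<std::string> old_top_k = {"A.run", "B.loop", "E.idle", "F.gc"};

      // Methods that are hot now but were not hot before. std::set iterates in
      // sorted order, which is what std::set_difference requires.
      std::set<std::string> diff;
      std::set_difference(new_top_k.begin(), new_top_k.end(),
                          old_top_k.begin(), old_top_k.end(),
                          std::inserter(diff, diff.end()));

      double change_percent =
          100.0 * static_cast<double>(diff.size()) / static_cast<double>(new_top_k.size());
      std::cout << change_percent << "% of the top-K changed\n";  // Prints 50%.
      return 0;
    }
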
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 08a674f..29fe536 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -194,6 +194,7 @@
   profile_duration_s_ = 20;          // Seconds.
   profile_interval_us_ = 500;       // Microseconds.
   profile_backoff_coefficient_ = 2.0;
+  profile_start_immediately_ = true;
   profile_clock_source_ = kDefaultProfilerClockSource;
 
   verify_ = true;
@@ -509,6 +510,8 @@
       if (!ParseDouble(option, ':', 1.0, 10.0, &profile_backoff_coefficient_)) {
         return false;
       }
+    } else if (option == "-Xprofile-start-lazy") {
+      profile_start_immediately_ = false;
     } else if (StartsWith(option, "-implicit-checks:")) {
       std::string checks;
       if (!ParseStringAfterChar(option, ':', &checks)) {
@@ -581,10 +584,10 @@
         Usage("Unknown -Xverify option %s", verify_mode.c_str());
         return false;
       }
-    } else if (StartsWith(option, "-ea:") ||
-               StartsWith(option, "-da:") ||
-               StartsWith(option, "-enableassertions:") ||
-               StartsWith(option, "-disableassertions:") ||
+    } else if (StartsWith(option, "-ea") ||
+               StartsWith(option, "-da") ||
+               StartsWith(option, "-enableassertions") ||
+               StartsWith(option, "-disableassertions") ||
                (option == "--runtime-arg") ||
                (option == "-esa") ||
                (option == "-dsa") ||
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index 416bc78..126096a 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -79,6 +79,7 @@
   uint32_t profile_duration_s_;
   uint32_t profile_interval_us_;
   double profile_backoff_coefficient_;
+  bool profile_start_immediately_;
   ProfilerClockSource profile_clock_source_;
   bool verify_;
 
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 4770a54..7b117f4 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -16,6 +16,7 @@
 
 #include "profiler.h"
 
+#include <fstream>
 #include <sys/uio.h>
 #include <sys/file.h>
 
@@ -302,14 +303,12 @@
 
   // Only on target...
 #ifdef HAVE_ANDROID_OS
-  if (!startImmediately) {
-    // Switch off profiler if the dalvik.vm.profiler property has value 0.
-    char buf[PROP_VALUE_MAX];
-    property_get("dalvik.vm.profiler", buf, "0");
-    if (strcmp(buf, "0") == 0) {
-      LOG(INFO) << "Profiler disabled.  To enable setprop dalvik.vm.profiler 1";
-      return;
-    }
+  // Switch off profiler if the dalvik.vm.profiler property has value 0.
+  char buf[PROP_VALUE_MAX];
+  property_get("dalvik.vm.profiler", buf, "0");
+  if (strcmp(buf, "0") == 0) {
+    LOG(INFO) << "Profiler disabled.  To enable setprop dalvik.vm.profiler 1";
+    return;
   }
 #endif
 
@@ -579,5 +578,101 @@
     previous_[methodname] = PreviousValue(count, size);
   }
 }
-}  // namespace art
 
+bool ProfileHelper::LoadProfileMap(ProfileMap& profileMap, const std::string& fileName) {
+  LOG(VERBOSE) << "reading profile file " << fileName;
+  struct stat st;
+  int err = stat(fileName.c_str(), &st);
+  if (err == -1) {
+    LOG(VERBOSE) << "not found";
+    return false;
+  }
+  if (st.st_size == 0) {
+    return false;  // Empty profiles are invalid.
+  }
+  std::ifstream in(fileName.c_str());
+  if (!in) {
+    LOG(VERBOSE) << "profile file " << fileName << " exists but can't be opened";
+    LOG(VERBOSE) << "file owner: " << st.st_uid << ":" << st.st_gid;
+    LOG(VERBOSE) << "me: " << getuid() << ":" << getgid();
+    LOG(VERBOSE) << "file permissions: " << std::oct << st.st_mode;
+    LOG(VERBOSE) << "errno: " << errno;
+    return false;
+  }
+  // The first line contains summary information.
+  std::string line;
+  std::getline(in, line);
+  if (in.eof()) {
+    return false;
+  }
+  std::vector<std::string> summary_info;
+  Split(line, '/', summary_info);
+  if (summary_info.size() != 3) {
+    // Bad summary info.  It should be count/total/bootpath.
+    return false;
+  }
+  // This is the number of hits in all methods.
+  uint32_t total_count = 0;
+  for (int i = 0; i < 3; ++i) {
+    total_count += atoi(summary_info[i].c_str());
+  }
+
+  // Now read each line until the end of file.  Each line consists of 3 fields separated by '/'.
+  // Store the info in descending order given by the most used methods.
+  typedef std::set<std::pair<int, std::vector<std::string>>> ProfileSet;
+  ProfileSet countSet;
+  while (!in.eof()) {
+    std::getline(in, line);
+    if (in.eof()) {
+      break;
+    }
+    std::vector<std::string> info;
+    Split(line, '/', info);
+    if (info.size() != 3) {
+      // Malformed.
+      break;
+    }
+    int count = atoi(info[1].c_str());
+    countSet.insert(std::make_pair(-count, info));
+  }
+
+  uint32_t curTotalCount = 0;
+  ProfileSet::iterator end = countSet.end();
+  const ProfileData* prevData = nullptr;
+  for (ProfileSet::iterator it = countSet.begin(); it != end; ++it) {
+    const std::string& methodname = it->second[0];
+    uint32_t count = -it->first;
+    uint32_t size = atoi(it->second[2].c_str());
+    double usedPercent = (count * 100.0) / total_count;
+
+    curTotalCount += count;
+    // Methods with the same count should be part of the same top K percentage bucket.
+    double topKPercentage = (prevData != nullptr) && (prevData->GetCount() == count)
+      ? prevData->GetTopKUsedPercentage()
+      : 100 * static_cast<double>(curTotalCount) / static_cast<double>(total_count);
+
+    // Add it to the profile map, then point prevData at the stored entry (not
+    // at a loop-local temporary, which would dangle on the next iteration).
+    profileMap[methodname] = ProfileData(methodname, count, size, usedPercent, topKPercentage);
+    prevData = &profileMap[methodname];
+  }
+  return true;
+}
+
+bool ProfileHelper::LoadTopKSamples(std::set<std::string>& topKSamples, const std::string& fileName,
+                                    double topKPercentage) {
+  ProfileMap profileMap;
+  bool loadOk = LoadProfileMap(profileMap, fileName);
+  if (!loadOk) {
+    return false;
+  }
+  ProfileMap::iterator end = profileMap.end();
+  for (ProfileMap::iterator it = profileMap.begin(); it != end; it++) {
+    if (it->second.GetTopKUsedPercentage() < topKPercentage) {
+      topKSamples.insert(it->first);
+    }
+  }
+  return true;
+}
+
+}  // namespace art
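
Note: the topKPercentage stored per method above is a cumulative share: with methods ordered by descending count, each method is tagged with the running percentage of total hits, so LoadTopKSamples' test GetTopKUsedPercentage() < topKPercentage selects the smallest group of hottest methods covering K% of all hits. A worked sketch with made-up counts:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
      // Hypothetical per-method sample counts, sorted descending and summing
      // to 100 so the percentages can be read off directly.
      std::vector<uint32_t> counts = {50, 30, 15, 5};
      uint32_t total = 100;

      uint32_t cumulative = 0;
      for (uint32_t count : counts) {
        cumulative += count;
        double top_k = 100.0 * static_cast<double>(cumulative) / total;
        // Prints topKPercentage values 50, 80, 95, 100. With a 90.0 threshold,
        // LoadTopKSamples would keep only the first two methods.
        std::cout << "count=" << count << " topKPercentage=" << top_k << "\n";
      }
      return 0;
    }
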
diff --git a/runtime/profiler.h b/runtime/profiler.h
index b03b170..31fdc79 100644
--- a/runtime/profiler.h
+++ b/runtime/profiler.h
@@ -39,7 +39,6 @@
 }  // namespace mirror
 class Thread;
 
-
 //
 // This class holds all the results for all runs of the profiler.  It also
 // counts the number of null methods (where we can't determine the method) and
@@ -63,7 +62,7 @@
  private:
   uint32_t Hash(mirror::ArtMethod* method);
   static constexpr int kHashSize = 17;
-  Mutex& lock_;         // Reference to the main profiler lock - we don't need two of them.
+  Mutex& lock_;                   // Reference to the main profiler lock - we don't need two of them.
   uint32_t num_samples_;          // Total number of samples taken.
  uint32_t num_null_methods_;     // Number of samples where we don't know the method.
   uint32_t num_boot_methods_;     // Number of samples in the boot path.
@@ -189,6 +188,54 @@
   DISALLOW_COPY_AND_ASSIGN(BackgroundMethodSamplingProfiler);
 };
 
+// TODO: incorporate in ProfileSampleResults
+
+// Profile data.  This is generated from previous runs of the program and stored
+// in a file.  It is used to determine whether to compile a particular method or not.
+class ProfileData {
+ public:
+  ProfileData() : count_(0), method_size_(0), usedPercent_(0), topKUsedPercentage_(0) {}
+  ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size,
+    double usedPercent, double topKUsedPercentage) :
+    method_name_(method_name), count_(count), method_size_(method_size),
+    usedPercent_(usedPercent), topKUsedPercentage_(topKUsedPercentage) {
+    // TODO: currently method_size_ and count_ are unused.
+    UNUSED(method_size_);
+    UNUSED(count_);
+  }
+
+  bool IsAbove(double v) const { return usedPercent_ >= v; }
+  double GetUsedPercent() const { return usedPercent_; }
+  uint32_t GetCount() const { return count_; }
+  double GetTopKUsedPercentage() const { return topKUsedPercentage_; }
+
+ private:
+  std::string method_name_;    // Method name.
+  uint32_t count_;             // Number of times it has been called.
+  uint32_t method_size_;       // Size of the method in dex instructions.
+  double usedPercent_;         // Percentage of total hits attributed to this method.
+  double topKUsedPercentage_;  // Cumulative percentage of total hits for the top-K group
+                               // that this method belongs to.
+};
+
+// Profile data is stored in a map, indexed by the full method name.
+typedef std::map<std::string, ProfileData> ProfileMap;
+
+class ProfileHelper {
+ private:
+  ProfileHelper();
+
+ public:
+  // Read the profile data from the given file.  Calculates the percentage for each method.
+  // Returns false if there was no profile file or it was malformed.
+  static bool LoadProfileMap(ProfileMap& profileMap, const std::string& fileName);
+
+  // Read the profile data from the given file and compute the group of methods that
+  // comprises topKPercentage of the total method use.
+  static bool LoadTopKSamples(std::set<std::string>& topKMethods, const std::string& fileName,
+                              double topKPercentage);
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_PROFILER_H_
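
Note: a short usage sketch for the ProfileHelper API declared above (the wrapper function and profile path are illustrative):

    #include <set>
    #include <string>

    #include "profiler.h"  // Declares ProfileHelper and ProfileMap, as above.

    // Illustrative helper: load the set of methods that covers the top 90% of
    // samples. Returns false for a missing, empty, or malformed profile.
    bool GetHotMethods(const std::string& profile_path, std::set<std::string>* hot) {
      return art::ProfileHelper::LoadTopKSamples(*hot, profile_path, 90.0);
    }
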
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index edc3b33..5c31d35 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -121,6 +121,7 @@
       profile_duration_s_(0),
       profile_interval_us_(0),
       profile_backoff_coefficient_(0),
+      profile_start_immediately_(true),
       method_trace_(false),
       method_trace_file_size_(0),
       instrumentation_(),
@@ -187,7 +188,7 @@
 }
 
 struct AbortState {
-  void Dump(std::ostream& os) {
+  void Dump(std::ostream& os) NO_THREAD_SAFETY_ANALYSIS {
     if (gAborting > 1) {
       os << "Runtime aborting --- recursively, so no thread-specific detail!\n";
       return;
@@ -199,26 +200,33 @@
       return;
     }
     Thread* self = Thread::Current();
-    if (self == NULL) {
+    if (self == nullptr) {
       os << "(Aborting thread was not attached to runtime!)\n";
     } else {
-      // TODO: we're aborting and the ScopedObjectAccess may attempt to acquire the mutator_lock_
-      //       which may block indefinitely if there's a misbehaving thread holding it exclusively.
-      //       The code below should be made robust to this.
-      ScopedObjectAccess soa(self);
       os << "Aborting thread:\n";
-      self->Dump(os);
-      if (self->IsExceptionPending()) {
-        ThrowLocation throw_location;
-        mirror::Throwable* exception = self->GetException(&throw_location);
-        os << "Pending exception " << PrettyTypeOf(exception)
-            << " thrown by '" << throw_location.Dump() << "'\n"
-            << exception->Dump();
+      if (Locks::mutator_lock_->IsExclusiveHeld(self) || Locks::mutator_lock_->IsSharedHeld(self)) {
+        DumpThread(os, self);
+      } else {
+        if (Locks::mutator_lock_->SharedTryLock(self)) {
+          DumpThread(os, self);
+          Locks::mutator_lock_->SharedUnlock(self);
+        }
       }
     }
     DumpAllThreads(os, self);
   }
 
+  void DumpThread(std::ostream& os, Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    self->Dump(os);
+    if (self->IsExceptionPending()) {
+      ThrowLocation throw_location;
+      mirror::Throwable* exception = self->GetException(&throw_location);
+      os << "Pending exception " << PrettyTypeOf(exception)
+          << " thrown by '" << throw_location.Dump() << "'\n"
+          << exception->Dump();
+    }
+  }
+
   void DumpAllThreads(std::ostream& os, Thread* self) NO_THREAD_SAFETY_ANALYSIS {
     bool tll_already_held = Locks::thread_list_lock_->IsExclusiveHeld(self);
     bool ml_already_held = Locks::mutator_lock_->IsSharedHeld(self);
@@ -391,7 +399,7 @@
     if (fd >= 0) {
       close(fd);
     }
-    StartProfiler(profile_output_filename_.c_str(), "", true);
+    StartProfiler(profile_output_filename_.c_str(), "");
   }
 
   return true;
@@ -616,6 +624,7 @@
   profile_duration_s_ = options->profile_duration_s_;
   profile_interval_us_ = options->profile_interval_us_;
   profile_backoff_coefficient_ = options->profile_backoff_coefficient_;
+  profile_start_immediately_ = options->profile_start_immediately_;
   profile_ = options->profile_;
   profile_output_filename_ = options->profile_output_filename_;
   // TODO: move this to just be an Trace::Start argument
@@ -1143,10 +1152,9 @@
   method_verifiers_.erase(it);
 }
 
-void Runtime::StartProfiler(const char* appDir, const char* procName, bool startImmediately) {
+void Runtime::StartProfiler(const char* appDir, const char* procName) {
   BackgroundMethodSamplingProfiler::Start(profile_period_s_, profile_duration_s_, appDir,
-      procName, profile_interval_us_,
-      profile_backoff_coefficient_, startImmediately);
+      procName, profile_interval_us_, profile_backoff_coefficient_, profile_start_immediately_);
 }
 
 // Transaction support.
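
Note: the AbortState rework above replaces an unconditional ScopedObjectAccess, which could block forever on a wedged mutator lock, with a dump-if-already-held / try-lock-otherwise pattern: losing the dump is preferable to hanging the abort. The same shape in miniature, with a std::mutex standing in for the mutator lock:

    #include <iostream>
    #include <mutex>

    std::mutex state_lock;  // Stand-in for the mutator lock.

    void DumpState() { std::cout << "...thread state...\n"; }

    // Never block on a lock a misbehaving thread might hold forever.
    void DumpForAbort(bool already_hold_lock) {
      if (already_hold_lock) {
        DumpState();
      } else if (state_lock.try_lock()) {
        DumpState();
        state_lock.unlock();
      }
      // Otherwise: skip the dump; the abort must make progress.
    }
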
diff --git a/runtime/runtime.h b/runtime/runtime.h
index e94072c..462711e 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -374,7 +374,7 @@
   const std::vector<const DexFile*>& GetCompileTimeClassPath(jobject class_loader);
   void SetCompileTimeClassPath(jobject class_loader, std::vector<const DexFile*>& class_path);
 
-  void StartProfiler(const char* appDir, const char* procName, bool startImmediately = false);
+  void StartProfiler(const char* appDir, const char* procName);
   void UpdateProfilerState(int state);
 
   // Transaction support.
@@ -542,6 +542,8 @@
   uint32_t profile_duration_s_;         // Run profile for n seconds.
   uint32_t profile_interval_us_;        // Microseconds between samples.
   double profile_backoff_coefficient_;  // Coefficient to exponential backoff.
+  bool profile_start_immediately_;      // Whether the profile should start upon app
+                                        // startup or be delayed by some random offset.
 
   bool method_trace_;
   std::string method_trace_file_;
diff --git a/runtime/stack_indirect_reference_table.h b/runtime/stack_indirect_reference_table.h
index 6049e06..b113129 100644
--- a/runtime/stack_indirect_reference_table.h
+++ b/runtime/stack_indirect_reference_table.h
@@ -44,6 +44,10 @@
     return number_of_references_;
   }
 
+  // The following functions come in versions with and without an explicit pointer size. The
+  // first two are used at runtime, so OFFSETOF_MEMBER computes the right offsets automatically.
+  // The last one takes the pointer size explicitly so that we can cross-compile correctly.
+
  // Returns the size of a StackIndirectReferenceTable containing num_references entries.
   static size_t SizeOf(uint32_t num_references) {
     size_t header_size = OFFSETOF_MEMBER(StackIndirectReferenceTable, references_);
@@ -60,7 +64,7 @@
   // Get the size of the SIRT for the number of entries, with padding added for potential alignment.
   static size_t GetAlignedSirtSizeTarget(size_t pointer_size, uint32_t num_references) {
     // Assume that the layout is packed.
-    size_t header_size = pointer_size + sizeof(uint32_t);
+    size_t header_size = pointer_size + sizeof(number_of_references_);
     // This assumes there is no layout change between 32 and 64b.
     size_t data_size = sizeof(StackReference<mirror::Object>) * num_references;
     size_t sirt_size = header_size + data_size;
@@ -109,18 +113,18 @@
   }
 
   // Offset of link within SIRT, used by generated code
-  static size_t LinkOffset() {
-    return OFFSETOF_MEMBER(StackIndirectReferenceTable, link_);
+  static size_t LinkOffset(size_t pointer_size) {
+    return 0;
   }
 
   // Offset of length within SIRT, used by generated code
-  static uint32_t NumberOfReferencesOffset() {
-    return OFFSETOF_MEMBER(StackIndirectReferenceTable, number_of_references_);
+  static size_t NumberOfReferencesOffset(size_t pointer_size) {
+    return pointer_size;
   }
 
  // Offset of references within SIRT, used by generated code
-  static size_t ReferencesOffset() {
-    return OFFSETOF_MEMBER(StackIndirectReferenceTable, references_);
+  static size_t ReferencesOffset(size_t pointer_size) {
+    return pointer_size + sizeof(number_of_references_);
   }
 
  private:
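
Note: with the packed layout assumed above, [link pointer][uint32_t number_of_references][references...], all three offsets depend only on the target's pointer size, which is what makes them safe to compute when cross-compiling. A worked sketch of the arithmetic:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Mirrors the accessors above for a packed SIRT layout.
    static void PrintSirtOffsets(size_t pointer_size) {
      size_t link_offset = 0;                                // LinkOffset()
      size_t num_refs_offset = pointer_size;                 // NumberOfReferencesOffset()
      size_t refs_offset = pointer_size + sizeof(uint32_t);  // ReferencesOffset()
      printf("ptr=%zu: link=%zu num_refs=%zu refs=%zu\n",
             pointer_size, link_offset, num_refs_offset, refs_offset);
    }

    int main() {
      PrintSirtOffsets(4);  // 32-bit target: link=0 num_refs=4 refs=8
      PrintSirtOffsets(8);  // 64-bit target: link=0 num_refs=8 refs=12
      return 0;
    }
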
diff --git a/runtime/stack_indirect_reference_table_test.cc b/runtime/stack_indirect_reference_table_test.cc
new file mode 100644
index 0000000..72ef6b6
--- /dev/null
+++ b/runtime/stack_indirect_reference_table_test.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stack_indirect_reference_table.h"
+#include "gtest/gtest.h"
+
+namespace art {
+
+// Test the offsets computed for members of StackIndirectReferenceTable. Because of cross-compiling
+// it is impossible to use OFFSETOF_MEMBER, so we do some reasonable computations ourselves. This
+// test checks whether we do the right thing.
+TEST(StackIndirectReferenceTableTest, Offsets) {
+  // As the members of StackIndirectReferenceTable are private, we cannot use OFFSETOF_MEMBER
+  // here. So do the inverse: set some data, and access it through pointers created from the offsets.
+
+  StackIndirectReferenceTable test_table(reinterpret_cast<mirror::Object*>(0x1234));
+  test_table.SetLink(reinterpret_cast<StackIndirectReferenceTable*>(0x5678));
+  test_table.SetNumberOfReferences(0x9ABC);
+
+  byte* table_base_ptr = reinterpret_cast<byte*>(&test_table);
+
+  {
+    uintptr_t* link_ptr = reinterpret_cast<uintptr_t*>(table_base_ptr +
+        StackIndirectReferenceTable::LinkOffset(kPointerSize));
+    EXPECT_EQ(*link_ptr, static_cast<size_t>(0x5678));
+  }
+
+  {
+    uint32_t* num_ptr = reinterpret_cast<uint32_t*>(table_base_ptr +
+        StackIndirectReferenceTable::NumberOfReferencesOffset(kPointerSize));
+    EXPECT_EQ(*num_ptr, static_cast<uint32_t>(0x9ABC));
+  }
+
+  {
+    // Assume sizeof(StackReference<mirror::Object>) == sizeof(uint32_t)
+    // TODO: How can we make this assumption-less but still access directly and fully?
+    EXPECT_EQ(sizeof(StackReference<mirror::Object>), sizeof(uint32_t));
+
+    uint32_t* ref_ptr = reinterpret_cast<uint32_t*>(table_base_ptr +
+        StackIndirectReferenceTable::ReferencesOffset(kPointerSize));
+    EXPECT_EQ(*ref_ptr, static_cast<uint32_t>(0x1234));
+  }
+}
+
+}  // namespace art
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 5a2410a..131e2b6 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -711,7 +711,9 @@
   bool is_daemon = false;
   Thread* self = Thread::Current();
 
-  if (self != nullptr && thread != nullptr && thread->tlsPtr_.opeer != nullptr) {
+  // Don't do this if we are aborting since the GC may have all the threads suspended. This will
+  // cause ScopedObjectAccessUnchecked to deadlock.
+  if (gAborting == 0 && self != nullptr && thread != nullptr && thread->tlsPtr_.opeer != nullptr) {
     ScopedObjectAccessUnchecked soa(self);
     priority = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority)
         ->GetInt(thread->tlsPtr_.opeer);
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 8dad419..270deb0 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -617,7 +617,7 @@
   DCHECK(pReq != NULL);
   if (pReq->invoke_needed) {
     // Clear this before signaling.
-    pReq->invoke_needed = false;
+    pReq->Clear();
 
     VLOG(jdwp) << "invoke complete, signaling";
     MutexLock mu(self, pReq->lock);
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 1f24478..b85eb7e 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -549,6 +549,21 @@
   LOG(ERROR) << "Unexpected dex PC event in tracing " << PrettyMethod(method) << " " << new_dex_pc;
 };
 
+void Trace::FieldRead(Thread* /*thread*/, mirror::Object* this_object,
+                       mirror::ArtMethod* method, uint32_t dex_pc, mirror::ArtField* field)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  // We're not registered to listen to this kind of event, so complain.
+  LOG(ERROR) << "Unexpected field read event in tracing " << PrettyMethod(method) << " " << dex_pc;
+}
+
+void Trace::FieldWritten(Thread* /*thread*/, mirror::Object* this_object,
+                          mirror::ArtMethod* method, uint32_t dex_pc, mirror::ArtField* field,
+                          const JValue& field_value)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  // We're not registered to listen to this kind of event, so complain.
+  LOG(ERROR) << "Unexpected field write event in tracing " << PrettyMethod(method) << " " << dex_pc;
+}
+
 void Trace::MethodEntered(Thread* thread, mirror::Object* this_object,
                           mirror::ArtMethod* method, uint32_t dex_pc) {
   uint32_t thread_clock_diff = 0;
diff --git a/runtime/trace.h b/runtime/trace.h
index 1af1283..bf4995a 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -32,6 +32,7 @@
 namespace art {
 
 namespace mirror {
+  class ArtField;
   class ArtMethod;
 }  // namespace mirror
 class Thread;
@@ -54,7 +55,7 @@
   kSampleProfilingActive,
 };
 
-class Trace : public instrumentation::InstrumentationListener {
+class Trace FINAL : public instrumentation::InstrumentationListener {
  public:
   enum TraceFlag {
     kTraceCountAllocs = 1,
@@ -78,23 +79,31 @@
   void CompareAndUpdateStackTrace(Thread* thread, std::vector<mirror::ArtMethod*>* stack_trace)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  virtual void MethodEntered(Thread* thread, mirror::Object* this_object,
-                             mirror::ArtMethod* method, uint32_t dex_pc)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void MethodExited(Thread* thread, mirror::Object* this_object,
-                            mirror::ArtMethod* method, uint32_t dex_pc,
-                            const JValue& return_value)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
-                            mirror::ArtMethod* method, uint32_t dex_pc)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
-                          mirror::ArtMethod* method, uint32_t new_dex_pc)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void ExceptionCaught(Thread* thread, const ThrowLocation& throw_location,
-                               mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
-                               mirror::Throwable* exception_object)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // InstrumentationListener implementation.
+  void MethodEntered(Thread* thread, mirror::Object* this_object,
+                     mirror::ArtMethod* method, uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
+  void MethodExited(Thread* thread, mirror::Object* this_object,
+                    mirror::ArtMethod* method, uint32_t dex_pc,
+                    const JValue& return_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
+  void MethodUnwind(Thread* thread, mirror::Object* this_object,
+                    mirror::ArtMethod* method, uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
+  void DexPcMoved(Thread* thread, mirror::Object* this_object,
+                  mirror::ArtMethod* method, uint32_t new_dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
+  void FieldRead(Thread* thread, mirror::Object* this_object,
+                 mirror::ArtMethod* method, uint32_t dex_pc, mirror::ArtField* field)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
+  void FieldWritten(Thread* thread, mirror::Object* this_object,
+                    mirror::ArtMethod* method, uint32_t dex_pc, mirror::ArtField* field,
+                    const JValue& field_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
+  void ExceptionCaught(Thread* thread, const ThrowLocation& throw_location,
+                       mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
+                       mirror::Throwable* exception_object)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
 
   // Reuse an old stack trace if it exists, otherwise allocate a new one.
   static std::vector<mirror::ArtMethod*>* AllocStackTrace();
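
Note: switching the listener methods to OVERRIDE (and sealing Trace with FINAL) turns a base-class signature drift into a compile error instead of a silently unrelated virtual. A minimal illustration with the underlying C++11 keywords:

    struct Listener {
      virtual ~Listener() {}
      virtual void FieldRead(int dex_pc) {}
    };

    struct MyTrace final : Listener {
      void FieldRead(int dex_pc) override {}  // OK: matches the base declaration.
      // void FieldRead(long dex_pc) override {}  // Error: overrides nothing.
    };
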
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 21e3e44..5a9d27c 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -3128,15 +3128,14 @@
     this_class = actual_arg_type.GetClass();
   } else {
     const std::string& descriptor(actual_arg_type.GetDescriptor());
-    Thread* self = Thread::Current();
-    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    this_class = class_linker->FindClass(self, descriptor.c_str(), *class_loader_);
+    // TODO: Precise or not?
+    this_class = reg_types_.FromDescriptor(class_loader_->get(), descriptor.c_str(),
+                                           false).GetClass();
     if (this_class == NULL) {
       Thread* self = Thread::Current();
       self->ClearException();
       // Look for a system class
-      SirtRef<mirror::ClassLoader> null_class_loader(self, nullptr);
-      this_class = class_linker->FindClass(self, descriptor.c_str(), null_class_loader);
+      this_class = reg_types_.FromDescriptor(nullptr, descriptor.c_str(), false).GetClass();
     }
   }
   if (this_class == NULL) {
@@ -3654,14 +3653,14 @@
   } else {
     // We need to resolve the class from its descriptor.
     const std::string& descriptor(object_type.GetDescriptor());
-    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
     Thread* self = Thread::Current();
-    object_class = class_linker->FindClass(self, descriptor.c_str(), *class_loader_);
+    object_class = reg_types_.FromDescriptor(class_loader_->get(), descriptor.c_str(),
+                                             false).GetClass();
     if (object_class == NULL) {
       self->ClearException();
       // Look for a system class
-      SirtRef<mirror::ClassLoader> null_class_loader(self, nullptr);
-      object_class = class_linker->FindClass(self, descriptor.c_str(), null_class_loader);
+      object_class = reg_types_.FromDescriptor(nullptr, descriptor.c_str(),
+                                               false).GetClass();
     }
   }
   if (object_class == NULL) {
diff --git a/test/MyClassNatives/MyClassNatives.java b/test/MyClassNatives/MyClassNatives.java
index 09f1783..b5e0204 100644
--- a/test/MyClassNatives/MyClassNatives.java
+++ b/test/MyClassNatives/MyClassNatives.java
@@ -80,4 +80,16 @@
         Object o248, Object o249, Object o250, Object o251, Object o252, Object o253);
 
     native void withoutImplementation();
+
+    static native void stackArgsIntsFirst(int i1, int i2, int i3, int i4, int i5, int i6, int i7,
+        int i8, int i9, int i10, float f1, float f2, float f3, float f4, float f5, float f6,
+        float f7, float f8, float f9, float f10);
+
+    static native void stackArgsFloatsFirst(float f1, float f2, float f3, float f4, float f5,
+        float f6, float f7, float f8, float f9, float f10, int i1, int i2, int i3, int i4, int i5,
+        int i6, int i7, int i8, int i9, int i10);
+
+    static native void stackArgsMixed(int i1, float f1, int i2, float f2, int i3, float f3, int i4,
+        float f4, int i5, float f5, int i6, float f6, int i7, float f7, int i8, float f8, int i9,
+        float f9, int i10, float f10);
 }
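
Note: ten ints plus ten floats exhaust the argument registers of every supported calling convention, so the tail of each parameter list must be passed on the stack; these natives exist so the JNI compiler's stack-argument marshalling can be exercised. A hedged sketch of a matching native-side signature (illustrative only, not the test's actual implementation):

    #include <jni.h>

    // Hypothetical native counterpart for stackArgsIntsFirst: with this many
    // arguments, the later ones must arrive via the stack on every target.
    extern "C" JNIEXPORT void JNICALL
    Java_MyClassNatives_stackArgsIntsFirst(JNIEnv*, jclass,
        jint i1, jint i2, jint i3, jint i4, jint i5, jint i6, jint i7, jint i8,
        jint i9, jint i10, jfloat f1, jfloat f2, jfloat f3, jfloat f4, jfloat f5,
        jfloat f6, jfloat f7, jfloat f8, jfloat f9, jfloat f10) {
      // A real checker would verify each value (e.g. i10 == 10, f10 == 10.0f).
    }
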