Merge "Use LIRSlowPath for throwing NPE."
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index 27d687c..6aa1c18 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -52,10 +52,6 @@
   art_multilib := $(7)
 
   include $(CLEAR_VARS)
-  ifeq ($$(art_target_or_host),target)
-    include external/stlport/libstlport.mk
-  endif
-
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   LOCAL_MODULE_TAGS := optional
   LOCAL_SRC_FILES := $$(art_source)
@@ -104,9 +100,12 @@
   endif
 
   ifeq ($$(art_target_or_host),target)
+    include art/build/Android.libcxx.mk
     include $(BUILD_EXECUTABLE)
     ART_TARGET_EXECUTABLES := $(ART_TARGET_EXECUTABLES) $(TARGET_OUT_EXECUTABLES)/$$(LOCAL_MODULE)
   else # host
+    LOCAL_IS_HOST_MODULE := true
+    include art/build/Android.libcxx.mk
     include $(BUILD_HOST_EXECUTABLE)
     ART_HOST_EXECUTABLES := $(ART_HOST_EXECUTABLES) $(HOST_OUT_EXECUTABLES)/$$(LOCAL_MODULE)
   endif
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 22e6df4..b6e6fac 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -18,6 +18,7 @@
 
 RUNTIME_GTEST_COMMON_SRC_FILES := \
 	runtime/barrier_test.cc \
+	runtime/base/bit_field_test.cc \
 	runtime/base/bit_vector_test.cc \
 	runtime/base/hex_dump_test.cc \
 	runtime/base/histogram_test.cc \
@@ -149,10 +150,6 @@
   art_gtest_name := $$(notdir $$(basename $$(art_gtest_filename)))
 
   include $(CLEAR_VARS)
-  ifeq ($$(art_target_or_host),target)
-    include external/stlport/libstlport.mk
-  endif
-
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   LOCAL_MODULE := $$(art_gtest_name)
   ifeq ($$(art_target_or_host),target)
@@ -183,6 +180,7 @@
     LOCAL_MODULE_PATH_32 := $(ART_BASE_NATIVETEST_OUT)
     LOCAL_MODULE_PATH_64 := $(ART_BASE_NATIVETEST_OUT)64
     LOCAL_MULTILIB := both
+    include art/build/Android.libcxx.mk
     include $(BUILD_EXECUTABLE)
     ART_TARGET_GTEST_EXECUTABLES += $$(art_gtest_exe)
     art_gtest_target := test-art-$$(art_target_or_host)-gtest-$$(art_gtest_name)
@@ -207,6 +205,8 @@
         LOCAL_STATIC_LIBRARIES += libgtest_host
     endif
     LOCAL_LDLIBS += -lpthread -ldl
+    LOCAL_IS_HOST_MODULE := true
+    include art/build/Android.libcxx.mk
     include $(BUILD_HOST_EXECUTABLE)
     art_gtest_exe := $(HOST_OUT_EXECUTABLES)/$$(LOCAL_MODULE)
     ART_HOST_GTEST_EXECUTABLES += $$(art_gtest_exe)
diff --git a/build/Android.libarttest.mk b/build/Android.libarttest.mk
index 14d16ac..18d321a 100644
--- a/build/Android.libarttest.mk
+++ b/build/Android.libarttest.mk
@@ -31,10 +31,6 @@
   art_target_or_host := $(1)
 
   include $(CLEAR_VARS)
-  ifeq ($$(art_target_or_host),target)
-   include external/stlport/libstlport.mk
-  endif
-
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   LOCAL_MODULE := libarttest
   ifeq ($$(art_target_or_host),target)
@@ -55,6 +51,7 @@
     LOCAL_MODULE_PATH_32 := $(ART_BASE_TEST_OUT)
     LOCAL_MODULE_PATH_64 := $(ART_BASE_TEST_OUT)64
     LOCAL_MODULE_TARGET_ARCH := $(ART_SUPPORTED_ARCH)
+    include art/build/Android.libcxx.mk
     include $(BUILD_SHARED_LIBRARY)
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
@@ -64,6 +61,8 @@
     ifeq ($(HOST_OS),linux)
       LOCAL_LDLIBS += -lrt
     endif
+    LOCAL_IS_HOST_MODULE := true
+    include art/build/Android.libcxx.mk
     include $(BUILD_HOST_SHARED_LIBRARY)
   endif
 endef
diff --git a/build/Android.libcxx.mk b/build/Android.libcxx.mk
new file mode 100644
index 0000000..3dd1eb7
--- /dev/null
+++ b/build/Android.libcxx.mk
@@ -0,0 +1,25 @@
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Pulls in the C++ standard library makefile for the module being defined.
+# Callers include this file after `include $(CLEAR_VARS)` and, for host
+# modules, after setting LOCAL_IS_HOST_MODULE := true. Only modules that are
+# not host modules get stlport; host modules get nothing from this file.
+ifneq ($(strip $(LOCAL_IS_HOST_MODULE)),true)
+  include external/stlport/libstlport.mk
+  # NOTE(review): presumably the libc++ replacement to switch to later - confirm.
+  # include external/libcxx/libcxx.mk
+endif
diff --git a/compiler/Android.mk b/compiler/Android.mk
index b17cd52..6d656e6 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -158,11 +158,10 @@
   art_ndebug_or_debug := $(2)
 
   include $(CLEAR_VARS)
-  ifeq ($$(art_target_or_host),target)
-    include external/stlport/libstlport.mk
-  else
+  ifeq ($$(art_target_or_host),host)
     LOCAL_IS_HOST_MODULE := true
   endif
+  include art/build/Android.libcxx.mk
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart-compiler
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 8e013c1..59ed827 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -82,21 +82,7 @@
 }
 
 uint32_t CompiledCode::AlignCode(uint32_t offset, InstructionSet instruction_set) {
-  switch (instruction_set) {
-    case kArm:
-    case kThumb2:
-      return RoundUp(offset, kArmAlignment);
-    case kArm64:
-      return RoundUp(offset, kArm64Alignment);
-    case kMips:
-      return RoundUp(offset, kMipsAlignment);
-    case kX86:  // Fall-through.
-    case kX86_64:
-      return RoundUp(offset, kX86Alignment);
-    default:
-      LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
-      return 0;
-  }
+  return RoundUp(offset, GetInstructionSetAlignment(instruction_set));
 }
 
 size_t CompiledCode::CodeDelta() const {
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 201dc47..1bf5fce 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -158,7 +158,7 @@
   if (cu.instruction_set == kArm) {
     cu.instruction_set = kThumb2;
   }
-  cu.target64 = (cu.instruction_set == kX86_64) || (cu.instruction_set == kArm64);
+  cu.target64 = Is64BitInstructionSet(cu.instruction_set);
   cu.compiler = compiler;
   // TODO: x86_64 & arm64 are not yet implemented.
   CHECK((cu.instruction_set == kThumb2) ||
@@ -166,7 +166,6 @@
         (cu.instruction_set == kX86_64) ||
         (cu.instruction_set == kMips));
 
-
   /* Adjust this value accordingly once inlining is performed */
   cu.num_dalvik_registers = code_item->registers_size_;
   // TODO: set this from command line
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 0d938d9..055f60c 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -520,7 +520,12 @@
     }
     // rBase now holds static storage base
     if (is_long_or_double) {
-      rl_src = LoadValueWide(rl_src, kAnyReg);
+      RegisterClass register_kind = kAnyReg;
+      if (field_info.IsVolatile() && cu_->instruction_set == kX86) {
+        // Force long/double volatile stores into SSE registers to avoid tearing.
+        register_kind = kFPReg;
+      }
+      rl_src = LoadValueWide(rl_src, register_kind);
     } else {
       rl_src = LoadValue(rl_src, kAnyReg);
     }
@@ -601,7 +606,12 @@
       FreeTemp(r_method);
     }
     // r_base now holds static storage base
-    RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true);
+    RegisterClass result_reg_kind = kAnyReg;
+    if (field_info.IsVolatile() && cu_->instruction_set == kX86) {
+      // Force long/double volatile loads into SSE registers to avoid tearing.
+      result_reg_kind = kFPReg;
+    }
+    RegLocation rl_result = EvalLoc(rl_dest, result_reg_kind, true);
 
     if (is_long_or_double) {
       LoadBaseDispWide(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg, INVALID_SREG);
@@ -755,9 +765,12 @@
       DCHECK(rl_dest.wide);
       GenNullCheck(rl_obj.reg, opt_flags);
       if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-        rl_result = EvalLoc(rl_dest, reg_class, true);
-        // FIXME?  duplicate null check?
-        GenNullCheck(rl_obj.reg, opt_flags);
+        RegisterClass result_reg_kind = kAnyReg;
+        if (field_info.IsVolatile() && cu_->instruction_set == kX86) {
+          // Force long/double volatile loads into SSE registers to avoid tearing.
+          result_reg_kind = kFPReg;
+        }
+        rl_result = EvalLoc(rl_dest, result_reg_kind, true);
         LoadBaseDispWide(rl_obj.reg, field_info.FieldOffset().Int32Value(), rl_result.reg,
                          rl_obj.s_reg_low);
         MarkPossibleNullPointerException(opt_flags);
@@ -822,7 +835,12 @@
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
     rl_obj = LoadValue(rl_obj, kCoreReg);
     if (is_long_or_double) {
-      rl_src = LoadValueWide(rl_src, kAnyReg);
+      RegisterClass src_reg_kind = kAnyReg;
+      if (field_info.IsVolatile() && cu_->instruction_set == kX86) {
+        // Force long/double volatile stores into SSE registers to avoid tearing.
+        src_reg_kind = kFPReg;
+      }
+      rl_src = LoadValueWide(rl_src, src_reg_kind);
       GenNullCheck(rl_obj.reg, opt_flags);
       RegStorage reg_ptr = AllocTemp();
       OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg, field_info.FieldOffset().Int32Value());
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index dcc5d9b..5a8ad7a 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -1064,6 +1064,7 @@
   LoadWordDisp(rs_rDX, count_offset, rs_rCX);
   LIR *length_compare = nullptr;
   int start_value = 0;
+  bool is_index_on_stack = false;
   if (zero_based) {
     // We have to handle an empty string.  Use special instruction JECXZ.
     length_compare = NewLIR0(kX86Jecxz8);
@@ -1084,14 +1085,32 @@
       // Runtime start index.
       rl_start = UpdateLoc(rl_start);
       if (rl_start.location == kLocPhysReg) {
+        // Handle "start index < 0" case.
+        OpRegReg(kOpXor, rs_rBX, rs_rBX);
+        OpRegReg(kOpCmp, rl_start.reg, rs_rBX);
+        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, rs_rBX);
+
+        // The length of the string should be greater than the start index.
         length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
         OpRegReg(kOpSub, rs_rCX, rl_start.reg);
+        if (rl_start.reg == rs_rDI) {
+          // Special case: EDI is reused below, so save the start index on the stack.
+          NewLIR1(kX86Push32R, rDI);
+          is_index_on_stack = true;
+        }
       } else {
-        // Compare to memory to avoid a register load.  Handle pushed EDI.
+        // Load the start index from stack, remembering that we pushed EDI.
         int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
-        OpRegMem(kOpCmp, rs_rCX, rs_rX86_SP, displacement);
-        length_compare = NewLIR2(kX86Jcc8, 0, kX86CondLe);
-        OpRegMem(kOpSub, rs_rCX, rs_rX86_SP, displacement);
+        LoadWordDisp(rs_rX86_SP, displacement, rs_rBX);
+        OpRegReg(kOpXor, rs_rDI, rs_rDI);
+        OpRegReg(kOpCmp, rs_rBX, rs_rDI);
+        OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);
+
+        length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
+        OpRegReg(kOpSub, rs_rCX, rs_rBX);
+        // Push the start index onto the stack.
+        NewLIR1(kX86Push32R, rBX);
+        is_index_on_stack = true;
       }
     }
   }
@@ -1113,21 +1132,12 @@
       NewLIR3(kX86Lea32RM, rDI, rBX, 2 * start_value);
     }
   } else {
-    if (rl_start.location == kLocPhysReg) {
-      if (rl_start.reg.GetReg() == rDI) {
-        // We have a slight problem here.  We are already using RDI!
-        // Grab the value from the stack.
-        LoadWordDisp(rs_rX86_SP, 0, rs_rDX);
-        OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
-      } else {
-        OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
-      }
-    } else {
-      OpRegCopy(rs_rDI, rs_rBX);
-      // Load the start index from stack, remembering that we pushed EDI.
-      int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
-      LoadWordDisp(rs_rX86_SP, displacement, rs_rDX);
+    if (is_index_on_stack == true) {
+      // Load the start index from stack.
+      NewLIR1(kX86Pop32R, rDX);
       OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
+    } else {
+      OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
     }
   }
 
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 2b20c6f..0ad30be 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -339,7 +339,6 @@
       compiler_(Compiler::Create(compiler_kind)),
       instruction_set_(instruction_set),
       instruction_set_features_(instruction_set_features),
-      instruction_set_is_64_bit_(instruction_set == kX86_64 || instruction_set == kArm64),
       freezing_constructor_lock_("freezing constructor lock"),
       compiled_classes_lock_("compiled classes lock"),
       compiled_methods_lock_("compiled method lock"),
@@ -448,7 +447,7 @@
 }
 
 #define CREATE_TRAMPOLINE(type, abi, offset) \
-    if (instruction_set_is_64_bit_) { \
+    if (Is64BitInstructionSet(instruction_set_)) { \
       return CreateTrampoline64(instruction_set_, abi, \
                                 type ## _ENTRYPOINT_OFFSET(8, offset)); \
     } else { \
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index d49523a..d7d40d5 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -692,7 +692,6 @@
 
   const InstructionSet instruction_set_;
   const InstructionSetFeatures instruction_set_features_;
-  const bool instruction_set_is_64_bit_;
 
   // All class references that require
   mutable ReaderWriterMutex freezing_constructor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 0405198..f76587a 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -235,8 +235,8 @@
   }
 
   // Create the image bitmap.
-  image_bitmap_.reset(gc::accounting::SpaceBitmap::Create("image bitmap", image_->Begin(),
-                                                          length));
+  image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create("image bitmap", image_->Begin(),
+                                                                    length));
   if (image_bitmap_.get() == nullptr) {
     LOG(ERROR) << "Failed to allocate memory for image bitmap";
     return false;
@@ -525,7 +525,7 @@
 
   // Return to write header at start of image with future location of image_roots. At this point,
   // image_end_ is the size of the image (excluding bitmaps).
-  const size_t heap_bytes_per_bitmap_byte = kBitsPerByte * gc::accounting::SpaceBitmap::kAlignment;
+  const size_t heap_bytes_per_bitmap_byte = kBitsPerByte * kObjectAlignment;
   const size_t bitmap_bytes = RoundUp(image_end_, heap_bytes_per_bitmap_byte) /
       heap_bytes_per_bitmap_byte;
   ImageHeader image_header(PointerToLowMemUInt32(image_begin_),
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 92b24f6..ee241cb 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -173,7 +173,7 @@
   const byte* oat_data_begin_;
 
   // Image bitmap which lets us know where the objects inside of the image reside.
-  UniquePtr<gc::accounting::SpaceBitmap> image_bitmap_;
+  UniquePtr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_;
 
   // Offset from oat_data_begin_ to the stubs.
   uint32_t interpreter_to_interpreter_bridge_offset_;
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 64508d1..93b1b5a 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -64,6 +64,7 @@
   if (instruction_set == kThumb2) {
     instruction_set = kArm;
   }
+  const bool is_64_bit_target = Is64BitInstructionSet(instruction_set);
   // Calling conventions used to iterate over parameters to method
   UniquePtr<JniCallingConvention> main_jni_conv(
       JniCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
@@ -109,7 +110,7 @@
                            main_jni_conv->ReferenceCount(),
                            mr_conv->InterproceduralScratchRegister());
 
-  if (instruction_set == kArm64 || instruction_set == kX86_64) {
+  if (is_64_bit_target) {
     __ CopyRawPtrFromThread64(main_jni_conv->SirtLinkOffset(),
                             Thread::TopSirtOffset<8>(),
                             mr_conv->InterproceduralScratchRegister());
@@ -171,7 +172,7 @@
   }
 
   // 4. Write out the end of the quick frames.
-  if (instruction_set == kArm64 || instruction_set == kX86_64) {
+  if (is_64_bit_target) {
     __ StoreStackPointerToThread64(Thread::TopOfManagedStackOffset<8>());
     __ StoreImmediateToThread64(Thread::TopOfManagedStackPcOffset<8>(), 0,
                               mr_conv->InterproceduralScratchRegister());
@@ -216,7 +217,7 @@
   }
   if (main_jni_conv->IsCurrentParamInRegister()) {
     __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
-    if (instruction_set == kArm64 || instruction_set == kX86_64) {
+    if (is_64_bit_target) {
       __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start64),
              main_jni_conv->InterproceduralScratchRegister());
     } else {
@@ -226,7 +227,7 @@
   } else {
     __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
                         main_jni_conv->InterproceduralScratchRegister());
-    if (instruction_set == kArm64 || instruction_set == kX86_64) {
+    if (is_64_bit_target) {
       __ CallFromThread64(jni_start64, main_jni_conv->InterproceduralScratchRegister());
     } else {
       __ CallFromThread32(jni_start32, main_jni_conv->InterproceduralScratchRegister());
@@ -292,14 +293,14 @@
   if (main_jni_conv->IsCurrentParamInRegister()) {
     ManagedRegister jni_env = main_jni_conv->CurrentParamRegister();
     DCHECK(!jni_env.Equals(main_jni_conv->InterproceduralScratchRegister()));
-    if (instruction_set == kArm64 || instruction_set == kX86_64) {
+    if (is_64_bit_target) {
       __ LoadRawPtrFromThread64(jni_env, Thread::JniEnvOffset<8>());
     } else {
       __ LoadRawPtrFromThread32(jni_env, Thread::JniEnvOffset<4>());
     }
   } else {
     FrameOffset jni_env = main_jni_conv->CurrentParamStackOffset();
-    if (instruction_set == kArm64 || instruction_set == kX86_64) {
+    if (is_64_bit_target) {
       __ CopyRawPtrFromThread64(jni_env, Thread::JniEnvOffset<8>(),
                             main_jni_conv->InterproceduralScratchRegister());
     } else {
@@ -331,7 +332,7 @@
     if (instruction_set == kMips && main_jni_conv->GetReturnType() == Primitive::kPrimDouble &&
         return_save_location.Uint32Value() % 8 != 0) {
       // Ensure doubles are 8-byte aligned for MIPS
-      return_save_location = FrameOffset(return_save_location.Uint32Value() + kPointerSize);
+      return_save_location = FrameOffset(return_save_location.Uint32Value() + kMipsPointerSize);
     }
     CHECK_LT(return_save_location.Uint32Value(), frame_size+main_out_arg_size);
     __ Store(return_save_location, main_jni_conv->ReturnRegister(), main_jni_conv->SizeOfReturnValue());
@@ -380,7 +381,7 @@
   }
   if (end_jni_conv->IsCurrentParamInRegister()) {
     __ GetCurrentThread(end_jni_conv->CurrentParamRegister());
-    if (instruction_set == kArm64 || instruction_set == kX86_64) {
+    if (is_64_bit_target) {
       __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end64),
               end_jni_conv->InterproceduralScratchRegister());
     } else {
@@ -390,7 +391,7 @@
   } else {
     __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset(),
                         end_jni_conv->InterproceduralScratchRegister());
-    if (instruction_set == kArm64 || instruction_set == kX86_64) {
+    if (is_64_bit_target) {
       __ CallFromThread64(ThreadOffset<8>(jni_end64), end_jni_conv->InterproceduralScratchRegister());
     } else {
       __ CallFromThread32(ThreadOffset<4>(jni_end32), end_jni_conv->InterproceduralScratchRegister());
diff --git a/compiler/llvm/llvm_compilation_unit.cc b/compiler/llvm/llvm_compilation_unit.cc
index 1d027f9..fe60959 100644
--- a/compiler/llvm/llvm_compilation_unit.cc
+++ b/compiler/llvm/llvm_compilation_unit.cc
@@ -314,23 +314,8 @@
 // section if the section alignment is greater than kArchAlignment.
 void LlvmCompilationUnit::CheckCodeAlign(uint32_t align) const {
   InstructionSet insn_set = GetInstructionSet();
-  switch (insn_set) {
-  case kThumb2:
-  case kArm:
-    CHECK_LE(align, static_cast<uint32_t>(kArmAlignment));
-    break;
-
-  case kX86:
-    CHECK_LE(align, static_cast<uint32_t>(kX86Alignment));
-    break;
-
-  case kMips:
-    CHECK_LE(align, static_cast<uint32_t>(kMipsAlignment));
-    break;
-
-  default:
-    LOG(FATAL) << "Unknown instruction set: " << insn_set;
-  }
+  size_t insn_set_align = GetInstructionSetAlignment(insn_set);
+  CHECK_LE(align, static_cast<uint32_t>(insn_set_align));
 }
 
 
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index eff2425..dc66e9c 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -345,36 +345,6 @@
   return offset;
 }
 
-static void DCheckCodeAlignment(size_t offset, InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-      // Fall-through.
-    case kThumb2:
-      DCHECK_ALIGNED(offset, kArmAlignment);
-      break;
-
-    case kArm64:
-      DCHECK_ALIGNED(offset, kArm64Alignment);
-      break;
-
-    case kMips:
-      DCHECK_ALIGNED(offset, kMipsAlignment);
-      break;
-
-    case kX86_64:
-      // Fall-through.
-    case kX86:
-      DCHECK_ALIGNED(offset, kX86Alignment);
-      break;
-
-    case kNone:
-      // Use a DCHECK instead of FATAL so that in the non-debug case the whole switch can
-      // be optimized away.
-      DCHECK(false);
-      break;
-  }
-}
-
 size_t OatWriter::InitOatCodeMethod(size_t offset, size_t oat_class_index,
                                     size_t __attribute__((unused)) class_def_index,
                                     size_t class_def_method_index,
@@ -406,7 +376,8 @@
     } else {
       CHECK(quick_code != nullptr);
       offset = compiled_method->AlignCode(offset);
-      DCheckCodeAlignment(offset, compiled_method->GetInstructionSet());
+      DCHECK_ALIGNED_PARAM(offset,
+                           GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
 
       uint32_t code_size = quick_code->size() * sizeof(uint8_t);
       CHECK_NE(code_size, 0U);
@@ -539,11 +510,7 @@
           refs++;
         }
       }
-      InstructionSet trg_isa = compiler_driver_->GetInstructionSet();
-      size_t pointer_size = 4;
-      if (trg_isa == kArm64 || trg_isa == kX86_64) {
-        pointer_size = 8;
-      }
+      size_t pointer_size = GetInstructionSetPointerSize(compiler_driver_->GetInstructionSet());
       size_t sirt_size = StackIndirectReferenceTable::GetAlignedSirtSizeTarget(pointer_size, refs);
 
       // Get the generic spill masks and base frame size.
@@ -857,7 +824,8 @@
         relative_offset += aligned_code_delta;
         DCHECK_OFFSET();
       }
-      DCheckCodeAlignment(relative_offset, compiled_method->GetInstructionSet());
+      DCHECK_ALIGNED_PARAM(relative_offset,
+                           GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
 
       uint32_t code_size = quick_code->size() * sizeof(uint8_t);
       CHECK_NE(code_size, 0U);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index beccf01..1efdd38 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -48,7 +48,8 @@
 
   if (!dex_compilation_unit_->IsStatic()) {
     // Add the implicit 'this' argument, not expressed in the signature.
-    HParameterValue* parameter = new (arena_) HParameterValue(parameter_index++);
+    HParameterValue* parameter =
+        new (arena_) HParameterValue(parameter_index++, Primitive::kPrimNot);
     entry_block_->AddInstruction(parameter);
     HLocal* local = GetLocalAt(locals_index++);
     entry_block_->AddInstruction(new (arena_) HStoreLocal(local, parameter));
@@ -59,19 +60,24 @@
   for (int i = 0; i < number_of_parameters; i++) {
     switch (shorty[pos++]) {
       case 'F':
-      case 'D':
-      case 'J': {
+      case 'D': {
         return false;
       }
 
       default: {
         // integer and reference parameters.
-        HParameterValue* parameter = new (arena_) HParameterValue(parameter_index++);
+        HParameterValue* parameter =
+            new (arena_) HParameterValue(parameter_index++, Primitive::GetType(shorty[pos - 1]));
         entry_block_->AddInstruction(parameter);
         HLocal* local = GetLocalAt(locals_index++);
         // Store the parameter value in the local that the dex code will use
         // to reference that parameter.
         entry_block_->AddInstruction(new (arena_) HStoreLocal(local, parameter));
+        if (parameter->GetType() == Primitive::kPrimLong) {
+          i++;
+          locals_index++;
+          parameter_index++;
+        }
         break;
       }
     }
@@ -88,8 +94,8 @@
 
 template<typename T>
 void HGraphBuilder::If_22t(const Instruction& instruction, int32_t dex_offset, bool is_not) {
-  HInstruction* first = LoadLocal(instruction.VRegA());
-  HInstruction* second = LoadLocal(instruction.VRegB());
+  HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
   current_block_->AddInstruction(new (arena_) T(first, second));
   if (is_not) {
     current_block_->AddInstruction(new (arena_) HNot(current_block_->GetLastInstruction()));
@@ -205,25 +211,25 @@
 }
 
 template<typename T>
-void HGraphBuilder::Binop_32x(const Instruction& instruction) {
-  HInstruction* first = LoadLocal(instruction.VRegB());
-  HInstruction* second = LoadLocal(instruction.VRegC());
-  current_block_->AddInstruction(new (arena_) T(Primitive::kPrimInt, first, second));
+void HGraphBuilder::Binop_32x(const Instruction& instruction, Primitive::Type type) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), type);
+  current_block_->AddInstruction(new (arena_) T(type, first, second));
   UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
 }
 
 template<typename T>
-void HGraphBuilder::Binop_12x(const Instruction& instruction) {
-  HInstruction* first = LoadLocal(instruction.VRegA());
-  HInstruction* second = LoadLocal(instruction.VRegB());
-  current_block_->AddInstruction(new (arena_) T(Primitive::kPrimInt, first, second));
+void HGraphBuilder::Binop_12x(const Instruction& instruction, Primitive::Type type) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), type);
+  current_block_->AddInstruction(new (arena_) T(type, first, second));
   UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
 }
 
 template<typename T>
 void HGraphBuilder::Binop_22s(const Instruction& instruction, bool reverse) {
-  HInstruction* first = LoadLocal(instruction.VRegB());
-  HInstruction* second = GetConstant(instruction.VRegC_22s());
+  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  HInstruction* second = GetIntConstant(instruction.VRegC_22s());
   if (reverse) {
     std::swap(first, second);
   }
@@ -233,8 +239,8 @@
 
 template<typename T>
 void HGraphBuilder::Binop_22b(const Instruction& instruction, bool reverse) {
-  HInstruction* first = LoadLocal(instruction.VRegB());
-  HInstruction* second = GetConstant(instruction.VRegC_22b());
+  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  HInstruction* second = GetIntConstant(instruction.VRegC_22b());
   if (reverse) {
     std::swap(first, second);
   }
@@ -242,6 +248,74 @@
   UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
 }
 
+void HGraphBuilder::BuildReturn(const Instruction& instruction, Primitive::Type type) {
+  if (type != Primitive::kPrimVoid) {
+    HInstruction* returned_value = LoadLocal(instruction.VRegA(), type);  // Value lives in vA.
+    current_block_->AddInstruction(new (arena_) HReturn(returned_value));
+  } else {
+    current_block_->AddInstruction(new (arena_) HReturnVoid());
+  }
+  current_block_->AddSuccessor(exit_block_);  // A return always leads to the exit block.
+  current_block_ = nullptr;  // This block is finished; nothing more is appended to it.
+}
+
+bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
+                                uint32_t dex_offset,
+                                uint32_t method_idx,
+                                uint32_t number_of_vreg_arguments,
+                                bool is_range,
+                                uint32_t* args,
+                                uint32_t register_index) {
+  // The shorty holds the return type followed by one character per declared parameter.
+  const DexFile::MethodId& callee_id = dex_file_->GetMethodId(method_idx);
+  const DexFile::ProtoId& callee_proto = dex_file_->GetProtoId(callee_id.proto_idx_);
+  const char* shorty = dex_file_->StringDataByIdx(callee_proto.shorty_idx_);
+  const Primitive::Type return_type = Primitive::GetType(shorty[0]);
+  const bool has_receiver =
+      !(instruction.Opcode() == Instruction::INVOKE_STATIC
+        || instruction.Opcode() == Instruction::INVOKE_STATIC_RANGE);
+  // With a receiver, the slot of the return type character accounts for it.
+  const size_t argument_count = has_receiver ? strlen(shorty) : strlen(shorty) - 1;
+
+  // Treat invoke-direct like static calls for now.
+  HInvoke* invoke = new (arena_) HInvokeStatic(
+      arena_, argument_count, return_type, dex_offset, method_idx);
+
+  size_t first_declared = 0;
+  if (has_receiver) {
+    // The receiver is loaded as a reference and becomes input 0.
+    HInstruction* receiver = LoadLocal(is_range ? register_index : args[0], Primitive::kPrimNot);
+    invoke->SetArgumentAt(0, receiver);
+    first_declared = 1;
+  }
+
+  // vreg walks the argument registers; argument_index walks the inputs of the invoke.
+  uint32_t argument_index = first_declared;
+  const char* next_type = shorty + 1;
+  for (size_t vreg = first_declared; vreg < number_of_vreg_arguments; vreg++, argument_index++) {
+    const Primitive::Type type = Primitive::GetType(*next_type++);
+    if (type == Primitive::kPrimFloat || type == Primitive::kPrimDouble) {
+      return false;
+    }
+    const bool is_long = (type == Primitive::kPrimLong);
+    if (is_long && !is_range && args[vreg] + 1 != args[vreg + 1]) {
+      LOG(WARNING) << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol()
+                   << " at " << dex_offset;
+      // We do not implement non sequential register pair.
+      return false;
+    }
+    HInstruction* input = LoadLocal(is_range ? register_index + vreg : args[vreg], type);
+    invoke->SetArgumentAt(argument_index, input);
+    if (is_long) {
+      vreg++;  // A long spans two registers but is a single input.
+    }
+  }
+
+  DCHECK_EQ(argument_index, argument_count);
+  current_block_->AddInstruction(invoke);
+  return true;
+}
+
 bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, int32_t dex_offset) {
   if (current_block_ == nullptr) {
     return true;  // Dead code
@@ -250,28 +324,47 @@
   switch (instruction.Opcode()) {
     case Instruction::CONST_4: {
       int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = GetConstant(instruction.VRegB_11n());
+      HIntConstant* constant = GetIntConstant(instruction.VRegB_11n());
       UpdateLocal(register_index, constant);
       break;
     }
 
     case Instruction::CONST_16: {
       int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = GetConstant(instruction.VRegB_21s());
+      HIntConstant* constant = GetIntConstant(instruction.VRegB_21s());
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE_16: {
+      int32_t register_index = instruction.VRegA();
+      HLongConstant* constant = GetLongConstant(instruction.VRegB_21s());
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE_32: {
+      int32_t register_index = instruction.VRegA();
+      HLongConstant* constant = GetLongConstant(instruction.VRegB_31i());
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE: {
+      int32_t register_index = instruction.VRegA();
+      HLongConstant* constant = GetLongConstant(instruction.VRegB_51l());
       UpdateLocal(register_index, constant);
       break;
     }
 
     case Instruction::MOVE: {
-      HInstruction* value = LoadLocal(instruction.VRegB());
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
       UpdateLocal(instruction.VRegA(), value);
       break;
     }
 
     case Instruction::RETURN_VOID: {
-      current_block_->AddInstruction(new (arena_) HReturnVoid());
-      current_block_->AddSuccessor(exit_block_);
-      current_block_ = nullptr;
+      BuildReturn(instruction, Primitive::kPrimVoid);
       break;
     }
 
@@ -296,88 +389,82 @@
       break;
     }
 
-    case Instruction::RETURN:
+    case Instruction::RETURN: {
+      BuildReturn(instruction, Primitive::kPrimInt);
+      break;
+    }
+
     case Instruction::RETURN_OBJECT: {
-      HInstruction* value = LoadLocal(instruction.VRegA());
-      current_block_->AddInstruction(new (arena_) HReturn(value));
-      current_block_->AddSuccessor(exit_block_);
-      current_block_ = nullptr;
+      BuildReturn(instruction, Primitive::kPrimNot);
+      break;
+    }
+
+    case Instruction::RETURN_WIDE: {
+      BuildReturn(instruction, Primitive::kPrimLong);
       break;
     }
 
     case Instruction::INVOKE_STATIC:
     case Instruction::INVOKE_DIRECT: {
       uint32_t method_idx = instruction.VRegB_35c();
-      const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
-      uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
-      const char* descriptor = dex_file_->StringByTypeIdx(return_type_idx);
-      const size_t number_of_arguments = instruction.VRegA_35c();
-
-      if (Primitive::GetType(descriptor[0]) != Primitive::kPrimVoid) {
-        return false;
-      }
-
-      // Treat invoke-direct like static calls for now.
-      HInvokeStatic* invoke = new (arena_) HInvokeStatic(
-          arena_, number_of_arguments, dex_offset, method_idx);
-
+      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
       uint32_t args[5];
       instruction.GetArgs(args);
-
-      for (size_t i = 0; i < number_of_arguments; i++) {
-        HInstruction* arg = LoadLocal(args[i]);
-        HInstruction* push = new (arena_) HPushArgument(arg, i);
-        current_block_->AddInstruction(push);
-        invoke->SetArgumentAt(i, push);
+      if (!BuildInvoke(instruction, dex_offset, method_idx, number_of_vreg_arguments, false, args, -1)) {
+        return false;
       }
-
-      current_block_->AddInstruction(invoke);
       break;
     }
 
     case Instruction::INVOKE_STATIC_RANGE:
     case Instruction::INVOKE_DIRECT_RANGE: {
       uint32_t method_idx = instruction.VRegB_3rc();
-      const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
-      uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
-      const char* descriptor = dex_file_->StringByTypeIdx(return_type_idx);
-      const size_t number_of_arguments = instruction.VRegA_3rc();
-
-      if (Primitive::GetType(descriptor[0]) != Primitive::kPrimVoid) {
+      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
+      uint32_t register_index = instruction.VRegC();
+      if (!BuildInvoke(instruction, dex_offset, method_idx,
+                       number_of_vreg_arguments, true, nullptr, register_index)) {
         return false;
       }
-
-      // Treat invoke-direct like static calls for now.
-      HInvokeStatic* invoke = new (arena_) HInvokeStatic(
-          arena_, number_of_arguments, dex_offset, method_idx);
-      int32_t register_index = instruction.VRegC();
-      for (size_t i = 0; i < number_of_arguments; i++) {
-        HInstruction* arg = LoadLocal(register_index + i);
-        HInstruction* push = new (arena_) HPushArgument(arg, i);
-        current_block_->AddInstruction(push);
-        invoke->SetArgumentAt(i, push);
-      }
-      current_block_->AddInstruction(invoke);
       break;
     }
 
     case Instruction::ADD_INT: {
-      Binop_32x<HAdd>(instruction);
+      Binop_32x<HAdd>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::ADD_LONG: {
+      Binop_32x<HAdd>(instruction, Primitive::kPrimLong);
       break;
     }
 
     case Instruction::SUB_INT: {
-      Binop_32x<HSub>(instruction);
+      Binop_32x<HSub>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::SUB_LONG: {
+      Binop_32x<HSub>(instruction, Primitive::kPrimLong);
       break;
     }
 
     case Instruction::ADD_INT_2ADDR: {
-      Binop_12x<HAdd>(instruction);
+      Binop_12x<HAdd>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::ADD_LONG_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimLong);
       break;
     }
 
     case Instruction::SUB_INT_2ADDR: {
-      Binop_12x<HSub>(instruction);
+      Binop_12x<HSub>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
+    case Instruction::SUB_LONG_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimLong);
       break;
     }
 
@@ -408,6 +495,11 @@
       break;
     }
 
+    case Instruction::MOVE_RESULT_WIDE: {
+      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+      break;
+    }
+
     case Instruction::NOP:
       break;
 
@@ -417,7 +509,7 @@
   return true;
 }
 
-HIntConstant* HGraphBuilder::GetConstant0() {
+HIntConstant* HGraphBuilder::GetIntConstant0() {
   if (constant0_ != nullptr) {
     return constant0_;
   }
@@ -426,7 +518,7 @@
   return constant0_;
 }
 
-HIntConstant* HGraphBuilder::GetConstant1() {
+HIntConstant* HGraphBuilder::GetIntConstant1() {
   if (constant1_ != nullptr) {
     return constant1_;
   }
@@ -435,10 +527,10 @@
   return constant1_;
 }
 
-HIntConstant* HGraphBuilder::GetConstant(int constant) {
+HIntConstant* HGraphBuilder::GetIntConstant(int32_t constant) {
   switch (constant) {
-    case 0: return GetConstant0();
-    case 1: return GetConstant1();
+    case 0: return GetIntConstant0();
+    case 1: return GetIntConstant1();
     default: {
       HIntConstant* instruction = new (arena_) HIntConstant(constant);
       entry_block_->AddInstruction(instruction);
@@ -447,6 +539,12 @@
   }
 }
 
+HLongConstant* HGraphBuilder::GetLongConstant(int64_t constant) {
+  HLongConstant* instruction = new (arena_) HLongConstant(constant);
+  entry_block_->AddInstruction(instruction);
+  return instruction;
+}
+
 HLocal* HGraphBuilder::GetLocalAt(int register_index) const {
   return locals_.Get(register_index);
 }
@@ -456,9 +554,9 @@
   current_block_->AddInstruction(new (arena_) HStoreLocal(local, instruction));
 }
 
-HInstruction* HGraphBuilder::LoadLocal(int register_index) const {
+HInstruction* HGraphBuilder::LoadLocal(int register_index, Primitive::Type type) const {
   HLocal* local = GetLocalAt(register_index);
-  current_block_->AddInstruction(new (arena_) HLoadLocal(local));
+  current_block_->AddInstruction(new (arena_) HLoadLocal(local, type));
   return current_block_->GetLastInstruction();
 }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 60d9982..108514a 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -19,6 +19,7 @@
 
 #include "dex_file.h"
 #include "driver/dex_compilation_unit.h"
+#include "primitive.h"
 #include "utils/allocation.h"
 #include "utils/growable_array.h"
 
@@ -29,13 +30,14 @@
 class HBasicBlock;
 class HGraph;
 class HIntConstant;
+class HLongConstant;
 class HInstruction;
 class HLocal;
 
 class HGraphBuilder : public ValueObject {
  public:
   HGraphBuilder(ArenaAllocator* arena,
-                const DexCompilationUnit* dex_compilation_unit = nullptr,
+                DexCompilationUnit* dex_compilation_unit = nullptr,
                 const DexFile* dex_file = nullptr)
       : arena_(arena),
         branch_targets_(arena, 0),
@@ -63,24 +65,44 @@
   void MaybeUpdateCurrentBlock(size_t index);
   HBasicBlock* FindBlockStartingAt(int32_t index) const;
 
-  HIntConstant* GetConstant0();
-  HIntConstant* GetConstant1();
-  HIntConstant* GetConstant(int constant);
+  HIntConstant* GetIntConstant0();
+  HIntConstant* GetIntConstant1();
+  HIntConstant* GetIntConstant(int32_t constant);
+  HLongConstant* GetLongConstant(int64_t constant);
   void InitializeLocals(uint16_t count);
   HLocal* GetLocalAt(int register_index) const;
   void UpdateLocal(int register_index, HInstruction* instruction) const;
-  HInstruction* LoadLocal(int register_index) const;
+  HInstruction* LoadLocal(int register_index, Primitive::Type type) const;
 
   // Temporarily returns whether the compiler supports the parameters
   // of the method.
   bool InitializeParameters(uint16_t number_of_parameters);
 
-  template<typename T> void Binop_32x(const Instruction& instruction);
-  template<typename T> void Binop_12x(const Instruction& instruction);
-  template<typename T> void Binop_22b(const Instruction& instruction, bool reverse);
-  template<typename T> void Binop_22s(const Instruction& instruction, bool reverse);
+  template<typename T>
+  void Binop_32x(const Instruction& instruction, Primitive::Type type);
+
+  template<typename T>
+  void Binop_12x(const Instruction& instruction, Primitive::Type type);
+
+  template<typename T>
+  void Binop_22b(const Instruction& instruction, bool reverse);
+
+  template<typename T>
+  void Binop_22s(const Instruction& instruction, bool reverse);
+
   template<typename T> void If_22t(const Instruction& instruction, int32_t dex_offset, bool is_not);
 
+  void BuildReturn(const Instruction& instruction, Primitive::Type type);
+
+  // Builds an invocation node and returns whether the instruction is supported.
+  bool BuildInvoke(const Instruction& instruction,
+                   uint32_t dex_offset,
+                   uint32_t method_idx,
+                   uint32_t number_of_vreg_arguments,
+                   bool is_range,
+                   uint32_t* args,
+                   uint32_t register_index);
+
   ArenaAllocator* const arena_;
 
   // A list of the size of the dex code holding block information for
@@ -99,7 +121,7 @@
   HIntConstant* constant1_;
 
   const DexFile* const dex_file_;
-  const DexCompilationUnit* const dex_compilation_unit_;
+  DexCompilationUnit* const dex_compilation_unit_;
 
   DISALLOW_COPY_AND_ASSIGN(HGraphBuilder);
 };
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 6648598..5c7cac1 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
 
+#include "base/bit_field.h"
 #include "globals.h"
 #include "instruction_set.h"
 #include "memory_region.h"
@@ -25,6 +26,8 @@
 
 namespace art {
 
+static size_t constexpr kVRegSize = 4;
+
 class DexCompilationUnit;
 
 class CodeAllocator {
@@ -49,30 +52,149 @@
  */
 class Location : public ValueObject {
  public:
-  template<typename T>
-  T reg() const { return static_cast<T>(reg_); }
+  enum Kind {
+    kInvalid = 0,
+    kStackSlot = 1,  // Word size slot.
+    kDoubleStackSlot = 2,  // 64bit stack slot.
+    kRegister = 3,
+    // On 32-bit architectures, quick can pass a long where the
+    // low bits are in the last parameter register, and the high
+    // bits are in a stack slot. The kQuickParameter kind is for
+    // handling this special case.
+    kQuickParameter = 4,
+  };
 
-  Location() : reg_(kInvalid) { }
-  explicit Location(uword reg) : reg_(reg) { }
-
-  static Location RegisterLocation(uword reg) {
-    return Location(reg);
+  Location() : value_(kInvalid) {
+    DCHECK(!IsValid());
   }
 
-  bool IsValid() const { return reg_ != kInvalid; }
-
-  Location(const Location& other) : reg_(other.reg_) { }
+  Location(const Location& other) : ValueObject(), value_(other.value_) {}
 
   Location& operator=(const Location& other) {
-    reg_ = other.reg_;
+    value_ = other.value_;
     return *this;
   }
 
+  bool IsValid() const {
+    return value_ != kInvalid;
+  }
+
+  // Register locations.
+  static Location RegisterLocation(ManagedRegister reg) {
+    return Location(kRegister, reg.RegId());
+  }
+
+  bool IsRegister() const {
+    return GetKind() == kRegister;
+  }
+
+  ManagedRegister reg() const {
+    DCHECK(IsRegister());
+    return static_cast<ManagedRegister>(GetPayload());
+  }
+
+  static uword EncodeStackIndex(intptr_t stack_index) {
+    DCHECK(-kStackIndexBias <= stack_index);
+    DCHECK(stack_index < kStackIndexBias);
+    return static_cast<uword>(kStackIndexBias + stack_index);
+  }
+
+  static Location StackSlot(intptr_t stack_index) {
+    uword payload = EncodeStackIndex(stack_index);
+    Location loc(kStackSlot, payload);
+    // Ensure that sign is preserved.
+    DCHECK_EQ(loc.GetStackIndex(), stack_index);
+    return loc;
+  }
+
+  bool IsStackSlot() const {
+    return GetKind() == kStackSlot;
+  }
+
+  static Location DoubleStackSlot(intptr_t stack_index) {
+    uword payload = EncodeStackIndex(stack_index);
+    Location loc(kDoubleStackSlot, payload);
+    // Ensure that sign is preserved.
+    DCHECK_EQ(loc.GetStackIndex(), stack_index);
+    return loc;
+  }
+
+  bool IsDoubleStackSlot() const {
+    return GetKind() == kDoubleStackSlot;
+  }
+
+  intptr_t GetStackIndex() const {
+    DCHECK(IsStackSlot() || IsDoubleStackSlot());
+    // Decode stack index manually to preserve sign.
+    return GetPayload() - kStackIndexBias;
+  }
+
+  intptr_t GetHighStackIndex(uintptr_t word_size) const {
+    DCHECK(IsDoubleStackSlot());
+    // Decode stack index manually to preserve sign.
+    return GetPayload() - kStackIndexBias + word_size;
+  }
+
+  static Location QuickParameter(uint32_t parameter_index) {
+    return Location(kQuickParameter, parameter_index);
+  }
+
+  uint32_t GetQuickParameterIndex() const {
+    DCHECK(IsQuickParameter());
+    return GetPayload();
+  }
+
+  bool IsQuickParameter() const {
+    return GetKind() == kQuickParameter;
+  }
+
+  arm::ArmManagedRegister AsArm() const;
+  x86::X86ManagedRegister AsX86() const;
+
+  Kind GetKind() const {
+    return KindField::Decode(value_);
+  }
+
+  bool Equals(Location other) const {
+    return value_ == other.value_;
+  }
+
+  const char* DebugString() const {
+    switch (GetKind()) {
+      case kInvalid: return "?";
+      case kRegister: return "R";
+      case kStackSlot: return "S";
+      case kDoubleStackSlot: return "DS";
+      case kQuickParameter: return "Q";
+    }
+    return "?";
+  }
+
  private:
-  // The target register for that location.
-  // TODO: Support stack location.
-  uword reg_;
-  static const uword kInvalid = -1;
+  // Number of bits required to encode Kind value.
+  static constexpr uint32_t kBitsForKind = 4;
+  static constexpr uint32_t kBitsForPayload = kWordSize * kBitsPerByte - kBitsForKind;
+
+  explicit Location(uword value) : value_(value) {}
+
+  Location(Kind kind, uword payload)
+      : value_(KindField::Encode(kind) | PayloadField::Encode(payload)) {}
+
+  uword GetPayload() const {
+    return PayloadField::Decode(value_);
+  }
+
+  typedef BitField<Kind, 0, kBitsForKind> KindField;
+  typedef BitField<uword, kBitsForKind, kBitsForPayload> PayloadField;
+
+  // Layout for stack slots.
+  static const intptr_t kStackIndexBias =
+      static_cast<intptr_t>(1) << (kBitsForPayload - 1);
+
+  // A Location either contains kind and payload fields or a tagged handle for
+  // a constant location. Values of the Kind enumeration are selected in such a
+  // way that none of them can be interpreted as a kConstant tag.
+  uword value_;
 };
 
 /**
@@ -203,11 +325,10 @@
     return registers_[index];
   }
 
-  uint8_t GetStackOffsetOf(size_t index) const {
-    DCHECK_GE(index, number_of_registers_);
+  size_t GetStackOffsetOf(size_t index, size_t word_size) const {
     // We still reserve the space for parameters passed by registers.
-    // Add kWordSize for the method pointer.
-    return index * kWordSize + kWordSize;
+    // Add word_size for the method pointer; size_t since offsets can exceed 255.
+    return index * kVRegSize + word_size;
   }
 
  private:
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 4e88765..27691ac 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -17,6 +17,7 @@
 #include "code_generator_arm.h"
 #include "utils/assembler.h"
 #include "utils/arm/assembler_arm.h"
+#include "utils/arm/managed_register_arm.h"
 
 #include "mirror/array.h"
 #include "mirror/art_method.h"
@@ -24,11 +25,20 @@
 #define __ reinterpret_cast<ArmAssembler*>(GetAssembler())->
 
 namespace art {
+
+arm::ArmManagedRegister Location::AsArm() const {
+  return reg().AsArm();
+}
+
 namespace arm {
 
 static constexpr int kNumberOfPushedRegistersAtEntry = 1;
 static constexpr int kCurrentMethodStackOffset = 0;
 
+static Location ArmCoreLocation(Register reg) {
+  return Location::RegisterLocation(ArmManagedRegister::FromCoreRegister(reg));
+}
+
 InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen)
       : HGraphVisitor(graph),
         assembler_(codegen->GetAssembler()),
@@ -38,9 +48,11 @@
   core_spill_mask_ |= (1 << LR);
   __ PushList((1 << LR));
 
-  // Add the current ART method to the frame size, the return PC, and the filler.
-  SetFrameSize(RoundUp((
-      GetGraph()->GetMaximumNumberOfOutVRegs() + GetGraph()->GetNumberOfVRegs() + 3) * kArmWordSize,
+  SetFrameSize(RoundUp(
+      (GetGraph()->GetMaximumNumberOfOutVRegs() + GetGraph()->GetNumberOfVRegs()) * kVRegSize
+      + kVRegSize  // filler
+      + kArmWordSize  // Art method
+      + kNumberOfPushedRegistersAtEntry * kArmWordSize,
       kStackAlignment));
   // The return PC has already been pushed on the stack.
   __ AddConstant(SP, -(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize));
@@ -63,28 +75,204 @@
   if (reg_number >= number_of_vregs - number_of_in_vregs) {
     // Local is a parameter of the method. It is stored in the caller's frame.
     return GetFrameSize() + kArmWordSize  // ART method
-                          + (reg_number - number_of_vregs + number_of_in_vregs) * kArmWordSize;
+                          + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize;
   } else {
     // Local is a temporary in this method. It is stored in this method's frame.
     return GetFrameSize() - (kNumberOfPushedRegistersAtEntry * kArmWordSize)
-                          - kArmWordSize  // filler.
-                          - (number_of_vregs * kArmWordSize)
-                          + (reg_number * kArmWordSize);
+                          - kVRegSize  // filler.
+                          - (number_of_vregs * kVRegSize)
+                          + (reg_number * kVRegSize);
+  }
+}
+
+Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      uint32_t index = gp_index_++;
+      if (index < calling_convention.GetNumberOfRegisters()) {
+        return ArmCoreLocation(calling_convention.GetRegisterAt(index));
+      } else {
+        return Location::StackSlot(calling_convention.GetStackOffsetOf(index, kArmWordSize));
+      }
+    }
+
+    case Primitive::kPrimLong: {
+      uint32_t index = gp_index_;
+      gp_index_ += 2;
+      if (index + 1 < calling_convention.GetNumberOfRegisters()) {
+        return Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(
+            calling_convention.GetRegisterPairAt(index)));
+      } else if (index + 1 == calling_convention.GetNumberOfRegisters()) {
+        return Location::QuickParameter(index);
+      } else {
+        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index, kArmWordSize));
+      }
+    }
+
+    case Primitive::kPrimDouble:
+    case Primitive::kPrimFloat:
+      LOG(FATAL) << "Unimplemented parameter type " << type;
+      break;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected parameter type " << type;
+      break;
+  }
+  return Location();
+}
+
+void CodeGeneratorARM::Move32(Location destination, Location source) {
+  if (source.Equals(destination)) {
+    return;
+  }
+  if (destination.IsRegister()) {
+    if (source.IsRegister()) {
+      __ Mov(destination.AsArm().AsCoreRegister(), source.AsArm().AsCoreRegister());
+    } else {
+      __ ldr(destination.AsArm().AsCoreRegister(), Address(SP, source.GetStackIndex()));
+    }
+  } else {
+    DCHECK(destination.IsStackSlot());
+    if (source.IsRegister()) {
+      __ str(source.AsArm().AsCoreRegister(), Address(SP, destination.GetStackIndex()));
+    } else {
+      __ ldr(R0, Address(SP, source.GetStackIndex()));
+      __ str(R0, Address(SP, destination.GetStackIndex()));
+    }
+  }
+}
+
+void CodeGeneratorARM::Move64(Location destination, Location source) {
+  if (source.Equals(destination)) {
+    return;
+  }
+  if (destination.IsRegister()) {
+    if (source.IsRegister()) {
+      __ Mov(destination.AsArm().AsRegisterPairLow(), source.AsArm().AsRegisterPairLow());
+      __ Mov(destination.AsArm().AsRegisterPairHigh(), source.AsArm().AsRegisterPairHigh());
+    } else if (source.IsQuickParameter()) {
+      uint32_t argument_index = source.GetQuickParameterIndex();
+      InvokeDexCallingConvention calling_convention;
+      __ Mov(destination.AsArm().AsRegisterPairLow(),
+             calling_convention.GetRegisterAt(argument_index));
+      __ ldr(destination.AsArm().AsRegisterPairHigh(),
+             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize) + GetFrameSize()));
+    } else {
+      DCHECK(source.IsDoubleStackSlot());
+      if (destination.AsArm().AsRegisterPair() == R1_R2) {
+        __ ldr(R1, Address(SP, source.GetStackIndex()));
+        __ ldr(R2, Address(SP, source.GetHighStackIndex(kArmWordSize)));
+      } else {
+        __ LoadFromOffset(kLoadWordPair, destination.AsArm().AsRegisterPairLow(),
+                          SP, source.GetStackIndex());
+      }
+    }
+  } else if (destination.IsQuickParameter()) {
+    InvokeDexCallingConvention calling_convention;
+    uint32_t argument_index = destination.GetQuickParameterIndex();
+    if (source.IsRegister()) {
+      __ Mov(calling_convention.GetRegisterAt(argument_index), source.AsArm().AsRegisterPairLow());
+      __ str(source.AsArm().AsRegisterPairHigh(),
+             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize)));
+    } else {
+      DCHECK(source.IsDoubleStackSlot());
+      __ ldr(calling_convention.GetRegisterAt(argument_index), Address(SP, source.GetStackIndex()));
+      __ ldr(R0, Address(SP, source.GetHighStackIndex(kArmWordSize)));
+      __ str(R0, Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize)));
+    }
+  } else {
+    DCHECK(destination.IsDoubleStackSlot());
+    if (source.IsRegister()) {
+      if (source.AsArm().AsRegisterPair() == R1_R2) {
+        __ str(R1, Address(SP, destination.GetStackIndex()));
+        __ str(R2, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
+      } else {
+        __ StoreToOffset(kStoreWordPair, source.AsArm().AsRegisterPairLow(),
+                         SP, destination.GetStackIndex());
+      }
+    } else if (source.IsQuickParameter()) {
+      InvokeDexCallingConvention calling_convention;
+      uint32_t argument_index = source.GetQuickParameterIndex();
+      __ str(calling_convention.GetRegisterAt(argument_index),
+             Address(SP, destination.GetStackIndex()));
+      __ ldr(R0,
+             Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1, kArmWordSize) + GetFrameSize()));
+      __ str(R0, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
+    } else {
+      DCHECK(source.IsDoubleStackSlot());
+      __ ldr(R0, Address(SP, source.GetStackIndex()));
+      __ str(R0, Address(SP, destination.GetStackIndex()));
+      __ ldr(R0, Address(SP, source.GetHighStackIndex(kArmWordSize)));
+      __ str(R0, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
+    }
   }
 }
 
 void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
   if (instruction->AsIntConstant() != nullptr) {
-    __ LoadImmediate(location.reg<Register>(), instruction->AsIntConstant()->GetValue());
+    int32_t value = instruction->AsIntConstant()->GetValue();
+    if (location.IsRegister()) {
+      __ LoadImmediate(location.AsArm().AsCoreRegister(), value);
+    } else {
+      __ LoadImmediate(R0, value);
+      __ str(R0, Address(SP, location.GetStackIndex()));
+    }
+  } else if (instruction->AsLongConstant() != nullptr) {
+    int64_t value = instruction->AsLongConstant()->GetValue();
+    if (location.IsRegister()) {
+      __ LoadImmediate(location.AsArm().AsRegisterPairLow(), Low32Bits(value));
+      __ LoadImmediate(location.AsArm().AsRegisterPairHigh(), High32Bits(value));
+    } else {
+      __ LoadImmediate(R0, Low32Bits(value));
+      __ str(R0, Address(SP, location.GetStackIndex()));
+      __ LoadImmediate(R0, High32Bits(value));
+      __ str(R0, Address(SP, location.GetHighStackIndex(kArmWordSize)));
+    }
   } else if (instruction->AsLoadLocal() != nullptr) {
-    __ LoadFromOffset(kLoadWord, location.reg<Register>(),
-                      SP, GetStackSlot(instruction->AsLoadLocal()->GetLocal()));
+    uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
+    switch (instruction->GetType()) {
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimInt:
+      case Primitive::kPrimNot:
+        Move32(location, Location::StackSlot(stack_slot));
+        break;
+
+      case Primitive::kPrimLong:
+        Move64(location, Location::DoubleStackSlot(stack_slot));
+        break;
+
+      default:
+        LOG(FATAL) << "Unimplemented type " << instruction->GetType();
+    }
   } else {
     // This can currently only happen when the instruction that requests the move
     // is the next to be compiled.
     DCHECK_EQ(instruction->GetNext(), move_for);
-    __ mov(location.reg<Register>(),
-           ShifterOperand(instruction->GetLocations()->Out().reg<Register>()));
+    switch (instruction->GetType()) {
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimNot:
+      case Primitive::kPrimInt:
+        Move32(location, instruction->GetLocations()->Out());
+        break;
+
+      case Primitive::kPrimLong:
+        Move64(location, instruction->GetLocations()->Out());
+        break;
+
+      default:
+        LOG(FATAL) << "Unimplemented type " << instruction->GetType();
+    }
   }
 }
 
@@ -114,13 +302,13 @@
 
 void LocationsBuilderARM::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  locations->SetInAt(0, Location(R0));
+  locations->SetInAt(0, ArmCoreLocation(R0));
   if_instr->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
   // TODO: Generate the input as a condition, instead of materializing in a register.
-  __ cmp(if_instr->GetLocations()->InAt(0).reg<Register>(), ShifterOperand(0));
+  __ cmp(if_instr->GetLocations()->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(0));
   __ b(codegen_->GetLabelOf(if_instr->IfFalseSuccessor()), EQ);
   if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) {
     __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
@@ -129,18 +317,18 @@
 
 void LocationsBuilderARM::VisitEqual(HEqual* equal) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal);
-  locations->SetInAt(0, Location(R0));
-  locations->SetInAt(1, Location(R1));
-  locations->SetOut(Location(R0));
+  locations->SetInAt(0, ArmCoreLocation(R0));
+  locations->SetInAt(1, ArmCoreLocation(R1));
+  locations->SetOut(ArmCoreLocation(R0));
   equal->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::VisitEqual(HEqual* equal) {
   LocationSummary* locations = equal->GetLocations();
-  __ teq(locations->InAt(0).reg<Register>(),
-         ShifterOperand(locations->InAt(1).reg<Register>()));
-  __ mov(locations->Out().reg<Register>(), ShifterOperand(1), EQ);
-  __ mov(locations->Out().reg<Register>(), ShifterOperand(0), NE);
+  __ teq(locations->InAt(0).AsArm().AsCoreRegister(),
+         ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister()));
+  __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(1), EQ);
+  __ mov(locations->Out().AsArm().AsCoreRegister(), ShifterOperand(0), NE);
 }
 
 void LocationsBuilderARM::VisitLocal(HLocal* local) {
@@ -161,14 +349,27 @@
 
 void LocationsBuilderARM::VisitStoreLocal(HStoreLocal* store) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
-  locations->SetInAt(1, Location(R0));
+  switch (store->InputAt(1)->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
+      break;
+
+    case Primitive::kPrimLong:
+      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented local type " << store->InputAt(1)->GetType();
+  }
   store->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations = store->GetLocations();
-  __ StoreToOffset(kStoreWord, locations->InAt(1).reg<Register>(),
-                   SP, codegen_->GetStackSlot(store->GetLocal()));
 }
 
 void LocationsBuilderARM::VisitIntConstant(HIntConstant* constant) {
@@ -179,6 +380,14 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) {
+  constant->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM::VisitLongConstant(HLongConstant* constant) {
+  // Will be generated at use site.
+}
+
 void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) {
   ret->SetLocations(nullptr);
 }
@@ -189,56 +398,83 @@
 
 void LocationsBuilderARM::VisitReturn(HReturn* ret) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(ret);
-  locations->SetInAt(0, Location(R0));
+  switch (ret->InputAt(0)->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      locations->SetInAt(0, ArmCoreLocation(R0));
+      break;
+
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+  }
+
   ret->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) {
-  DCHECK_EQ(ret->GetLocations()->InAt(0).reg<Register>(), R0);
+  if (kIsDebugBuild) {
+    switch (ret->InputAt(0)->GetType()) {
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimInt:
+      case Primitive::kPrimNot:
+        DCHECK_EQ(ret->GetLocations()->InAt(0).AsArm().AsCoreRegister(), R0);
+        break;
+
+      case Primitive::kPrimLong:
+        DCHECK_EQ(ret->GetLocations()->InAt(0).AsArm().AsRegisterPair(), R0_R1);
+        break;
+
+      default:
+        LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+    }
+  }
   codegen_->GenerateFrameExit();
 }
 
-static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 };
-static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
-
-class InvokeDexCallingConvention : public CallingConvention<Register> {
- public:
-  InvokeDexCallingConvention()
-      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
-};
-
-void LocationsBuilderARM::VisitPushArgument(HPushArgument* argument) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(argument);
-  InvokeDexCallingConvention calling_convention;
-  if (argument->GetArgumentIndex() < calling_convention.GetNumberOfRegisters()) {
-    Location location = Location(calling_convention.GetRegisterAt(argument->GetArgumentIndex()));
-    locations->SetInAt(0, location);
-    locations->SetOut(location);
-  } else {
-    locations->SetInAt(0, Location(R0));
-  }
-  argument->SetLocations(locations);
-}
-
-void InstructionCodeGeneratorARM::VisitPushArgument(HPushArgument* argument) {
-  uint8_t argument_index = argument->GetArgumentIndex();
-  InvokeDexCallingConvention calling_convention;
-  size_t parameter_registers = calling_convention.GetNumberOfRegisters();
-  LocationSummary* locations = argument->GetLocations();
-  if (argument_index >= parameter_registers) {
-    uint8_t offset = calling_convention.GetStackOffsetOf(argument_index);
-    __ StoreToOffset(kStoreWord, locations->InAt(0).reg<Register>(), SP, offset);
-  } else {
-    DCHECK_EQ(locations->Out().reg<Register>(), locations->InAt(0).reg<Register>());
-  }
-}
-
 void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
-  locations->AddTemp(Location(R0));
+  locations->AddTemp(ArmCoreLocation(R0));
+
+  InvokeDexCallingConventionVisitor calling_convention_visitor;
+  for (int i = 0; i < invoke->InputCount(); i++) {
+    HInstruction* input = invoke->InputAt(i);
+    locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
+  }
+
+  switch (invoke->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      locations->SetOut(ArmCoreLocation(R0));
+      break;
+
+    case Primitive::kPrimLong:
+      locations->SetOut(Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
+      break;
+
+    case Primitive::kPrimVoid:
+      break;
+
+    case Primitive::kPrimDouble:
+    case Primitive::kPrimFloat:
+      LOG(FATAL) << "Unimplemented return type " << invoke->GetType();
+      break;
+  }
+
   invoke->SetLocations(locations);
 }
 
@@ -247,7 +483,7 @@
 }
 
 void InstructionCodeGeneratorARM::VisitInvokeStatic(HInvokeStatic* invoke) {
-  Register temp = invoke->GetLocations()->GetTemp(0).reg<Register>();
+  Register temp = invoke->GetLocations()->GetTemp(0).AsArm().AsCoreRegister();
   size_t index_in_cache = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
       invoke->GetIndexInDexCache() * kArmWordSize;
 
@@ -277,13 +513,30 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add);
   switch (add->GetResultType()) {
     case Primitive::kPrimInt: {
-      locations->SetInAt(0, Location(R0));
-      locations->SetInAt(1, Location(R1));
-      locations->SetOut(Location(R0));
+      locations->SetInAt(0, ArmCoreLocation(R0));
+      locations->SetInAt(1, ArmCoreLocation(R1));
+      locations->SetOut(ArmCoreLocation(R0));
       break;
     }
+
+    case Primitive::kPrimLong: {
+      locations->SetInAt(
+          0, Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
+      locations->SetInAt(
+          1, Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R2_R3)));
+      locations->SetOut(Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
+      break;
+    }
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+      break;
+
     default:
-      LOG(FATAL) << "Unimplemented";
+      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
   }
   add->SetLocations(locations);
 }
@@ -292,12 +545,29 @@
   LocationSummary* locations = add->GetLocations();
   switch (add->GetResultType()) {
     case Primitive::kPrimInt:
-      __ add(locations->Out().reg<Register>(),
-             locations->InAt(0).reg<Register>(),
-             ShifterOperand(locations->InAt(1).reg<Register>()));
+      __ add(locations->Out().AsArm().AsCoreRegister(),
+             locations->InAt(0).AsArm().AsCoreRegister(),
+             ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister()));
       break;
+
+    case Primitive::kPrimLong:
+      __ adds(locations->Out().AsArm().AsRegisterPairLow(),
+              locations->InAt(0).AsArm().AsRegisterPairLow(),
+              ShifterOperand(locations->InAt(1).AsArm().AsRegisterPairLow()));
+      __ adc(locations->Out().AsArm().AsRegisterPairHigh(),
+             locations->InAt(0).AsArm().AsRegisterPairHigh(),
+             ShifterOperand(locations->InAt(1).AsArm().AsRegisterPairHigh()));
+      break;
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+      break;
+
     default:
-      LOG(FATAL) << "Unimplemented";
+      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
   }
 }
 
@@ -305,13 +575,30 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(sub);
   switch (sub->GetResultType()) {
     case Primitive::kPrimInt: {
-      locations->SetInAt(0, Location(R0));
-      locations->SetInAt(1, Location(R1));
-      locations->SetOut(Location(R0));
+      locations->SetInAt(0, ArmCoreLocation(R0));
+      locations->SetInAt(1, ArmCoreLocation(R1));
+      locations->SetOut(ArmCoreLocation(R0));
       break;
     }
+
+    case Primitive::kPrimLong: {
+      locations->SetInAt(
+          0, Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
+      locations->SetInAt(
+          1, Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R2_R3)));
+      locations->SetOut(Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
+      break;
+    }
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+      break;
+
     default:
-      LOG(FATAL) << "Unimplemented";
+      LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType();
   }
   sub->SetLocations(locations);
 }
@@ -320,12 +607,29 @@
   LocationSummary* locations = sub->GetLocations();
   switch (sub->GetResultType()) {
     case Primitive::kPrimInt:
-      __ sub(locations->Out().reg<Register>(),
-             locations->InAt(0).reg<Register>(),
-             ShifterOperand(locations->InAt(1).reg<Register>()));
+      __ sub(locations->Out().AsArm().AsCoreRegister(),
+             locations->InAt(0).AsArm().AsCoreRegister(),
+             ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister()));
       break;
+
+    case Primitive::kPrimLong:
+      __ subs(locations->Out().AsArm().AsRegisterPairLow(),
+              locations->InAt(0).AsArm().AsRegisterPairLow(),
+              ShifterOperand(locations->InAt(1).AsArm().AsRegisterPairLow()));
+      __ sbc(locations->Out().AsArm().AsRegisterPairHigh(),
+             locations->InAt(0).AsArm().AsRegisterPairHigh(),
+             ShifterOperand(locations->InAt(1).AsArm().AsRegisterPairHigh()));
+      break;
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+      break;
+
     default:
-      LOG(FATAL) << "Unimplemented";
+      LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType();
   }
 }
 
@@ -345,7 +649,7 @@
 
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetOut(Location(R0));
+  locations->SetOut(ArmCoreLocation(R0));
   instruction->SetLocations(locations);
 }
 
@@ -363,36 +667,31 @@
 
 void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  InvokeDexCallingConvention calling_convention;
-  uint32_t argument_index = instruction->GetIndex();
-  if (argument_index < calling_convention.GetNumberOfRegisters()) {
-    locations->SetOut(Location(calling_convention.GetRegisterAt(argument_index)));
-  } else {
-    locations->SetOut(Location(R0));
+  Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
+  if (location.IsStackSlot()) {
+    location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+  } else if (location.IsDoubleStackSlot()) {
+    location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
   }
+  locations->SetOut(location);
   instruction->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::VisitParameterValue(HParameterValue* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  InvokeDexCallingConvention calling_convention;
-  uint8_t argument_index = instruction->GetIndex();
-  if (argument_index >= calling_convention.GetNumberOfRegisters()) {
-    uint8_t offset = calling_convention.GetStackOffsetOf(argument_index);
-    __ ldr(locations->Out().reg<Register>(), Address(SP, offset + codegen_->GetFrameSize()));
-  }
+  // Nothing to do: the parameter is already at its location.
 }
 
 void LocationsBuilderARM::VisitNot(HNot* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetInAt(0, Location(R0));
-  locations->SetOut(Location(R0));
+  locations->SetInAt(0, ArmCoreLocation(R0));
+  locations->SetOut(ArmCoreLocation(R0));
   instruction->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::VisitNot(HNot* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  __ eor(locations->Out().reg<Register>(), locations->InAt(0).reg<Register>(), ShifterOperand(1));
+  __ eor(locations->Out().AsArm().AsCoreRegister(),
+         locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(1));
 }
 
 }  // namespace arm
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index a51d85e..ed35f94 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -24,11 +24,45 @@
 namespace art {
 namespace arm {
 
+class CodeGeneratorARM;
+
 static constexpr size_t kArmWordSize = 4;
 
+static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 };
+static constexpr RegisterPair kParameterCorePairRegisters[] = { R1_R2, R2_R3 };
+static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+
+class InvokeDexCallingConvention : public CallingConvention<Register> {
+ public:
+  InvokeDexCallingConvention()
+      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
+
+  RegisterPair GetRegisterPairAt(size_t argument_index) {
+    DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
+    return kParameterCorePairRegisters[argument_index];
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
+};
+
+class InvokeDexCallingConventionVisitor {
+ public:
+  InvokeDexCallingConventionVisitor() : gp_index_(0) {}
+
+  Location GetNextLocation(Primitive::Type type);
+
+ private:
+  InvokeDexCallingConvention calling_convention;
+  uint32_t gp_index_;
+
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+};
+
 class LocationsBuilderARM : public HGraphVisitor {
  public:
-  explicit LocationsBuilderARM(HGraph* graph) : HGraphVisitor(graph) { }
+  explicit LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen)
+      : HGraphVisitor(graph), codegen_(codegen) {}
 
 #define DECLARE_VISIT_INSTRUCTION(name)     \
   virtual void Visit##name(H##name* instr);
@@ -38,11 +72,12 @@
 #undef DECLARE_VISIT_INSTRUCTION
 
  private:
+  CodeGeneratorARM* const codegen_;
+  InvokeDexCallingConventionVisitor parameter_visitor_;
+
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM);
 };
 
-class CodeGeneratorARM;
-
 class InstructionCodeGeneratorARM : public HGraphVisitor {
  public:
   InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen);
@@ -68,7 +103,7 @@
  public:
   explicit CodeGeneratorARM(HGraph* graph)
       : CodeGenerator(graph),
-        location_builder_(graph),
+        location_builder_(graph, this),
         instruction_visitor_(graph, this) { }
   virtual ~CodeGeneratorARM() { }
 
@@ -96,6 +131,11 @@
   int32_t GetStackSlot(HLocal* local) const;
 
  private:
+  // Helper method to move a 32-bit value between two locations.
+  void Move32(Location destination, Location source);
+  // Helper method to move a 64-bit value between two locations.
+  void Move64(Location destination, Location source);
+
   LocationsBuilderARM location_builder_;
   InstructionCodeGeneratorARM instruction_visitor_;
   ArmAssembler assembler_;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 88198dc..1142631 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -17,6 +17,7 @@
 #include "code_generator_x86.h"
 #include "utils/assembler.h"
 #include "utils/x86/assembler_x86.h"
+#include "utils/x86/managed_register_x86.h"
 
 #include "mirror/array.h"
 #include "mirror/art_method.h"
@@ -24,11 +25,20 @@
 #define __ reinterpret_cast<X86Assembler*>(GetAssembler())->
 
 namespace art {
+
+x86::X86ManagedRegister Location::AsX86() const {
+  return reg().AsX86();
+}
+
 namespace x86 {
 
 static constexpr int kNumberOfPushedRegistersAtEntry = 1;
 static constexpr int kCurrentMethodStackOffset = 0;
 
+static Location X86CpuLocation(Register reg) {
+  return Location::RegisterLocation(X86ManagedRegister::FromCpuRegister(reg));
+}
+
 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
       : HGraphVisitor(graph),
         assembler_(codegen->GetAssembler()),
@@ -39,10 +49,13 @@
   static const int kFakeReturnRegister = 8;
   core_spill_mask_ |= (1 << kFakeReturnRegister);
 
-  // Add the current ART method to the frame size, the return PC, and the filler.
-  SetFrameSize(RoundUp((
-      GetGraph()->GetMaximumNumberOfOutVRegs() + GetGraph()->GetNumberOfVRegs() + 3) * kX86WordSize,
+  SetFrameSize(RoundUp(
+      (GetGraph()->GetMaximumNumberOfOutVRegs() + GetGraph()->GetNumberOfVRegs()) * kVRegSize
+      + kVRegSize  // filler
+      + kX86WordSize  // Art method
+      + kNumberOfPushedRegistersAtEntry * kX86WordSize,
       kStackAlignment));
+
   // The return PC has already been pushed on the stack.
   __ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
   __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
@@ -67,28 +80,208 @@
   if (reg_number >= number_of_vregs - number_of_in_vregs) {
     // Local is a parameter of the method. It is stored in the caller's frame.
     return GetFrameSize() + kX86WordSize  // ART method
-                          + (reg_number - number_of_vregs + number_of_in_vregs) * kX86WordSize;
+                          + (reg_number - number_of_vregs + number_of_in_vregs) * kVRegSize;
   } else {
     // Local is a temporary in this method. It is stored in this method's frame.
     return GetFrameSize() - (kNumberOfPushedRegistersAtEntry * kX86WordSize)
-                          - kX86WordSize  // filler.
-                          - (number_of_vregs * kX86WordSize)
-                          + (reg_number * kX86WordSize);
+                          - kVRegSize  // filler.
+                          - (number_of_vregs * kVRegSize)
+                          + (reg_number * kVRegSize);
+  }
+}
+
+static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX };
+static constexpr size_t kRuntimeParameterCoreRegistersLength =
+    arraysize(kRuntimeParameterCoreRegisters);
+
+class InvokeRuntimeCallingConvention : public CallingConvention<Register> {
+ public:
+  InvokeRuntimeCallingConvention()
+      : CallingConvention(kRuntimeParameterCoreRegisters,
+                          kRuntimeParameterCoreRegistersLength) {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
+};
+
+Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      uint32_t index = gp_index_++;
+      if (index < calling_convention.GetNumberOfRegisters()) {
+        return X86CpuLocation(calling_convention.GetRegisterAt(index));
+      } else {
+        return Location::StackSlot(calling_convention.GetStackOffsetOf(index, kX86WordSize));
+      }
+    }
+
+    case Primitive::kPrimLong: {
+      uint32_t index = gp_index_;
+      gp_index_ += 2;
+      if (index + 1 < calling_convention.GetNumberOfRegisters()) {
+        return Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(
+            calling_convention.GetRegisterPairAt(index)));
+      } else if (index + 1 == calling_convention.GetNumberOfRegisters()) {
+        return Location::QuickParameter(index);
+      } else {
+        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(index, kX86WordSize));
+      }
+    }
+
+    case Primitive::kPrimDouble:
+    case Primitive::kPrimFloat:
+      LOG(FATAL) << "Unimplemented parameter type " << type;
+      break;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected parameter type " << type;
+      break;
+  }
+  return Location();
+}
+
+void CodeGeneratorX86::Move32(Location destination, Location source) {
+  if (source.Equals(destination)) {
+    return;
+  }
+  if (destination.IsRegister()) {
+    if (source.IsRegister()) {
+      __ movl(destination.AsX86().AsCpuRegister(), source.AsX86().AsCpuRegister());
+    } else {
+      DCHECK(source.IsStackSlot());
+      __ movl(destination.AsX86().AsCpuRegister(), Address(ESP, source.GetStackIndex()));
+    }
+  } else {
+    if (source.IsRegister()) {
+      __ movl(Address(ESP, destination.GetStackIndex()), source.AsX86().AsCpuRegister());
+    } else {
+      DCHECK(source.IsStackSlot());
+      __ movl(EAX, Address(ESP, source.GetStackIndex()));
+      __ movl(Address(ESP, destination.GetStackIndex()), EAX);
+    }
+  }
+}
+
+void CodeGeneratorX86::Move64(Location destination, Location source) {
+  if (source.Equals(destination)) {
+    return;
+  }
+  if (destination.IsRegister()) {
+    if (source.IsRegister()) {
+      __ movl(destination.AsX86().AsRegisterPairLow(), source.AsX86().AsRegisterPairLow());
+      __ movl(destination.AsX86().AsRegisterPairHigh(), source.AsX86().AsRegisterPairHigh());
+    } else if (source.IsQuickParameter()) {
+      uint32_t argument_index = source.GetQuickParameterIndex();
+      InvokeDexCallingConvention calling_convention;
+      __ movl(destination.AsX86().AsRegisterPairLow(),
+              calling_convention.GetRegisterAt(argument_index));
+      __ movl(destination.AsX86().AsRegisterPairHigh(), Address(ESP,
+          calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize) + GetFrameSize()));
+    } else {
+      DCHECK(source.IsDoubleStackSlot());
+      __ movl(destination.AsX86().AsRegisterPairLow(), Address(ESP, source.GetStackIndex()));
+      __ movl(destination.AsX86().AsRegisterPairHigh(),
+              Address(ESP, source.GetHighStackIndex(kX86WordSize)));
+    }
+  } else if (destination.IsQuickParameter()) {
+    InvokeDexCallingConvention calling_convention;
+    uint32_t argument_index = destination.GetQuickParameterIndex();
+    if (source.IsRegister()) {
+      __ movl(calling_convention.GetRegisterAt(argument_index), source.AsX86().AsRegisterPairLow());
+      __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize)),
+              source.AsX86().AsRegisterPairHigh());
+    } else {
+      DCHECK(source.IsDoubleStackSlot());
+      __ movl(calling_convention.GetRegisterAt(argument_index),
+              Address(ESP, source.GetStackIndex()));
+      __ movl(EAX, Address(ESP, source.GetHighStackIndex(kX86WordSize)));
+      __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize)), EAX);
+    }
+  } else {
+    if (source.IsRegister()) {
+      __ movl(Address(ESP, destination.GetStackIndex()), source.AsX86().AsRegisterPairLow());
+      __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
+              source.AsX86().AsRegisterPairHigh());
+    } else if (source.IsQuickParameter()) {
+      InvokeDexCallingConvention calling_convention;
+      uint32_t argument_index = source.GetQuickParameterIndex();
+      __ movl(Address(ESP, destination.GetStackIndex()),
+              calling_convention.GetRegisterAt(argument_index));
+      __ movl(EAX, Address(ESP,
+          calling_convention.GetStackOffsetOf(argument_index + 1, kX86WordSize) + GetFrameSize()));
+      __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), EAX);
+    } else {
+      DCHECK(source.IsDoubleStackSlot());
+      __ movl(EAX, Address(ESP, source.GetStackIndex()));
+      __ movl(Address(ESP, destination.GetStackIndex()), EAX);
+      __ movl(EAX, Address(ESP, source.GetHighStackIndex(kX86WordSize)));
+      __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), EAX);
+    }
   }
 }
 
 void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
   if (instruction->AsIntConstant() != nullptr) {
-    __ movl(location.reg<Register>(), Immediate(instruction->AsIntConstant()->GetValue()));
+    Immediate imm(instruction->AsIntConstant()->GetValue());
+    if (location.IsRegister()) {
+      __ movl(location.AsX86().AsCpuRegister(), imm);
+    } else {
+      __ movl(Address(ESP, location.GetStackIndex()), imm);
+    }
+  } else if (instruction->AsLongConstant() != nullptr) {
+    int64_t value = instruction->AsLongConstant()->GetValue();
+    if (location.IsRegister()) {
+      __ movl(location.AsX86().AsRegisterPairLow(), Immediate(Low32Bits(value)));
+      __ movl(location.AsX86().AsRegisterPairHigh(), Immediate(High32Bits(value)));
+    } else {
+      __ movl(Address(ESP, location.GetStackIndex()), Immediate(Low32Bits(value)));
+      __ movl(Address(ESP, location.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value)));
+    }
   } else if (instruction->AsLoadLocal() != nullptr) {
-    __ movl(location.reg<Register>(),
-            Address(ESP, GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
+    switch (instruction->GetType()) {
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimInt:
+      case Primitive::kPrimNot:
+        Move32(location, Location::StackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
+        break;
+
+      case Primitive::kPrimLong:
+        Move64(location, Location::DoubleStackSlot(
+            GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
+        break;
+
+      default:
+        LOG(FATAL) << "Unimplemented local type " << instruction->GetType();
+    }
   } else {
     // This can currently only happen when the instruction that requests the move
     // is the next to be compiled.
     DCHECK_EQ(instruction->GetNext(), move_for);
-    __ movl(location.reg<Register>(),
-            instruction->GetLocations()->Out().reg<Register>());
+    switch (instruction->GetType()) {
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimInt:
+      case Primitive::kPrimNot:
+        Move32(location, instruction->GetLocations()->Out());
+        break;
+
+      case Primitive::kPrimLong:
+        Move64(location, instruction->GetLocations()->Out());
+        break;
+
+      default:
+        LOG(FATAL) << "Unimplemented type " << instruction->GetType();
+    }
   }
 }
 
@@ -118,13 +311,13 @@
 
 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  locations->SetInAt(0, Location(EAX));
+  locations->SetInAt(0, X86CpuLocation(EAX));
   if_instr->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
   // TODO: Generate the input as a condition, instead of materializing in a register.
-  __ cmpl(if_instr->GetLocations()->InAt(0).reg<Register>(), Immediate(0));
+  __ cmpl(if_instr->GetLocations()->InAt(0).AsX86().AsCpuRegister(), Immediate(0));
   __ j(kEqual, codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
   if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) {
     __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
@@ -147,29 +340,43 @@
   // Nothing to do, this is driven by the code generator.
 }
 
-void LocationsBuilderX86::VisitStoreLocal(HStoreLocal* local) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(local);
-  locations->SetInAt(1, Location(EAX));
-  local->SetLocations(locations);
+void LocationsBuilderX86::VisitStoreLocal(HStoreLocal* store) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
+  switch (store->InputAt(1)->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
+      break;
+
+    case Primitive::kPrimLong:
+      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented local type " << store->InputAt(1)->GetType();
+  }
+  store->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store) {
-  __ movl(Address(ESP, codegen_->GetStackSlot(store->GetLocal())),
-          store->GetLocations()->InAt(1).reg<Register>());
 }
 
 void LocationsBuilderX86::VisitEqual(HEqual* equal) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal);
-  locations->SetInAt(0, Location(EAX));
-  locations->SetInAt(1, Location(ECX));
-  locations->SetOut(Location(EAX));
+  locations->SetInAt(0, X86CpuLocation(EAX));
+  locations->SetInAt(1, X86CpuLocation(ECX));
+  locations->SetOut(X86CpuLocation(EAX));
   equal->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitEqual(HEqual* equal) {
-  __ cmpl(equal->GetLocations()->InAt(0).reg<Register>(),
-          equal->GetLocations()->InAt(1).reg<Register>());
-  __ setb(kEqual, equal->GetLocations()->Out().reg<Register>());
+  __ cmpl(equal->GetLocations()->InAt(0).AsX86().AsCpuRegister(),
+          equal->GetLocations()->InAt(1).AsX86().AsCpuRegister());
+  __ setb(kEqual, equal->GetLocations()->Out().AsX86().AsCpuRegister());
 }
 
 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
@@ -180,6 +387,14 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
+  constant->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant) {
+  // Will be generated at use site.
+}
+
 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
   ret->SetLocations(nullptr);
 }
@@ -191,78 +406,89 @@
 
 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(ret);
-  locations->SetInAt(0, Location(EAX));
+  switch (ret->InputAt(0)->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      locations->SetInAt(0, X86CpuLocation(EAX));
+      break;
+
+    case Primitive::kPrimLong:
+      locations->SetInAt(
+          0, Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(EAX_EDX)));
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+  }
   ret->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
-  DCHECK_EQ(ret->GetLocations()->InAt(0).reg<Register>(), EAX);
+  if (kIsDebugBuild) {
+    switch (ret->InputAt(0)->GetType()) {
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimInt:
+      case Primitive::kPrimNot:
+        DCHECK_EQ(ret->GetLocations()->InAt(0).AsX86().AsCpuRegister(), EAX);
+        break;
+
+      case Primitive::kPrimLong:
+        DCHECK_EQ(ret->GetLocations()->InAt(0).AsX86().AsRegisterPair(), EAX_EDX);
+        break;
+
+      default:
+        LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+    }
+  }
   codegen_->GenerateFrameExit();
   __ ret();
 }
 
-static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX };
-static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
-
-class InvokeDexCallingConvention : public CallingConvention<Register> {
- public:
-  InvokeDexCallingConvention()
-      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
-};
-
-static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX };
-static constexpr size_t kRuntimeParameterCoreRegistersLength =
-    arraysize(kRuntimeParameterCoreRegisters);
-
-class InvokeRuntimeCallingConvention : public CallingConvention<Register> {
- public:
-  InvokeRuntimeCallingConvention()
-      : CallingConvention(kRuntimeParameterCoreRegisters,
-                          kRuntimeParameterCoreRegistersLength) {}
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
-};
-
-void LocationsBuilderX86::VisitPushArgument(HPushArgument* argument) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(argument);
-  InvokeDexCallingConvention calling_convention;
-  if (argument->GetArgumentIndex() < calling_convention.GetNumberOfRegisters()) {
-    Location location = Location(calling_convention.GetRegisterAt(argument->GetArgumentIndex()));
-    locations->SetInAt(0, location);
-    locations->SetOut(location);
-  } else {
-    locations->SetInAt(0, Location(EAX));
-  }
-  argument->SetLocations(locations);
-}
-
-void InstructionCodeGeneratorX86::VisitPushArgument(HPushArgument* argument) {
-  uint8_t argument_index = argument->GetArgumentIndex();
-  InvokeDexCallingConvention calling_convention;
-  size_t parameter_registers = calling_convention.GetNumberOfRegisters();
-  if (argument_index >= parameter_registers) {
-    uint8_t offset = calling_convention.GetStackOffsetOf(argument_index);
-    __ movl(Address(ESP, offset),
-            argument->GetLocations()->InAt(0).reg<Register>());
-
-  } else {
-    DCHECK_EQ(argument->GetLocations()->Out().reg<Register>(),
-              argument->GetLocations()->InAt(0).reg<Register>());
-  }
-}
-
 void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
-  locations->AddTemp(Location(EAX));
+  locations->AddTemp(X86CpuLocation(EAX));
+
+  InvokeDexCallingConventionVisitor calling_convention_visitor;
+  for (int i = 0; i < invoke->InputCount(); i++) {
+    HInstruction* input = invoke->InputAt(i);
+    locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
+  }
+
+  switch (invoke->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      locations->SetOut(X86CpuLocation(EAX));
+      break;
+
+    case Primitive::kPrimLong:
+      locations->SetOut(Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(EAX_EDX)));
+      break;
+
+    case Primitive::kPrimVoid:
+      break;
+
+    case Primitive::kPrimDouble:
+    case Primitive::kPrimFloat:
+      LOG(FATAL) << "Unimplemented return type " << invoke->GetType();
+      break;
+  }
+
   invoke->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitInvokeStatic(HInvokeStatic* invoke) {
-  Register temp = invoke->GetLocations()->GetTemp(0).reg<Register>();
+  Register temp = invoke->GetLocations()->GetTemp(0).AsX86().AsCpuRegister();
   size_t index_in_cache = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
       invoke->GetIndexInDexCache() * kX86WordSize;
 
@@ -289,13 +515,29 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add);
   switch (add->GetResultType()) {
     case Primitive::kPrimInt: {
-      locations->SetInAt(0, Location(EAX));
-      locations->SetInAt(1, Location(ECX));
-      locations->SetOut(Location(EAX));
+      locations->SetInAt(0, X86CpuLocation(EAX));
+      locations->SetInAt(1, X86CpuLocation(ECX));
+      locations->SetOut(X86CpuLocation(EAX));
       break;
     }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(
+          0, Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(EAX_EDX)));
+      locations->SetInAt(
+          1, Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(ECX_EBX)));
+      locations->SetOut(Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(EAX_EDX)));
+      break;
+    }
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+      break;
+
     default:
-      LOG(FATAL) << "Unimplemented";
+      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
   }
   add->SetLocations(locations);
 }
@@ -303,12 +545,33 @@
 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
   LocationSummary* locations = add->GetLocations();
   switch (add->GetResultType()) {
-    case Primitive::kPrimInt:
-      DCHECK_EQ(locations->InAt(0).reg<Register>(), locations->Out().reg<Register>());
-      __ addl(locations->InAt(0).reg<Register>(), locations->InAt(1).reg<Register>());
+    case Primitive::kPrimInt: {
+      DCHECK_EQ(locations->InAt(0).AsX86().AsCpuRegister(),
+                locations->Out().AsX86().AsCpuRegister());
+      __ addl(locations->InAt(0).AsX86().AsCpuRegister(),
+              locations->InAt(1).AsX86().AsCpuRegister());
       break;
+    }
+
+    case Primitive::kPrimLong: {
+      DCHECK_EQ(locations->InAt(0).AsX86().AsRegisterPair(),
+                locations->Out().AsX86().AsRegisterPair());
+      __ addl(locations->InAt(0).AsX86().AsRegisterPairLow(),
+              locations->InAt(1).AsX86().AsRegisterPairLow());
+      __ adcl(locations->InAt(0).AsX86().AsRegisterPairHigh(),
+              locations->InAt(1).AsX86().AsRegisterPairHigh());
+      break;
+    }
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+      break;
+
     default:
-      LOG(FATAL) << "Unimplemented";
+      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
   }
 }
 
@@ -316,13 +579,30 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(sub);
   switch (sub->GetResultType()) {
     case Primitive::kPrimInt: {
-      locations->SetInAt(0, Location(EAX));
-      locations->SetInAt(1, Location(ECX));
-      locations->SetOut(Location(EAX));
+      locations->SetInAt(0, X86CpuLocation(EAX));
+      locations->SetInAt(1, X86CpuLocation(ECX));
+      locations->SetOut(X86CpuLocation(EAX));
       break;
     }
+
+    case Primitive::kPrimLong: {
+      locations->SetInAt(
+          0, Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(EAX_EDX)));
+      locations->SetInAt(
+          1, Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(ECX_EBX)));
+      locations->SetOut(Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(EAX_EDX)));
+      break;
+    }
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+      break;
+
     default:
-      LOG(FATAL) << "Unimplemented";
+      LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType();
   }
   sub->SetLocations(locations);
 }
@@ -330,18 +610,39 @@
 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
   LocationSummary* locations = sub->GetLocations();
   switch (sub->GetResultType()) {
-    case Primitive::kPrimInt:
-      DCHECK_EQ(locations->InAt(0).reg<Register>(), locations->Out().reg<Register>());
-      __ subl(locations->InAt(0).reg<Register>(), locations->InAt(1).reg<Register>());
+    case Primitive::kPrimInt: {
+      DCHECK_EQ(locations->InAt(0).AsX86().AsCpuRegister(),
+                locations->Out().AsX86().AsCpuRegister());
+      __ subl(locations->InAt(0).AsX86().AsCpuRegister(),
+              locations->InAt(1).AsX86().AsCpuRegister());
       break;
+    }
+
+    case Primitive::kPrimLong: {
+      DCHECK_EQ(locations->InAt(0).AsX86().AsRegisterPair(),
+                locations->Out().AsX86().AsRegisterPair());
+      __ subl(locations->InAt(0).AsX86().AsRegisterPairLow(),
+              locations->InAt(1).AsX86().AsRegisterPairLow());
+      __ sbbl(locations->InAt(0).AsX86().AsRegisterPairHigh(),
+              locations->InAt(1).AsX86().AsRegisterPairHigh());
+      break;
+    }
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+      break;
+
     default:
-      LOG(FATAL) << "Unimplemented";
+      LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType();
   }
 }
 
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetOut(Location(EAX));
+  locations->SetOut(X86CpuLocation(EAX));
   instruction->SetLocations(locations);
 }
 
@@ -359,37 +660,31 @@
 
 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  InvokeDexCallingConvention calling_convention;
-  uint32_t argument_index = instruction->GetIndex();
-  if (argument_index < calling_convention.GetNumberOfRegisters()) {
-    locations->SetOut(Location(calling_convention.GetRegisterAt(argument_index)));
-  } else {
-    locations->SetOut(Location(EAX));
+  Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
+  if (location.IsStackSlot()) {
+    location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+  } else if (location.IsDoubleStackSlot()) {
+    location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
   }
+  locations->SetOut(location);
   instruction->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitParameterValue(HParameterValue* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  InvokeDexCallingConvention calling_convention;
-  uint32_t argument_index = instruction->GetIndex();
-  if (argument_index >= calling_convention.GetNumberOfRegisters()) {
-    uint8_t offset = calling_convention.GetStackOffsetOf(argument_index);
-    __ movl(locations->Out().reg<Register>(), Address(ESP, offset + codegen_->GetFrameSize()));
-  }
+  // Nothing to do, the parameter is already at its location.
 }
 
 void LocationsBuilderX86::VisitNot(HNot* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetInAt(0, Location(EAX));
-  locations->SetOut(Location(EAX));
+  locations->SetInAt(0, X86CpuLocation(EAX));
+  locations->SetOut(X86CpuLocation(EAX));
   instruction->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitNot(HNot* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  DCHECK_EQ(locations->InAt(0).reg<Register>(), locations->Out().reg<Register>());
-  __ xorl(locations->Out().reg<Register>(), Immediate(1));
+  DCHECK_EQ(locations->InAt(0).AsX86().AsCpuRegister(), locations->Out().AsX86().AsCpuRegister());
+  __ xorl(locations->Out().AsX86().AsCpuRegister(), Immediate(1));
 }
 
 }  // namespace x86
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index bba81c0..f22890e 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -26,9 +26,43 @@
 
 static constexpr size_t kX86WordSize = 4;
 
+class CodeGeneratorX86;
+
+static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX };
+static constexpr RegisterPair kParameterCorePairRegisters[] = { ECX_EDX, EDX_EBX };
+static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+
+class InvokeDexCallingConvention : public CallingConvention<Register> {
+ public:
+  InvokeDexCallingConvention()
+      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
+
+  RegisterPair GetRegisterPairAt(size_t argument_index) {
+    DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
+    return kParameterCorePairRegisters[argument_index];
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
+};
+
+class InvokeDexCallingConventionVisitor {
+ public:
+  InvokeDexCallingConventionVisitor() : gp_index_(0) {}
+
+  Location GetNextLocation(Primitive::Type type);
+
+ private:
+  InvokeDexCallingConvention calling_convention;
+  uint32_t gp_index_;
+
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+};
+
 class LocationsBuilderX86 : public HGraphVisitor {
  public:
-  explicit LocationsBuilderX86(HGraph* graph) : HGraphVisitor(graph) { }
+  LocationsBuilderX86(HGraph* graph, CodeGeneratorX86* codegen)
+      : HGraphVisitor(graph), codegen_(codegen) {}
 
 #define DECLARE_VISIT_INSTRUCTION(name)     \
   virtual void Visit##name(H##name* instr);
@@ -38,11 +72,12 @@
 #undef DECLARE_VISIT_INSTRUCTION
 
  private:
+  CodeGeneratorX86* const codegen_;
+  InvokeDexCallingConventionVisitor parameter_visitor_;
+
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
 };
 
-class CodeGeneratorX86;
-
 class InstructionCodeGeneratorX86 : public HGraphVisitor {
  public:
   InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen);
@@ -69,7 +104,7 @@
  public:
   explicit CodeGeneratorX86(HGraph* graph)
       : CodeGenerator(graph),
-        location_builder_(graph),
+        location_builder_(graph, this),
         instruction_visitor_(graph, this) { }
   virtual ~CodeGeneratorX86() { }
 
@@ -97,6 +132,11 @@
   int32_t GetStackSlot(HLocal* local) const;
 
  private:
+  // Helper method to move a 32-bit value between two locations.
+  void Move32(Location destination, Location source);
+  // Helper method to move a 64-bit value between two locations.
+  void Move64(Location destination, Location source);
+
   LocationsBuilderX86 location_builder_;
   InstructionCodeGeneratorX86 instruction_visitor_;
   X86Assembler assembler_;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index adea0ba..3da9ed9 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -226,10 +226,10 @@
   M(InvokeStatic)                                          \
   M(LoadLocal)                                             \
   M(Local)                                                 \
+  M(LongConstant)                                          \
   M(NewInstance)                                           \
   M(Not)                                                   \
   M(ParameterValue)                                        \
-  M(PushArgument)                                          \
   M(Return)                                                \
   M(ReturnVoid)                                            \
   M(StoreLocal)                                            \
@@ -283,6 +283,8 @@
   virtual void Accept(HGraphVisitor* visitor) = 0;
   virtual const char* DebugName() const = 0;
 
+  virtual Primitive::Type GetType() const { return Primitive::kPrimVoid; }
+
   void AddUse(HInstruction* user) {
     uses_ = new (block_->GetGraph()->GetArena()) HUseListNode(user, uses_);
   }
@@ -534,6 +536,7 @@
   Primitive::Type GetResultType() const { return result_type_; }
 
   virtual bool IsCommutative() { return false; }
+  virtual Primitive::Type GetType() const { return GetResultType(); }
 
  private:
   const Primitive::Type result_type_;
@@ -550,6 +553,8 @@
 
   virtual bool IsCommutative() { return true; }
 
+  virtual Primitive::Type GetType() const { return Primitive::kPrimBoolean; }
+
   DECLARE_INSTRUCTION(Equal)
 
  private:
@@ -575,15 +580,19 @@
 // Load a given local. The local is an input of this instruction.
 class HLoadLocal : public HTemplateInstruction<1> {
  public:
-  explicit HLoadLocal(HLocal* local) {
+  explicit HLoadLocal(HLocal* local, Primitive::Type type) : type_(type) {
     SetRawInputAt(0, local);
   }
 
+  virtual Primitive::Type GetType() const { return type_; }
+
   HLocal* GetLocal() const { return reinterpret_cast<HLocal*>(InputAt(0)); }
 
   DECLARE_INSTRUCTION(LoadLocal)
 
  private:
+  const Primitive::Type type_;
+
   DISALLOW_COPY_AND_ASSIGN(HLoadLocal);
 };
 
@@ -611,6 +620,7 @@
   explicit HIntConstant(int32_t value) : value_(value) { }
 
   int32_t GetValue() const { return value_; }
+  virtual Primitive::Type GetType() const { return Primitive::kPrimInt; }
 
   DECLARE_INSTRUCTION(IntConstant)
 
@@ -620,10 +630,30 @@
   DISALLOW_COPY_AND_ASSIGN(HIntConstant);
 };
 
+class HLongConstant : public HTemplateInstruction<0> {
+ public:
+  explicit HLongConstant(int64_t value) : value_(value) { }
+
+  int64_t GetValue() const { return value_; }
+
+  virtual Primitive::Type GetType() const { return Primitive::kPrimLong; }
+
+  DECLARE_INSTRUCTION(LongConstant)
+
+ private:
+  const int64_t value_;
+
+  DISALLOW_COPY_AND_ASSIGN(HLongConstant);
+};
+
 class HInvoke : public HInstruction {
  public:
-  HInvoke(ArenaAllocator* arena, uint32_t number_of_arguments, uint32_t dex_pc)
+  HInvoke(ArenaAllocator* arena,
+          uint32_t number_of_arguments,
+          Primitive::Type return_type,
+          uint32_t dex_pc)
     : inputs_(arena, number_of_arguments),
+      return_type_(return_type),
       dex_pc_(dex_pc) {
     inputs_.SetSize(number_of_arguments);
   }
@@ -635,10 +665,13 @@
     inputs_.Put(index, argument);
   }
 
+  virtual Primitive::Type GetType() const { return return_type_; }
+
   uint32_t GetDexPc() const { return dex_pc_; }
 
  protected:
   GrowableArray<HInstruction*> inputs_;
+  const Primitive::Type return_type_;
   const uint32_t dex_pc_;
 
  private:
@@ -649,9 +682,11 @@
  public:
   HInvokeStatic(ArenaAllocator* arena,
                 uint32_t number_of_arguments,
+                Primitive::Type return_type,
                 uint32_t dex_pc,
                 uint32_t index_in_dex_cache)
-      : HInvoke(arena, number_of_arguments, dex_pc), index_in_dex_cache_(index_in_dex_cache) {}
+      : HInvoke(arena, number_of_arguments, return_type, dex_pc),
+        index_in_dex_cache_(index_in_dex_cache) {}
 
   uint32_t GetIndexInDexCache() const { return index_in_dex_cache_; }
 
@@ -670,6 +705,8 @@
   uint32_t GetDexPc() const { return dex_pc_; }
   uint16_t GetTypeIndex() const { return type_index_; }
 
+  virtual Primitive::Type GetType() const { return Primitive::kPrimNot; }
+
   DECLARE_INSTRUCTION(NewInstance)
 
  private:
@@ -679,24 +716,6 @@
   DISALLOW_COPY_AND_ASSIGN(HNewInstance);
 };
 
-// HPushArgument nodes are inserted after the evaluation of an argument
-// of a call. Their mere purpose is to ease the code generator's work.
-class HPushArgument : public HTemplateInstruction<1> {
- public:
-  HPushArgument(HInstruction* argument, uint8_t argument_index) : argument_index_(argument_index) {
-    SetRawInputAt(0, argument);
-  }
-
-  uint8_t GetArgumentIndex() const { return argument_index_; }
-
-  DECLARE_INSTRUCTION(PushArgument)
-
- private:
-  const uint8_t argument_index_;
-
-  DISALLOW_COPY_AND_ASSIGN(HPushArgument);
-};
-
 class HAdd : public HBinaryOperation {
  public:
   HAdd(Primitive::Type result_type, HInstruction* left, HInstruction* right)
@@ -727,10 +746,13 @@
 // the calling convention.
 class HParameterValue : public HTemplateInstruction<0> {
  public:
-  explicit HParameterValue(uint8_t index) : index_(index) {}
+  HParameterValue(uint8_t index, Primitive::Type parameter_type)
+      : index_(index), parameter_type_(parameter_type) {}
 
   uint8_t GetIndex() const { return index_; }
 
+  virtual Primitive::Type GetType() const { return parameter_type_; }
+
   DECLARE_INSTRUCTION(ParameterValue);
 
  private:
@@ -738,6 +760,8 @@
   // than HGraph::number_of_in_vregs_;
   const uint8_t index_;
 
+  const Primitive::Type parameter_type_;
+
   DISALLOW_COPY_AND_ASSIGN(HParameterValue);
 };
 
@@ -747,6 +771,8 @@
     SetRawInputAt(0, input);
   }
 
+  virtual Primitive::Type GetType() const { return Primitive::kPrimBoolean; }
+
   DECLARE_INSTRUCTION(Not);
 
  private:
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index 06ce3b4..bfb2829 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -70,11 +70,13 @@
     return ManagedRegister();
   }
 
+  int RegId() const { return id_; }
+  explicit ManagedRegister(int reg_id) : id_(reg_id) { }
+
  protected:
   static const int kNoRegister = -1;
 
   ManagedRegister() : id_(kNoRegister) { }
-  explicit ManagedRegister(int reg_id) : id_(reg_id) { }
 
   int id_;
 };
diff --git a/compiler/utils/x86/managed_register_x86.cc b/compiler/utils/x86/managed_register_x86.cc
index 7fae7a8..034a795 100644
--- a/compiler/utils/x86/managed_register_x86.cc
+++ b/compiler/utils/x86/managed_register_x86.cc
@@ -33,7 +33,8 @@
   P(EDX, EDI)                 \
   P(ECX, EBX)                 \
   P(ECX, EDI)                 \
-  P(EBX, EDI)
+  P(EBX, EDI)                 \
+  P(ECX, EDX)
 
 
 struct RegisterPairDescriptor {
diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h
index 0201a96..09d2b49 100644
--- a/compiler/utils/x86/managed_register_x86.h
+++ b/compiler/utils/x86/managed_register_x86.h
@@ -37,7 +37,8 @@
   ECX_EBX = 7,
   ECX_EDI = 8,
   EBX_EDI = 9,
-  kNumberOfRegisterPairs = 10,
+  ECX_EDX = 10,  // Dalvik style passing
+  kNumberOfRegisterPairs = 11,
   kNoRegisterPair = -1,
 };
 
@@ -121,6 +122,12 @@
     return FromRegId(AllocIdHigh()).AsCpuRegister();
   }
 
+  RegisterPair AsRegisterPair() const {
+    CHECK(IsRegisterPair());
+    return static_cast<RegisterPair>(id_ -
+        (kNumberOfCpuRegIds + kNumberOfXmmRegIds + kNumberOfX87RegIds));
+  }
+
   bool IsCpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfCpuRegIds);
diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk
index 231fba1..e99c76f 100644
--- a/dalvikvm/Android.mk
+++ b/dalvikvm/Android.mk
@@ -29,7 +29,7 @@
 LOCAL_MULTILIB := both
 LOCAL_MODULE_STEM_32 := dalvikvm
 LOCAL_MODULE_STEM_64 := dalvikvm64
-include external/stlport/libstlport.mk
+include art/build/Android.libcxx.mk
 include $(BUILD_EXECUTABLE)
 ART_TARGET_EXECUTABLES += $(TARGET_OUT_EXECUTABLES)/$(LOCAL_MODULE)
 
@@ -43,6 +43,8 @@
 LOCAL_SHARED_LIBRARIES := libnativehelper
 LOCAL_LDFLAGS := -ldl -lpthread
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+LOCAL_IS_HOST_MODULE := true
+include art/build/Android.libcxx.mk
 include $(BUILD_HOST_EXECUTABLE)
 ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
 endif
diff --git a/dalvikvm/dalvikvm.cc b/dalvikvm/dalvikvm.cc
index 3486c1d..8d71a7c 100644
--- a/dalvikvm/dalvikvm.cc
+++ b/dalvikvm/dalvikvm.cc
@@ -15,11 +15,10 @@
  */
 
 #include <signal.h>
+#include <stdio.h>
+#include <string.h>
 
 #include <algorithm>
-#include <cstdio>
-#include <cstring>
-#include <string>
 
 #include "jni.h"
 #include "JniInvocation.h"
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index 038f0a7..c17788e 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -21,11 +21,19 @@
 DEX2OAT_SRC_FILES := \
 	dex2oat.cc
 
+# TODO: Remove this when the framework (installd) supports pushing the
+# right instruction-set parameter for the primary architecture.
+ifneq ($(filter ro.zygote=zygote64,$(PRODUCT_DEFAULT_PROPERTY_OVERRIDES)),)
+  dex2oat_arch := 64
+else
+  dex2oat_arch := 32
+endif
+
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
-  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler,art/compiler,target,ndebug,32))
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler,art/compiler,target,ndebug,$(dex2oat_arch)))
 endif
 ifeq ($(ART_BUILD_TARGET_DEBUG),true)
-  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler,art/compiler,target,debug,32))
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler,art/compiler,target,debug,$(dex2oat_arch)))
 endif
 
 ifeq ($(WITH_HOST_DALVIK),true)
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index 56929fc..17828fd 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -43,11 +43,10 @@
   art_ndebug_or_debug := $(2)
 
   include $(CLEAR_VARS)
-  ifeq ($$(art_target_or_host),target)
-    include external/stlport/libstlport.mk
-  else
-    LOCAL_IS_HOST_MODULE := true
+  ifeq ($$(art_target_or_host),host)
+    LOCAL_IS_HOST_MODULE := true
   endif
+  include art/build/Android.libcxx.mk
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart-disassembler
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 9df69f0..d433fd5 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -344,9 +344,6 @@
   art_clang := $(3)
 
   include $(CLEAR_VARS)
-  ifeq ($$(art_target_or_host),target)
-    include external/stlport/libstlport.mk
-  endif
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart
@@ -366,6 +363,8 @@
     LOCAL_IS_HOST_MODULE := true
   endif
 
+  include art/build/Android.libcxx.mk
+
   GENERATED_SRC_DIR := $$(call local-generated-sources-dir)
   ENUM_OPERATOR_OUT_CC_FILES := $$(patsubst %.h,%_operator_out.cc,$$(LIBART_ENUM_OPERATOR_OUT_HEADER_FILES))
   ENUM_OPERATOR_OUT_GEN := $$(addprefix $$(GENERATED_SRC_DIR)/,$$(ENUM_OPERATOR_OUT_CC_FILES))
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 336a0cc..fd2cfeb 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -561,8 +561,8 @@
     jz   .Lslow_unlock
     movl LOCK_WORD_OFFSET(%eax), %ecx     // ecx := lock word
     movl %fs:THREAD_ID_OFFSET, %edx       // edx := thread id
-    test %ecx, %ecx
-    jb   .Lslow_unlock                    // lock word contains a monitor
+    test LITERAL(0xC0000000), %ecx
+    jnz  .Lslow_unlock                    // lock word contains a monitor
     cmpw %cx, %dx                         // does the thread id match?
     jne  .Lslow_unlock
     cmpl LITERAL(65536), %ecx
diff --git a/runtime/base/bit_field.h b/runtime/base/bit_field.h
new file mode 100644
index 0000000..e041bd0
--- /dev/null
+++ b/runtime/base/bit_field.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_BIT_FIELD_H_
+#define ART_RUNTIME_BASE_BIT_FIELD_H_
+
+#include "globals.h"
+#include "logging.h"
+
+namespace art {
+
+static const uword kUwordOne = 1U;
+
+// BitField encodes and decodes a `size`-bit field located at bit `position` of an unsigned
+// machine word (uword). The mask is (1 << size) - 1, so `size` must be below uword's width.
+template<typename T, int position, int size>
+class BitField {
+ public:
+  // Tells whether `value`, cast to uword, has no bits set above the low `size` bits.
+  static bool IsValid(T value) {
+    return (static_cast<uword>(value) & ~((kUwordOne << size) - 1)) == 0;
+  }
+
+  // Returns a uword mask with the low `size` bits set (not shifted to `position`).
+  static uword Mask() {
+    return (kUwordOne << size) - 1;
+  }
+
+  // Returns the mask shifted left by `position`, so it can be applied directly to
+  // the raw, unshifted word.
+  static uword MaskInPlace() {
+    return ((kUwordOne << size) - 1) << position;
+  }
+
+  // Returns the shift count (`position`) needed to right-shift the bit field to
+  // the least-significant bits.
+  static int Shift() {
+    return position;
+  }
+
+  // Returns the size of the bit field, in bits.
+  static int BitSize() {
+    return size;
+  }
+
+  // Returns `value` shifted into the field's position; DCHECKs that the value fits.
+  static uword Encode(T value) {
+    DCHECK(IsValid(value));
+    return static_cast<uword>(value) << position;
+  }
+
+  // Extracts the bit field from `value`: shifts right by `position`, masks, and casts to T.
+  static T Decode(uword value) {
+    return static_cast<T>((value >> position) & ((kUwordOne << size) - 1));
+  }
+
+  // Returns `original` with this field replaced by `value`. Only the bits
+  // covered by MaskInPlace() are changed; DCHECKs that `value` fits in
+  // the field.
+  static uword Update(T value, uword original) {
+    DCHECK(IsValid(value));
+    return (static_cast<uword>(value) << position) |
+        (~MaskInPlace() & original);
+  }
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_BIT_FIELD_H_
diff --git a/runtime/base/bit_field_test.cc b/runtime/base/bit_field_test.cc
new file mode 100644
index 0000000..afeb2c4
--- /dev/null
+++ b/runtime/base/bit_field_test.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bit_field.h"
+#include "globals.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+TEST(BitFields, Test1) {
+  class TestBitFields : public BitField<int32_t, 1, 8> {};
+  ASSERT_TRUE(TestBitFields::IsValid(16));
+  ASSERT_FALSE(TestBitFields::IsValid(256));
+  ASSERT_EQ(0x00ffU, TestBitFields::Mask());
+  ASSERT_EQ(0x001feU, TestBitFields::MaskInPlace());
+  ASSERT_EQ(1, TestBitFields::Shift());
+  ASSERT_EQ(8, TestBitFields::BitSize());
+  ASSERT_EQ(32U, TestBitFields::Encode(16));
+  ASSERT_EQ(16, TestBitFields::Decode(32));
+  ASSERT_EQ(2U, TestBitFields::Update(1, 16));
+}
+
+}  // namespace art
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 514ad4c..c52a588 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -126,14 +126,14 @@
   return os;
 }
 
-class DebugInstrumentationListener : public instrumentation::InstrumentationListener {
+class DebugInstrumentationListener FINAL : public instrumentation::InstrumentationListener {
  public:
   DebugInstrumentationListener() {}
   virtual ~DebugInstrumentationListener() {}
 
-  virtual void MethodEntered(Thread* thread, mirror::Object* this_object,
-                             mirror::ArtMethod* method, uint32_t dex_pc)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void MethodEntered(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                     uint32_t dex_pc)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (method->IsNative()) {
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
@@ -141,10 +141,9 @@
     Dbg::PostLocationEvent(method, 0, this_object, Dbg::kMethodEntry, nullptr);
   }
 
-  virtual void MethodExited(Thread* thread, mirror::Object* this_object,
-                            mirror::ArtMethod* method,
-                            uint32_t dex_pc, const JValue& return_value)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void MethodExited(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                    uint32_t dex_pc, const JValue& return_value)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (method->IsNative()) {
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
@@ -152,26 +151,41 @@
     Dbg::PostLocationEvent(method, dex_pc, this_object, Dbg::kMethodExit, &return_value);
   }
 
-  virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
-                            mirror::ArtMethod* method, uint32_t dex_pc)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void MethodUnwind(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                    uint32_t dex_pc)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // We're not recorded to listen to this kind of event, so complain.
     LOG(ERROR) << "Unexpected method unwind event in debugger " << PrettyMethod(method)
                << " " << dex_pc;
   }
 
-  virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
-                          mirror::ArtMethod* method, uint32_t new_dex_pc)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  void DexPcMoved(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                  uint32_t new_dex_pc)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Dbg::UpdateDebugger(thread, this_object, method, new_dex_pc);
   }
 
-  virtual void ExceptionCaught(Thread* thread, const ThrowLocation& throw_location,
-                               mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
-                               mirror::Throwable* exception_object)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Dbg::PostException(thread, throw_location, catch_method, catch_dex_pc, exception_object);
+  void FieldRead(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                 uint32_t dex_pc, mirror::ArtField* field)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Dbg::PostFieldAccessEvent(method, dex_pc, this_object, field);
   }
+
+  void FieldWritten(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                    uint32_t dex_pc, mirror::ArtField* field, const JValue& field_value)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Dbg::PostFieldModificationEvent(method, dex_pc, this_object, field, &field_value);
+  }
+
+  void ExceptionCaught(Thread* thread, const ThrowLocation& throw_location,
+                       mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
+                       mirror::Throwable* exception_object)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Dbg::PostException(throw_location, catch_method, catch_dex_pc, exception_object);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(DebugInstrumentationListener);
 } gDebugInstrumentationListener;
 
 // JDWP is allowed unless the Zygote forbids it.
@@ -231,6 +245,14 @@
   }
 }
 
+void DebugInvokeReq::Clear() {  // Resets the request: no invoke pending, all pointers null.
+  invoke_needed = false;
+  receiver = nullptr;
+  thread = nullptr;
+  klass = nullptr;
+  method = nullptr;
+}
+
 void SingleStepControl::VisitRoots(RootCallback* callback, void* arg, uint32_t tid,
                                    RootType root_type) {
   if (method != nullptr) {
@@ -238,6 +260,16 @@
   }
 }
 
+bool SingleStepControl::ContainsDexPc(uint32_t dex_pc) const {  // NOTE(review): misnamed --
+  return dex_pcs.find(dex_pc) == dex_pcs.end();  // true when dex_pc is ABSENT from dex_pcs.
+}
+
+void SingleStepControl::Clear() {  // Deactivates stepping; forgets the method and all dex pcs.
+  is_active = false;
+  method = nullptr;
+  dex_pcs.clear();
+}
+
 void DeoptimizationRequest::VisitRoots(RootCallback* callback, void* arg) {
   if (method != nullptr) {
     callback(reinterpret_cast<mirror::Object**>(&method), arg, 0, kRootDebugger);
@@ -607,6 +639,14 @@
   return gDisposed;
 }
 
+// All the instrumentation events the debugger is registered for.
+static constexpr uint32_t kListenerEvents = instrumentation::Instrumentation::kMethodEntered |
+                                            instrumentation::Instrumentation::kMethodExited |
+                                            instrumentation::Instrumentation::kDexPcMoved |
+                                            instrumentation::Instrumentation::kFieldRead |
+                                            instrumentation::Instrumentation::kFieldWritten |
+                                            instrumentation::Instrumentation::kExceptionCaught;
+
 void Dbg::GoActive() {
   // Enable all debugging features, including scans for breakpoints.
   // This is a no-op if we're already active.
@@ -633,11 +673,7 @@
   ThreadState old_state = self->SetStateUnsafe(kRunnable);
   CHECK_NE(old_state, kRunnable);
   runtime->GetInstrumentation()->EnableDeoptimization();
-  runtime->GetInstrumentation()->AddListener(&gDebugInstrumentationListener,
-                                             instrumentation::Instrumentation::kMethodEntered |
-                                             instrumentation::Instrumentation::kMethodExited |
-                                             instrumentation::Instrumentation::kDexPcMoved |
-                                             instrumentation::Instrumentation::kExceptionCaught);
+  runtime->GetInstrumentation()->AddListener(&gDebugInstrumentationListener, kListenerEvents);
   gDebuggerActive = true;
   CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
   runtime->GetThreadList()->ResumeAll();
@@ -668,11 +704,7 @@
       deoptimization_requests_.clear();
       full_deoptimization_event_count_ = 0U;
     }
-    runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener,
-                                                  instrumentation::Instrumentation::kMethodEntered |
-                                                  instrumentation::Instrumentation::kMethodExited |
-                                                  instrumentation::Instrumentation::kDexPcMoved |
-                                                  instrumentation::Instrumentation::kExceptionCaught);
+    runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener, kListenerEvents);
     runtime->GetInstrumentation()->DisableDeoptimization();
     gDebuggerActive = false;
   }
@@ -1572,6 +1604,13 @@
   OutputJValue(tag, return_value, pReply);
 }
 
+void Dbg::OutputFieldValue(JDWP::FieldId field_id, const JValue* field_value,
+                           JDWP::ExpandBuf* pReply) {
+  mirror::ArtField* f = FromFieldId(field_id);
+  JDWP::JdwpTag tag = BasicTagFromDescriptor(FieldHelper(f).GetTypeDescriptor());
+  OutputJValue(tag, field_value, pReply);  // Tag comes from the field's type descriptor.
+}
+
 JDWP::JdwpError Dbg::GetBytecodes(JDWP::RefTypeId, JDWP::MethodId method_id,
                                   std::vector<uint8_t>& bytecodes)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -2444,21 +2483,70 @@
   return visitor.error_;
 }
 
+JDWP::ObjectId Dbg::GetThisObjectIdForEvent(mirror::Object* this_object) {
+  // Only hand out an id for a receiver the registry already knows about. An event request with
+  // an instance filter is created from an existing object id, so the object of interest is
+  // already registered by the time the event fires; registering any other receiver here would
+  // only burn through ids. A null or unregistered receiver is reported as id 0.
+  const bool known = this_object != nullptr && gRegistry->Contains(this_object);
+  if (!known) {
+    return 0;
+  }
+  return gRegistry->Add(this_object);
+}
+
 void Dbg::PostLocationEvent(mirror::ArtMethod* m, int dex_pc, mirror::Object* this_object,
                             int event_flags, const JValue* return_value) {
+  if (!IsDebuggerActive()) {
+    return;
+  }
+  DCHECK(m != nullptr);
+  DCHECK_EQ(m->IsStatic(), this_object == nullptr);
   JDWP::JdwpLocation location;
   SetLocation(location, m, dex_pc);
 
-  // If 'this_object' isn't already in the registry, we know that we're not looking for it,
-  // so there's no point adding it to the registry and burning through ids.
-  JDWP::ObjectId this_id = 0;
-  if (gRegistry->Contains(this_object)) {
-    this_id = gRegistry->Add(this_object);
-  }
+  // We need 'this' for InstanceOnly filters only.
+  JDWP::ObjectId this_id = GetThisObjectIdForEvent(this_object);
   gJdwpState->PostLocationEvent(&location, this_id, event_flags, return_value);
 }
 
-void Dbg::PostException(Thread* thread, const ThrowLocation& throw_location,
+void Dbg::PostFieldAccessEvent(mirror::ArtMethod* m, int dex_pc,
+                               mirror::Object* this_object, mirror::ArtField* f) {
+  if (!IsDebuggerActive()) {
+    return;
+  }
+  DCHECK(m != nullptr);
+  DCHECK(f != nullptr);
+  JDWP::JdwpLocation location;
+  SetLocation(location, m, dex_pc);
+  // Unlike PostLocationEvent, the receiver is registered unconditionally via Add().
+  JDWP::RefTypeId type_id = gRegistry->AddRefType(f->GetDeclaringClass());
+  JDWP::FieldId field_id = ToFieldId(f);
+  JDWP::ObjectId this_id = gRegistry->Add(this_object);
+  // No value is passed for a read (nullptr); `false` presumably flags a non-write -- confirm.
+  gJdwpState->PostFieldEvent(&location, type_id, field_id, this_id, nullptr, false);
+}
+
+void Dbg::PostFieldModificationEvent(mirror::ArtMethod* m, int dex_pc,
+                                     mirror::Object* this_object, mirror::ArtField* f,
+                                     const JValue* field_value) {
+  if (!IsDebuggerActive()) {
+    return;
+  }
+  DCHECK(m != nullptr);
+  DCHECK(f != nullptr);
+  DCHECK(field_value != nullptr);
+  JDWP::JdwpLocation location;
+  SetLocation(location, m, dex_pc);
+  // Unlike PostLocationEvent, the receiver is registered unconditionally via Add().
+  JDWP::RefTypeId type_id = gRegistry->AddRefType(f->GetDeclaringClass());
+  JDWP::FieldId field_id = ToFieldId(f);
+  JDWP::ObjectId this_id = gRegistry->Add(this_object);
+  // The value is forwarded with the event; `true` presumably flags a write -- confirm.
+  gJdwpState->PostFieldEvent(&location, type_id, field_id, this_id, field_value, true);
+}
+
+void Dbg::PostException(const ThrowLocation& throw_location,
                         mirror::ArtMethod* catch_method,
                         uint32_t catch_dex_pc, mirror::Throwable* exception_object) {
   if (!IsDebuggerActive()) {
@@ -2470,8 +2558,8 @@
   JDWP::JdwpLocation catch_location;
   SetLocation(catch_location, catch_method, catch_dex_pc);
 
-  // We need 'this' for InstanceOnly filters.
-  JDWP::ObjectId this_id = gRegistry->Add(throw_location.GetThis());
+  // We need 'this' for InstanceOnly filters only.
+  JDWP::ObjectId this_id = GetThisObjectIdForEvent(throw_location.GetThis());
   JDWP::ObjectId exception_id = gRegistry->Add(exception_object);
   JDWP::RefTypeId exception_class_id = gRegistry->AddRefType(exception_object->GetClass());
 
@@ -2521,7 +2609,7 @@
       } else if (single_step_control->step_size == JDWP::SS_MIN) {
         event_flags |= kSingleStep;
         VLOG(jdwp) << "SS new instruction";
-      } else if (single_step_control->dex_pcs.find(dex_pc) == single_step_control->dex_pcs.end()) {
+      } else if (single_step_control->ContainsDexPc(dex_pc)) {
         event_flags |= kSingleStep;
         VLOG(jdwp) << "SS new line";
       }
@@ -2543,7 +2631,7 @@
         if (single_step_control->step_size == JDWP::SS_MIN) {
           event_flags |= kSingleStep;
           VLOG(jdwp) << "SS new instruction";
-        } else if (single_step_control->dex_pcs.find(dex_pc) == single_step_control->dex_pcs.end()) {
+        } else if (single_step_control->ContainsDexPc(dex_pc)) {
           event_flags |= kSingleStep;
           VLOG(jdwp) << "SS new line";
         }
@@ -2910,8 +2998,9 @@
   //
 
   struct DebugCallbackContext {
-    explicit DebugCallbackContext(SingleStepControl* single_step_control, int32_t line_number)
-      : single_step_control_(single_step_control), line_number_(line_number),
+    explicit DebugCallbackContext(SingleStepControl* single_step_control, int32_t line_number,
+                                  const DexFile::CodeItem* code_item)
+      : single_step_control_(single_step_control), line_number_(line_number), code_item_(code_item),
         last_pc_valid(false), last_pc(0) {
     }
 
@@ -2938,7 +3027,7 @@
     ~DebugCallbackContext() {
       // If the line number was the last in the position table...
       if (last_pc_valid) {
-        size_t end = MethodHelper(single_step_control_->method).GetCodeItem()->insns_size_in_code_units_;
+        size_t end = code_item_->insns_size_in_code_units_;
         for (uint32_t dex_pc = last_pc; dex_pc < end; ++dex_pc) {
           single_step_control_->dex_pcs.insert(dex_pc);
         }
@@ -2947,15 +3036,17 @@
 
     SingleStepControl* const single_step_control_;
     const int32_t line_number_;
+    const DexFile::CodeItem* const code_item_;
     bool last_pc_valid;
     uint32_t last_pc;
   };
   single_step_control->dex_pcs.clear();
   mirror::ArtMethod* m = single_step_control->method;
   if (!m->IsNative()) {
-    DebugCallbackContext context(single_step_control, line_number);
     MethodHelper mh(m);
-    mh.GetDexFile().DecodeDebugInfo(mh.GetCodeItem(), m->IsStatic(), m->GetDexMethodIndex(),
+    const DexFile::CodeItem* const code_item = mh.GetCodeItem();
+    DebugCallbackContext context(single_step_control, line_number, code_item);
+    mh.GetDexFile().DecodeDebugInfo(code_item, m->IsStatic(), m->GetDexMethodIndex(),
                                     DebugCallbackContext::Callback, NULL, &context);
   }
 
@@ -2975,8 +3066,8 @@
     VLOG(jdwp) << "Single-step current line: " << line_number;
     VLOG(jdwp) << "Single-step current stack depth: " << single_step_control->stack_depth;
     VLOG(jdwp) << "Single-step dex_pc values:";
-    for (std::set<uint32_t>::iterator it = single_step_control->dex_pcs.begin(); it != single_step_control->dex_pcs.end(); ++it) {
-      VLOG(jdwp) << StringPrintf(" %#x", *it);
+    for (uint32_t dex_pc : single_step_control->dex_pcs) {
+      VLOG(jdwp) << StringPrintf(" %#x", dex_pc);
     }
   }
 
@@ -2991,8 +3082,7 @@
   if (error == JDWP::ERR_NONE) {
     SingleStepControl* single_step_control = thread->GetSingleStepControl();
     DCHECK(single_step_control != nullptr);
-    single_step_control->is_active = false;
-    single_step_control->dex_pcs.clear();
+    single_step_control->Clear();
   }
 }
 
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 23c9c6a..b3e94c3 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -35,6 +35,7 @@
 
 namespace art {
 namespace mirror {
+class ArtField;
 class ArtMethod;
 class Class;
 class Object;
@@ -85,6 +86,8 @@
   void VisitRoots(RootCallback* callback, void* arg, uint32_t tid, RootType root_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  void Clear();
+
  private:
   DISALLOW_COPY_AND_ASSIGN(DebugInvokeReq);
 };
@@ -118,6 +121,10 @@
   void VisitRoots(RootCallback* callback, void* arg, uint32_t tid, RootType root_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  bool ContainsDexPc(uint32_t dex_pc) const;
+
+  void Clear();
+
  private:
   DISALLOW_COPY_AND_ASSIGN(SingleStepControl);
 };
@@ -297,6 +304,9 @@
   static void OutputMethodReturnValue(JDWP::MethodId method_id, const JValue* return_value,
                                       JDWP::ExpandBuf* pReply)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void OutputFieldValue(JDWP::FieldId field_id, const JValue* field_value,
+                               JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static JDWP::JdwpError GetBytecodes(JDWP::RefTypeId class_id, JDWP::MethodId method_id,
                                       std::vector<uint8_t>& bytecodes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -411,8 +421,14 @@
                                 mirror::Object* thisPtr, int eventFlags,
                                 const JValue* return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void PostException(Thread* thread, const ThrowLocation& throw_location,
-                            mirror::ArtMethod* catch_method,
+  static void PostFieldAccessEvent(mirror::ArtMethod* m, int dex_pc, mirror::Object* this_object,
+                                   mirror::ArtField* f)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void PostFieldModificationEvent(mirror::ArtMethod* m, int dex_pc,
+                                         mirror::Object* this_object, mirror::ArtField* f,
+                                         const JValue* field_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void PostException(const ThrowLocation& throw_location, mirror::ArtMethod* catch_method,
                             uint32_t catch_dex_pc, mirror::Throwable* exception)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void PostThreadStart(Thread* t)
@@ -538,6 +554,9 @@
   static void PostThreadStartOrStop(Thread*, uint32_t)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static JDWP::ObjectId GetThisObjectIdForEvent(mirror::Object* this_object)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static void ProcessDeoptimizationRequest(const DeoptimizationRequest& request)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/gc/accounting/card_table-inl.h b/runtime/gc/accounting/card_table-inl.h
index 564168e..a1d001e 100644
--- a/runtime/gc/accounting/card_table-inl.h
+++ b/runtime/gc/accounting/card_table-inl.h
@@ -43,7 +43,7 @@
 }
 
 template <typename Visitor>
-inline size_t CardTable::Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_end,
+inline size_t CardTable::Scan(ContinuousSpaceBitmap* bitmap, byte* scan_begin, byte* scan_end,
                               const Visitor& visitor, const byte minimum_age) const {
   DCHECK(bitmap->HasAddress(scan_begin));
   DCHECK(bitmap->HasAddress(scan_end - 1));  // scan_end is the byte after the last byte we scan.
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index 8b7bfd3..8d5dc07 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -38,7 +38,7 @@
 
 namespace accounting {
 
-class SpaceBitmap;
+template<size_t kAlignment> class SpaceBitmap;
 
 // Maintain a card table from the the write barrier. All writes of
 // non-NULL values to heap addresses should go through an entry in
@@ -102,7 +102,8 @@
   // For every dirty at least minumum age between begin and end invoke the visitor with the
   // specified argument. Returns how many cards the visitor was run on.
   template <typename Visitor>
-  size_t Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_end, const Visitor& visitor,
+  size_t Scan(SpaceBitmap<kObjectAlignment>* bitmap, byte* scan_begin, byte* scan_end,
+              const Visitor& visitor,
               const byte minimum_age = kCardDirty) const
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/gc/accounting/heap_bitmap-inl.h b/runtime/gc/accounting/heap_bitmap-inl.h
index 04e85d2..ed7b427 100644
--- a/runtime/gc/accounting/heap_bitmap-inl.h
+++ b/runtime/gc/accounting/heap_bitmap-inl.h
@@ -37,16 +37,16 @@
 }
 
 inline bool HeapBitmap::Test(const mirror::Object* obj) {
-  SpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
+  ContinuousSpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
   if (LIKELY(bitmap != nullptr)) {
     return bitmap->Test(obj);
   } else {
-    return GetDiscontinuousSpaceObjectSet(obj) != NULL;
+    return GetDiscontinuousSpaceObjectSet(obj) != nullptr;
   }
 }
 
 inline void HeapBitmap::Clear(const mirror::Object* obj) {
-  SpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
+  ContinuousSpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
   if (LIKELY(bitmap != nullptr)) {
     bitmap->Clear(obj);
   } else {
@@ -57,7 +57,7 @@
 }
 
 inline void HeapBitmap::Set(const mirror::Object* obj) {
-  SpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
+  ContinuousSpaceBitmap* bitmap = GetContinuousSpaceBitmap(obj);
   if (LIKELY(bitmap != NULL)) {
     bitmap->Set(obj);
   } else {
@@ -67,7 +67,7 @@
   }
 }
 
-inline SpaceBitmap* HeapBitmap::GetContinuousSpaceBitmap(const mirror::Object* obj) const {
+inline ContinuousSpaceBitmap* HeapBitmap::GetContinuousSpaceBitmap(const mirror::Object* obj) const {
   for (const auto& bitmap : continuous_space_bitmaps_) {
     if (bitmap->HasAddress(obj)) {
       return bitmap;
diff --git a/runtime/gc/accounting/heap_bitmap.cc b/runtime/gc/accounting/heap_bitmap.cc
index f94cf24..1db886c 100644
--- a/runtime/gc/accounting/heap_bitmap.cc
+++ b/runtime/gc/accounting/heap_bitmap.cc
@@ -16,13 +16,15 @@
 
 #include "heap_bitmap.h"
 
+#include "gc/accounting/space_bitmap-inl.h"
 #include "gc/space/space.h"
 
 namespace art {
 namespace gc {
 namespace accounting {
 
-void HeapBitmap::ReplaceBitmap(SpaceBitmap* old_bitmap, SpaceBitmap* new_bitmap) {
+void HeapBitmap::ReplaceBitmap(ContinuousSpaceBitmap* old_bitmap,
+                               ContinuousSpaceBitmap* new_bitmap) {
   for (auto& bitmap : continuous_space_bitmaps_) {
     if (bitmap == old_bitmap) {
       bitmap = new_bitmap;
@@ -42,7 +44,7 @@
   LOG(FATAL) << "object set " << static_cast<const void*>(old_set) << " not found";
 }
 
-void HeapBitmap::AddContinuousSpaceBitmap(accounting::SpaceBitmap* bitmap) {
+void HeapBitmap::AddContinuousSpaceBitmap(accounting::ContinuousSpaceBitmap* bitmap) {
   DCHECK(bitmap != NULL);
 
   // Check for interval overlap.
@@ -55,14 +57,14 @@
   continuous_space_bitmaps_.push_back(bitmap);
 }
 
-void HeapBitmap::RemoveContinuousSpaceBitmap(accounting::SpaceBitmap* bitmap) {
+void HeapBitmap::RemoveContinuousSpaceBitmap(accounting::ContinuousSpaceBitmap* bitmap) {
   auto it = std::find(continuous_space_bitmaps_.begin(), continuous_space_bitmaps_.end(), bitmap);
   DCHECK(it != continuous_space_bitmaps_.end());
   continuous_space_bitmaps_.erase(it);
 }
 
 void HeapBitmap::AddDiscontinuousObjectSet(ObjectSet* set) {
-  DCHECK(set != NULL);
+  DCHECK(set != nullptr);
   discontinuous_space_sets_.push_back(set);
 }
 
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index f729c0e..61a2429 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -34,7 +34,7 @@
   bool Test(const mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
   void Clear(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
   void Set(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-  SpaceBitmap* GetContinuousSpaceBitmap(const mirror::Object* obj) const;
+  ContinuousSpaceBitmap* GetContinuousSpaceBitmap(const mirror::Object* obj) const;
   ObjectSet* GetDiscontinuousSpaceObjectSet(const mirror::Object* obj) const;
 
   void Walk(ObjectCallback* callback, void* arg)
@@ -46,7 +46,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Find and replace a bitmap pointer, this is used by for the bitmap swapping in the GC.
-  void ReplaceBitmap(SpaceBitmap* old_bitmap, SpaceBitmap* new_bitmap)
+  void ReplaceBitmap(ContinuousSpaceBitmap* old_bitmap, ContinuousSpaceBitmap* new_bitmap)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Find and replace a object set pointer, this is used by for the bitmap swapping in the GC.
@@ -58,13 +58,14 @@
  private:
   const Heap* const heap_;
 
-  void AddContinuousSpaceBitmap(SpaceBitmap* bitmap);
-  void RemoveContinuousSpaceBitmap(SpaceBitmap* bitmap);
+  void AddContinuousSpaceBitmap(ContinuousSpaceBitmap* bitmap);
+  void RemoveContinuousSpaceBitmap(ContinuousSpaceBitmap* bitmap);
   void AddDiscontinuousObjectSet(ObjectSet* set);
   void RemoveDiscontinuousObjectSet(ObjectSet* set);
 
   // Bitmaps covering continuous spaces.
-  std::vector<SpaceBitmap*, GcAllocator<SpaceBitmap*>> continuous_space_bitmaps_;
+  std::vector<ContinuousSpaceBitmap*, GcAllocator<ContinuousSpaceBitmap*>>
+      continuous_space_bitmaps_;
 
   // Sets covering discontinuous spaces.
   std::vector<ObjectSet*, GcAllocator<ObjectSet*>> discontinuous_space_sets_;
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 34ca654..d744dee 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -19,6 +19,7 @@
 #include "base/stl_util.h"
 #include "card_table-inl.h"
 #include "heap_bitmap.h"
+#include "gc/accounting/space_bitmap-inl.h"
 #include "gc/collector/mark_sweep.h"
 #include "gc/collector/mark_sweep-inl.h"
 #include "gc/heap.h"
@@ -222,7 +223,7 @@
 
   // Check the references of each clean card which is also in the mod union table.
   CardTable* card_table = heap_->GetCardTable();
-  SpaceBitmap* live_bitmap = space_->GetLiveBitmap();
+  ContinuousSpaceBitmap* live_bitmap = space_->GetLiveBitmap();
   for (const auto& ref_pair : references_) {
     const byte* card = ref_pair.first;
     if (*card == CardTable::kCardClean) {
@@ -272,7 +273,7 @@
     uintptr_t end = start + CardTable::kCardSize;
     auto* space = heap_->FindContinuousSpaceFromObject(reinterpret_cast<Object*>(start), false);
     DCHECK(space != nullptr);
-    SpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
     live_bitmap->VisitMarkedRange(start, end, add_visitor);
 
     // Update the corresponding references for the card.
@@ -312,7 +313,7 @@
                                                      void* arg) {
   CardTable* card_table = heap_->GetCardTable();
   ModUnionScanImageRootVisitor scan_visitor(callback, arg);
-  SpaceBitmap* bitmap = space_->GetLiveBitmap();
+  ContinuousSpaceBitmap* bitmap = space_->GetLiveBitmap();
   for (const byte* card_addr : cleared_cards_) {
     uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card_addr));
     DCHECK(space_->HasAddress(reinterpret_cast<Object*>(start)));
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index c3a90e2..5ae7c77 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -44,7 +44,6 @@
 
 namespace accounting {
 
-class SpaceBitmap;
 class HeapBitmap;
 
 // The mod-union table is the union of modified cards. It is used to allow the card table to be
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index 56f7caa..044216e 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -112,7 +112,7 @@
   bool contains_reference_to_target_space = false;
   RememberedSetObjectVisitor obj_visitor(callback, target_space,
                                          &contains_reference_to_target_space, arg);
-  SpaceBitmap* bitmap = space_->GetLiveBitmap();
+  ContinuousSpaceBitmap* bitmap = space_->GetLiveBitmap();
   CardSet remove_card_set;
   for (byte* const card_addr : dirty_cards_) {
     contains_reference_to_target_space = false;
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 880ff1f..ed140e0 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -17,14 +17,26 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_INL_H_
 #define ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_INL_H_
 
+#include "space_bitmap.h"
+
 #include "base/logging.h"
+#include "dex_file-inl.h"
+#include "heap_bitmap.h"
+#include "mirror/art_field-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
+#include "object_utils.h"
+#include "space_bitmap-inl.h"
+#include "UniquePtr.h"
 #include "utils.h"
 
 namespace art {
 namespace gc {
 namespace accounting {
 
-inline bool SpaceBitmap::AtomicTestAndSet(const mirror::Object* obj) {
+template<size_t kAlignment>
+inline bool SpaceBitmap<kAlignment>::AtomicTestAndSet(const mirror::Object* obj) {
   uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
   DCHECK_GE(addr, heap_begin_);
   const uintptr_t offset = addr - heap_begin_;
@@ -45,7 +57,8 @@
   return false;
 }
 
-inline bool SpaceBitmap::Test(const mirror::Object* obj) const {
+template<size_t kAlignment>
+inline bool SpaceBitmap<kAlignment>::Test(const mirror::Object* obj) const {
   uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
   DCHECK(HasAddress(obj)) << obj;
   DCHECK(bitmap_begin_ != NULL);
@@ -54,9 +67,9 @@
   return (bitmap_begin_[OffsetToIndex(offset)] & OffsetToMask(offset)) != 0;
 }
 
-template <typename Visitor>
-void SpaceBitmap::VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end,
-                                   const Visitor& visitor) const {
+template<size_t kAlignment> template<typename Visitor>
+inline void SpaceBitmap<kAlignment>::VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end,
+                                                      const Visitor& visitor) const {
   DCHECK_LT(visit_begin, visit_end);
 #if 0
   for (uintptr_t i = visit_begin; i < visit_end; i += kAlignment) {
@@ -148,7 +161,8 @@
 #endif
 }
 
-inline bool SpaceBitmap::Modify(const mirror::Object* obj, bool do_set) {
+template<size_t kAlignment> template<bool kSetBit>
+inline bool SpaceBitmap<kAlignment>::Modify(const mirror::Object* obj) {
   uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
   DCHECK_GE(addr, heap_begin_);
   const uintptr_t offset = addr - heap_begin_;
@@ -157,15 +171,24 @@
   DCHECK_LT(index, bitmap_size_ / kWordSize) << " bitmap_size_ = " << bitmap_size_;
   uword* address = &bitmap_begin_[index];
   uword old_word = *address;
-  if (do_set) {
+  if (kSetBit) {
     *address = old_word | mask;
   } else {
     *address = old_word & ~mask;
   }
-  DCHECK_EQ(Test(obj), do_set);
+  DCHECK_EQ(Test(obj), kSetBit);
   return (old_word & mask) != 0;
 }
 
+template<size_t kAlignment>
+inline std::ostream& operator << (std::ostream& stream, const SpaceBitmap<kAlignment>& bitmap) {
+  return stream
+    << bitmap.GetName() << "["  // Output shape: name[begin=<HeapBegin>,end=<HeapLimit>]
+    << "begin=" << reinterpret_cast<const void*>(bitmap.HeapBegin())
+    << ",end=" << reinterpret_cast<const void*>(bitmap.HeapLimit())
+    << "]";
+}
+
 }  // namespace accounting
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 1957c21..7eed05a 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -14,51 +14,24 @@
  * limitations under the License.
  */
 
-#include "base/logging.h"
-#include "dex_file-inl.h"
-#include "heap_bitmap.h"
-#include "mirror/art_field-inl.h"
-#include "mirror/class-inl.h"
-#include "mirror/object-inl.h"
-#include "mirror/object_array-inl.h"
-#include "object_utils.h"
 #include "space_bitmap-inl.h"
-#include "UniquePtr.h"
-#include "utils.h"
 
 namespace art {
 namespace gc {
 namespace accounting {
 
-std::string SpaceBitmap::GetName() const {
-  return name_;
-}
-
-void SpaceBitmap::SetName(const std::string& name) {
-  name_ = name;
-}
-
-std::string SpaceBitmap::Dump() const {
-  return StringPrintf("%s: %p-%p", name_.c_str(),
-                      reinterpret_cast<void*>(HeapBegin()),
-                      reinterpret_cast<void*>(HeapLimit()));
-}
-
-void ObjectSet::Walk(ObjectCallback* callback, void* arg) {
-  for (const mirror::Object* obj : contained_) {
-    callback(const_cast<mirror::Object*>(obj), arg);
-  }
-}
-
-SpaceBitmap* SpaceBitmap::CreateFromMemMap(const std::string& name, MemMap* mem_map,
-                                           byte* heap_begin, size_t heap_capacity) {
+template<size_t kAlignment>
+SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::CreateFromMemMap(
+    const std::string& name, MemMap* mem_map, byte* heap_begin, size_t heap_capacity) {
   CHECK(mem_map != nullptr);
   uword* bitmap_begin = reinterpret_cast<uword*>(mem_map->Begin());
   size_t bitmap_size = OffsetToIndex(RoundUp(heap_capacity, kAlignment * kBitsPerWord)) * kWordSize;
   return new SpaceBitmap(name, mem_map, bitmap_begin, bitmap_size, heap_begin);
 }
 
-SpaceBitmap* SpaceBitmap::Create(const std::string& name, byte* heap_begin, size_t heap_capacity) {
+template<size_t kAlignment>
+SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::Create(
+    const std::string& name, byte* heap_begin, size_t heap_capacity) {
   CHECK(heap_begin != NULL);
   // Round up since heap_capacity is not necessarily a multiple of kAlignment * kBitsPerWord.
   size_t bitmap_size = OffsetToIndex(RoundUp(heap_capacity, kAlignment * kBitsPerWord)) * kWordSize;
@@ -72,10 +45,8 @@
   return CreateFromMemMap(name, mem_map.release(), heap_begin, heap_capacity);
 }
 
-// Clean up any resources associated with the bitmap.
-SpaceBitmap::~SpaceBitmap() {}
-
-void SpaceBitmap::SetHeapLimit(uintptr_t new_end) {
+template<size_t kAlignment>
+void SpaceBitmap<kAlignment>::SetHeapLimit(uintptr_t new_end) {
   DCHECK(IsAligned<kBitsPerWord * kAlignment>(new_end));
   size_t new_size = OffsetToIndex(new_end - heap_begin_) * kWordSize;
   if (new_size < bitmap_size_) {
@@ -85,7 +56,8 @@
   // should be marked.
 }
 
-void SpaceBitmap::Clear() {
+template<size_t kAlignment>
+void SpaceBitmap<kAlignment>::Clear() {
   if (bitmap_begin_ != NULL) {
     // This returns the memory to the system.  Successive page faults will return zeroed memory.
     int result = madvise(bitmap_begin_, bitmap_size_, MADV_DONTNEED);
@@ -95,14 +67,14 @@
   }
 }
 
-void SpaceBitmap::CopyFrom(SpaceBitmap* source_bitmap) {
+template<size_t kAlignment>
+inline void SpaceBitmap<kAlignment>::CopyFrom(SpaceBitmap* source_bitmap) {
   DCHECK_EQ(Size(), source_bitmap->Size());
   std::copy(source_bitmap->Begin(), source_bitmap->Begin() + source_bitmap->Size() / kWordSize, Begin());
 }
 
-// Visits set bits in address order.  The callback is not permitted to
-// change the bitmap bits or max during the traversal.
-void SpaceBitmap::Walk(ObjectCallback* callback, void* arg) {
+template<size_t kAlignment>
+inline void SpaceBitmap<kAlignment>::Walk(ObjectCallback* callback, void* arg) {
   CHECK(bitmap_begin_ != NULL);
   CHECK(callback != NULL);
 
@@ -122,15 +94,11 @@
   }
 }
 
-// Walk through the bitmaps in increasing address order, and find the
-// object pointers that correspond to garbage objects.  Call
-// <callback> zero or more times with lists of these object pointers.
-//
-// The callback is not permitted to increase the max of either bitmap.
-void SpaceBitmap::SweepWalk(const SpaceBitmap& live_bitmap,
-                            const SpaceBitmap& mark_bitmap,
-                            uintptr_t sweep_begin, uintptr_t sweep_end,
-                            SpaceBitmap::SweepCallback* callback, void* arg) {
+template<size_t kAlignment>
+void SpaceBitmap<kAlignment>::SweepWalk(const SpaceBitmap<kAlignment>& live_bitmap,
+                                               const SpaceBitmap<kAlignment>& mark_bitmap,
+                                               uintptr_t sweep_begin, uintptr_t sweep_end,
+                                               SpaceBitmap::SweepCallback* callback, void* arg) {
   CHECK(live_bitmap.bitmap_begin_ != NULL);
   CHECK(mark_bitmap.bitmap_begin_ != NULL);
   CHECK_EQ(live_bitmap.heap_begin_, mark_bitmap.heap_begin_);
@@ -174,13 +142,10 @@
   }
 }
 
-static void WalkFieldsInOrder(SpaceBitmap* visited, ObjectCallback* callback, mirror::Object* obj,
-                              void* arg);
-
-// Walk instance fields of the given Class. Separate function to allow recursion on the super
-// class.
-static void WalkInstanceFields(SpaceBitmap* visited, ObjectCallback* callback, mirror::Object* obj,
-                               mirror::Class* klass, void* arg)
+template<size_t kAlignment>
+void SpaceBitmap<kAlignment>::WalkInstanceFields(SpaceBitmap<kAlignment>* visited,
+                                                 ObjectCallback* callback, mirror::Object* obj,
+                                                 mirror::Class* klass, void* arg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Visit fields of parent classes first.
   mirror::Class* super = klass->GetSuperClass();
@@ -203,10 +168,10 @@
   }
 }
 
-// For an unvisited object, visit it then all its children found via fields.
-static void WalkFieldsInOrder(SpaceBitmap* visited, ObjectCallback* callback, mirror::Object* obj,
-                              void* arg)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+template<size_t kAlignment>
+void SpaceBitmap<kAlignment>::WalkFieldsInOrder(SpaceBitmap<kAlignment>* visited,
+                                                       ObjectCallback* callback,
+                                                       mirror::Object* obj, void* arg) {
   if (visited->Test(obj)) {
     return;
   }
@@ -244,14 +209,13 @@
   }
 }
 
-// Visits set bits with an in order traversal.  The callback is not permitted to change the bitmap
-// bits or max during the traversal.
-void SpaceBitmap::InOrderWalk(ObjectCallback* callback, void* arg) {
-  UniquePtr<SpaceBitmap> visited(Create("bitmap for in-order walk",
-                                       reinterpret_cast<byte*>(heap_begin_),
-                                       IndexToOffset(bitmap_size_ / kWordSize)));
-  CHECK(bitmap_begin_ != NULL);
-  CHECK(callback != NULL);
+template<size_t kAlignment>
+void SpaceBitmap<kAlignment>::InOrderWalk(ObjectCallback* callback, void* arg) {
+  UniquePtr<SpaceBitmap<kAlignment>> visited(
+      Create("bitmap for in-order walk", reinterpret_cast<byte*>(heap_begin_),
+             IndexToOffset(bitmap_size_ / kWordSize)));
+  CHECK(bitmap_begin_ != nullptr);
+  CHECK(callback != nullptr);
   uintptr_t end = Size() / kWordSize;
   for (uintptr_t i = 0; i < end; ++i) {
     // Need uint for unsigned shift.
@@ -268,14 +232,15 @@
   }
 }
 
-std::ostream& operator << (std::ostream& stream, const SpaceBitmap& bitmap) {
-  return stream
-    << bitmap.GetName() << "["
-    << "begin=" << reinterpret_cast<const void*>(bitmap.HeapBegin())
-    << ",end=" << reinterpret_cast<const void*>(bitmap.HeapLimit())
-    << "]";
+void ObjectSet::Walk(ObjectCallback* callback, void* arg) {
+  for (const mirror::Object* obj : contained_) {
+    callback(const_cast<mirror::Object*>(obj), arg);
+  }
 }
 
+template class SpaceBitmap<kObjectAlignment>;
+template class SpaceBitmap<kPageSize>;
+
 }  // namespace accounting
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index a88f3e4..b90a799 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -38,11 +38,9 @@
 namespace gc {
 namespace accounting {
 
+template<size_t kAlignment>
 class SpaceBitmap {
  public:
-  // Alignment of objects within spaces.
-  static const size_t kAlignment = 8;
-
   typedef void ScanCallback(mirror::Object* obj, void* finger, void* arg);
 
   typedef void SweepCallback(size_t ptr_count, mirror::Object** ptrs, void* arg);
@@ -57,30 +55,31 @@
   static SpaceBitmap* CreateFromMemMap(const std::string& name, MemMap* mem_map,
                                        byte* heap_begin, size_t heap_capacity);
 
-  ~SpaceBitmap();
+  ~SpaceBitmap() {
+  }
 
   // <offset> is the difference from .base to a pointer address.
   // <index> is the index of .bits that contains the bit representing
   //         <offset>.
-  static size_t OffsetToIndex(size_t offset) {
+  static size_t OffsetToIndex(size_t offset) ALWAYS_INLINE {
     return offset / kAlignment / kBitsPerWord;
   }
 
-  static uintptr_t IndexToOffset(size_t index) {
+  static uintptr_t IndexToOffset(size_t index) ALWAYS_INLINE {
     return static_cast<uintptr_t>(index * kAlignment * kBitsPerWord);
   }
 
   // Bits are packed in the obvious way.
-  static uword OffsetToMask(uintptr_t offset) {
+  static uword OffsetToMask(uintptr_t offset) ALWAYS_INLINE {
     return (static_cast<size_t>(1)) << ((offset / kAlignment) % kBitsPerWord);
   }
 
-  inline bool Set(const mirror::Object* obj) {
-    return Modify(obj, true);
+  bool Set(const mirror::Object* obj) ALWAYS_INLINE {
+    return Modify<true>(obj);
   }
 
-  inline bool Clear(const mirror::Object* obj) {
-    return Modify(obj, false);
+  bool Clear(const mirror::Object* obj) ALWAYS_INLINE {
+    return Modify<false>(obj);
   }
 
   // Returns true if the object was previously marked.
@@ -123,20 +122,26 @@
     }
   }
 
-  /**
-   * Visit the live objects in the range [visit_begin, visit_end).
-   */
+  // Visit the live objects in the range [visit_begin, visit_end).
+  // TODO: Use lock annotations when clang is fixed.
+  // EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   template <typename Visitor>
   void VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end, const Visitor& visitor) const
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      NO_THREAD_SAFETY_ANALYSIS;
 
+  // Visits set bits in address order.  The callback is not permitted to change the bitmap bits or
+  // max during the traversal.
   void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  // Visits set bits with an in order traversal.  The callback is not permitted to change the bitmap
+  // bits or max during the traversal.
   void InOrderWalk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
+  // Walk through the bitmaps in increasing address order, and find the object pointers that
+  // correspond to garbage objects.  Call <callback> zero or more times with lists of these object
+  // pointers. The callback is not permitted to increase the max of either bitmap.
   static void SweepWalk(const SpaceBitmap& live, const SpaceBitmap& mark, uintptr_t base,
                         uintptr_t max, SweepCallback* thunk, void* arg);
 
@@ -169,10 +174,18 @@
   // Set the max address which can covered by the bitmap.
   void SetHeapLimit(uintptr_t new_end);
 
-  std::string GetName() const;
-  void SetName(const std::string& name);
+  std::string GetName() const {
+    return name_;
+  }
 
-  std::string Dump() const;
+  void SetName(const std::string& name) {
+    name_ = name;
+  }
+
+  std::string Dump() const {
+    return StringPrintf("%s: %p-%p", name_.c_str(), reinterpret_cast<void*>(HeapBegin()),
+                        reinterpret_cast<void*>(HeapLimit()));
+  }
 
   const void* GetObjectWordAddress(const mirror::Object* obj) const {
     uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
@@ -190,7 +203,17 @@
         heap_begin_(reinterpret_cast<uintptr_t>(heap_begin)),
         name_(name) {}
 
-  bool Modify(const mirror::Object* obj, bool do_set);
+  template<bool kSetBit>
+  bool Modify(const mirror::Object* obj);
+
+  // For an unvisited object, visit it then all its children found via fields.
+  static void WalkFieldsInOrder(SpaceBitmap* visited, ObjectCallback* callback, mirror::Object* obj,
+                                void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Walk instance fields of the given Class. Separate function to allow recursion on the super
+  // class.
+  static void WalkInstanceFields(SpaceBitmap<kAlignment>* visited, ObjectCallback* callback,
+                                 mirror::Object* obj, mirror::Class* klass, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Backing storage for bitmap.
   UniquePtr<MemMap> mem_map_;
@@ -272,7 +295,12 @@
   Objects contained_;
 };
 
-std::ostream& operator << (std::ostream& stream, const SpaceBitmap& bitmap);
+typedef SpaceBitmap<kObjectAlignment> ContinuousSpaceBitmap;
+// TODO: Replace usage of ObjectSet with LargeObjectBitmap.
+typedef SpaceBitmap<kLargeObjectAlignment> LargeObjectBitmap;
+
+template<size_t kAlignment>
+std::ostream& operator << (std::ostream& stream, const SpaceBitmap<kAlignment>& bitmap);
 
 }  // namespace accounting
 }  // namespace gc
diff --git a/runtime/gc/accounting/space_bitmap_test.cc b/runtime/gc/accounting/space_bitmap_test.cc
index 68994a8..7c18052 100644
--- a/runtime/gc/accounting/space_bitmap_test.cc
+++ b/runtime/gc/accounting/space_bitmap_test.cc
@@ -32,14 +32,15 @@
 TEST_F(SpaceBitmapTest, Init) {
   byte* heap_begin = reinterpret_cast<byte*>(0x10000000);
   size_t heap_capacity = 16 * MB;
-  UniquePtr<SpaceBitmap> space_bitmap(SpaceBitmap::Create("test bitmap",
-                                                          heap_begin, heap_capacity));
+  UniquePtr<ContinuousSpaceBitmap> space_bitmap(
+      ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
   EXPECT_TRUE(space_bitmap.get() != NULL);
 }
 
 class BitmapVerify {
  public:
-  BitmapVerify(SpaceBitmap* bitmap, const mirror::Object* begin, const mirror::Object* end)
+  BitmapVerify(ContinuousSpaceBitmap* bitmap, const mirror::Object* begin,
+               const mirror::Object* end)
     : bitmap_(bitmap),
       begin_(begin),
       end_(end) {}
@@ -50,7 +51,7 @@
     EXPECT_EQ(bitmap_->Test(obj), ((reinterpret_cast<uintptr_t>(obj) & 0xF) != 0));
   }
 
-  SpaceBitmap* bitmap_;
+  ContinuousSpaceBitmap* bitmap_;
   const mirror::Object* begin_;
   const mirror::Object* end_;
 };
@@ -59,14 +60,14 @@
   byte* heap_begin = reinterpret_cast<byte*>(0x10000000);
   size_t heap_capacity = 16 * MB;
 
-  UniquePtr<SpaceBitmap> space_bitmap(SpaceBitmap::Create("test bitmap",
-                                                          heap_begin, heap_capacity));
+  UniquePtr<ContinuousSpaceBitmap> space_bitmap(
+      ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
   EXPECT_TRUE(space_bitmap.get() != NULL);
 
   // Set all the odd bits in the first BitsPerWord * 3 to one.
   for (size_t j = 0; j < kBitsPerWord * 3; ++j) {
     const mirror::Object* obj =
-        reinterpret_cast<mirror::Object*>(heap_begin + j * SpaceBitmap::kAlignment);
+        reinterpret_cast<mirror::Object*>(heap_begin + j * kObjectAlignment);
     if (reinterpret_cast<uintptr_t>(obj) & 0xF) {
       space_bitmap->Set(obj);
     }
@@ -77,10 +78,10 @@
   // words.
   for (size_t i = 0; i < static_cast<size_t>(kBitsPerWord); ++i) {
     mirror::Object* start =
-        reinterpret_cast<mirror::Object*>(heap_begin + i * SpaceBitmap::kAlignment);
+        reinterpret_cast<mirror::Object*>(heap_begin + i * kObjectAlignment);
     for (size_t j = 0; j < static_cast<size_t>(kBitsPerWord * 2); ++j) {
       mirror::Object* end =
-          reinterpret_cast<mirror::Object*>(heap_begin + (i + j) * SpaceBitmap::kAlignment);
+          reinterpret_cast<mirror::Object*>(heap_begin + (i + j) * kObjectAlignment);
       BitmapVerify(space_bitmap.get(), start, end);
     }
   }
@@ -118,8 +119,8 @@
 
 
   for (int i = 0; i < 5 ; ++i) {
-    UniquePtr<SpaceBitmap> space_bitmap(SpaceBitmap::Create("test bitmap",
-                                                            heap_begin, heap_capacity));
+    UniquePtr<ContinuousSpaceBitmap> space_bitmap(
+        ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
 
     for (int j = 0; j < 10000; ++j) {
       size_t offset = (r.next() % heap_capacity) & ~(0x7);
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index a700c73..d99136a 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -174,8 +174,8 @@
     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect ||
         (gc_type == kGcTypeFull &&
          space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect)) {
-      accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-      accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+      accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+      accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
       if (live_bitmap != nullptr && live_bitmap != mark_bitmap) {
         heap_->GetLiveBitmap()->ReplaceBitmap(live_bitmap, mark_bitmap);
         heap_->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index bb41b57..f07e6f1 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -123,7 +123,6 @@
   mark_immune_count_ = 0;
   mark_fastpath_count_ = 0;
   mark_slowpath_count_ = 0;
-  FindDefaultSpaceBitmap();
   {
     // TODO: I don't think we should need heap bitmap lock to get the mark bitmap.
     ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
@@ -293,7 +292,7 @@
 void MarkSweep::FindDefaultSpaceBitmap() {
   TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* bitmap = space->GetMarkBitmap();
     if (bitmap != nullptr &&
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
       current_space_bitmap_ = bitmap;
@@ -359,7 +358,7 @@
   }
   // Try to take advantage of locality of references within a space, failing this find the space
   // the hard way.
-  accounting::SpaceBitmap* object_bitmap = current_space_bitmap_;
+  accounting::ContinuousSpaceBitmap* object_bitmap = current_space_bitmap_;
   if (UNLIKELY(!object_bitmap->HasAddress(obj))) {
     object_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
     if (kCountMarkedObjects) {
@@ -428,9 +427,9 @@
   }
   // Try to take advantage of locality of references within a space, failing this find the space
   // the hard way.
-  accounting::SpaceBitmap* object_bitmap = current_space_bitmap_;
+  accounting::ContinuousSpaceBitmap* object_bitmap = current_space_bitmap_;
   if (UNLIKELY(!object_bitmap->HasAddress(obj))) {
-    accounting::SpaceBitmap* new_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
+    accounting::ContinuousSpaceBitmap* new_bitmap = mark_bitmap_->GetContinuousSpaceBitmap(obj);
     if (new_bitmap != NULL) {
       object_bitmap = new_bitmap;
     } else {
@@ -476,7 +475,7 @@
 void MarkSweep::VerifyRoot(const Object* root, size_t vreg, const StackVisitor* visitor,
                            RootType root_type) {
   // See if the root is on any space bitmap.
-  if (GetHeap()->GetLiveBitmap()->GetContinuousSpaceBitmap(root) == nullptr) {
+  if (heap_->GetLiveBitmap()->GetContinuousSpaceBitmap(root) == nullptr) {
     space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
     if (!large_object_space->Contains(root)) {
       LOG(ERROR) << "Found invalid root: " << root << " with type " << root_type;
@@ -686,7 +685,8 @@
 
 class CardScanTask : public MarkStackTask<false> {
  public:
-  CardScanTask(ThreadPool* thread_pool, MarkSweep* mark_sweep, accounting::SpaceBitmap* bitmap,
+  CardScanTask(ThreadPool* thread_pool, MarkSweep* mark_sweep,
+               accounting::ContinuousSpaceBitmap* bitmap,
                byte* begin, byte* end, byte minimum_age, size_t mark_stack_size,
                Object** mark_stack_obj)
       : MarkStackTask<false>(thread_pool, mark_sweep, mark_stack_size, mark_stack_obj),
@@ -697,7 +697,7 @@
   }
 
  protected:
-  accounting::SpaceBitmap* const bitmap_;
+  accounting::ContinuousSpaceBitmap* const bitmap_;
   byte* const begin_;
   byte* const end_;
   const byte minimum_age_;
@@ -820,7 +820,7 @@
 class RecursiveMarkTask : public MarkStackTask<false> {
  public:
   RecursiveMarkTask(ThreadPool* thread_pool, MarkSweep* mark_sweep,
-                    accounting::SpaceBitmap* bitmap, uintptr_t begin, uintptr_t end)
+                    accounting::ContinuousSpaceBitmap* bitmap, uintptr_t begin, uintptr_t end)
       : MarkStackTask<false>(thread_pool, mark_sweep, 0, NULL),
         bitmap_(bitmap),
         begin_(begin),
@@ -828,7 +828,7 @@
   }
 
  protected:
-  accounting::SpaceBitmap* const bitmap_;
+  accounting::ContinuousSpaceBitmap* const bitmap_;
   const uintptr_t begin_;
   const uintptr_t end_;
 
@@ -1045,8 +1045,8 @@
   // Start by sweeping the continuous spaces.
   for (space::ContinuousSpace* space : sweep_spaces) {
     space::AllocSpace* alloc_space = space->AsAllocSpace();
-    accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-    accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
     if (swap_bitmaps) {
       std::swap(live_bitmap, mark_bitmap);
     }
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index d49e427..6dbb270 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -22,6 +22,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "garbage_collector.h"
+#include "gc/accounting/space_bitmap.h"
 #include "immune_region.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -45,7 +46,6 @@
 namespace accounting {
   template<typename T> class AtomicStack;
   typedef AtomicStack<mirror::Object*> ObjectStack;
-  class SpaceBitmap;
 }  // namespace accounting
 
 namespace collector {
@@ -283,7 +283,7 @@
 
   // Current space, we check this space first to avoid searching for the appropriate space for an
   // object.
-  accounting::SpaceBitmap* current_space_bitmap_;
+  accounting::ContinuousSpaceBitmap* current_space_bitmap_;
   // Cache the heap's mark bitmap to prevent having to do 2 loads during slow path marking.
   accounting::HeapBitmap* mark_bitmap_;
 
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
index df731ff..8a9611f 100644
--- a/runtime/gc/collector/semi_space-inl.h
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -65,7 +65,7 @@
       }
       obj_ptr->Assign(forward_address);
     } else {
-      accounting::SpaceBitmap* object_bitmap =
+      accounting::ContinuousSpaceBitmap* object_bitmap =
           heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj);
       if (LIKELY(object_bitmap != nullptr)) {
         if (generational_) {
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index e82d533..c0e172e 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -63,7 +63,6 @@
 namespace collector {
 
 static constexpr bool kProtectFromSpace = true;
-static constexpr bool kClearFromSpace = true;
 static constexpr bool kStoreStackTraces = false;
 static constexpr bool kUseBytesPromoted = true;
 static constexpr size_t kBytesPromotedThreshold = 4 * MB;
@@ -122,6 +121,7 @@
   // Do any pre GC verification.
   timings_.NewSplit("PreGcVerification");
   heap_->PreGcVerification(this);
+  CHECK(from_space_->CanMoveObjects()) << "Attempting to move from " << *from_space_;
   // Set the initial bitmap.
   to_space_live_bitmap_ = to_space_->GetLiveBitmap();
 }
@@ -182,9 +182,6 @@
   Locks::mutator_lock_->AssertExclusiveHeld(self_);
 
   TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
-  // Need to do this with mutators paused so that somebody doesn't accidentally allocate into the
-  // wrong space.
-  heap_->SwapSemiSpaces();
   if (generational_) {
     // If last_gc_to_space_end_ is out of the bounds of the from-space
     // (the to-space from last GC), then point it to the beginning of
@@ -336,7 +333,7 @@
           // remain in the space, that is, the remembered set (and the
           // card table) didn't miss any from-space references in the
           // space.
-          accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
+          accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
           SemiSpaceVerifyNoFromSpaceReferencesObjectVisitor visitor(this);
           live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
                                         reinterpret_cast<uintptr_t>(space->End()),
@@ -344,7 +341,7 @@
         }
       } else {
         DCHECK(rem_set == nullptr);
-        accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
+        accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
         SemiSpaceScanObjectVisitor visitor(this);
         live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
                                       reinterpret_cast<uintptr_t>(space->End()),
@@ -396,10 +393,10 @@
   // Note: Freed bytes can be negative if we copy form a compacted space to a free-list backed
   // space.
   heap_->RecordFree(freed_objects, freed_bytes);
+
   timings_.StartSplit("PreSweepingGcVerification");
   heap_->PreSweepingGcVerification(this);
   timings_.EndSplit();
-
   {
     WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     // Reclaim unmarked objects.
@@ -414,11 +411,9 @@
     TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
     GetHeap()->UnBindBitmaps();
   }
-  if (kClearFromSpace) {
-    // Release the memory used by the from space.
-    from_space_->Clear();
-  }
-  from_space_->Reset();
+  // TODO: Do this before doing verification since the from space may have objects which weren't
+  // moved and point to dead objects.
+  from_space_->Clear();
   // Protect the from space.
   VLOG(heap) << "Protecting space " << *from_space_;
   if (kProtectFromSpace) {
@@ -540,9 +535,9 @@
       // space.
       GetHeap()->WriteBarrierEveryFieldOf(forward_address);
       // Handle the bitmaps marking.
-      accounting::SpaceBitmap* live_bitmap = promo_dest_space->GetLiveBitmap();
+      accounting::ContinuousSpaceBitmap* live_bitmap = promo_dest_space->GetLiveBitmap();
       DCHECK(live_bitmap != nullptr);
-      accounting::SpaceBitmap* mark_bitmap = promo_dest_space->GetMarkBitmap();
+      accounting::ContinuousSpaceBitmap* mark_bitmap = promo_dest_space->GetMarkBitmap();
       DCHECK(mark_bitmap != nullptr);
       DCHECK(!live_bitmap->Test(forward_address));
       if (!whole_heap_collection_) {
@@ -715,8 +710,8 @@
 
 // Scan anything that's on the mark stack.
 void SemiSpace::ProcessMarkStack() {
-  space::MallocSpace* promo_dest_space = NULL;
-  accounting::SpaceBitmap* live_bitmap = NULL;
+  space::MallocSpace* promo_dest_space = nullptr;
+  accounting::ContinuousSpaceBitmap* live_bitmap = nullptr;
   if (generational_ && !whole_heap_collection_) {
     // If a bump pointer space only collection (and the promotion is
     // enabled,) we delay the live-bitmap marking of promoted objects
@@ -724,7 +719,7 @@
     promo_dest_space = GetHeap()->GetPrimaryFreeListSpace();
     live_bitmap = promo_dest_space->GetLiveBitmap();
     DCHECK(live_bitmap != nullptr);
-    accounting::SpaceBitmap* mark_bitmap = promo_dest_space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = promo_dest_space->GetMarkBitmap();
     DCHECK(mark_bitmap != nullptr);
     DCHECK_EQ(live_bitmap, mark_bitmap);
   }
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 3442751..4169ca9 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -21,6 +21,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "garbage_collector.h"
+#include "gc/accounting/space_bitmap.h"
 #include "immune_region.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -42,7 +43,6 @@
 namespace accounting {
   template <typename T> class AtomicStack;
   typedef AtomicStack<mirror::Object*> ObjectStack;
-  class SpaceBitmap;
 }  // namespace accounting
 
 namespace space {
@@ -198,7 +198,8 @@
   // Destination and source spaces (can be any type of ContinuousMemMapAllocSpace which either has
   // a live bitmap or doesn't).
   space::ContinuousMemMapAllocSpace* to_space_;
-  accounting::SpaceBitmap* to_space_live_bitmap_;  // Cached live bitmap as an optimization.
+  // Cached live bitmap as an optimization.
+  accounting::ContinuousSpaceBitmap* to_space_live_bitmap_;
   space::ContinuousMemMapAllocSpace* from_space_;
 
   Thread* self_;
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index fcf9fe9..de7d0b8 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -81,10 +81,16 @@
 // relative to partial/full GC. This is desirable since sticky GCs interfere less with mutator
 // threads (lower pauses, use less memory bandwidth).
 static constexpr double kStickyGcThroughputAdjustment = 1.25;
+// Whether or not we use the free list large object space.
+static constexpr bool kUseFreeListSpaceForLOS = false;
+// Whether or not we compact the zygote in PreZygoteFork.
+static constexpr bool kCompactZygote = kMovingCollector;
+static constexpr size_t kNonMovingSpaceCapacity = 64 * MB;
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
-           double target_utilization, size_t capacity, const std::string& image_file_name,
-           CollectorType post_zygote_collector_type, CollectorType background_collector_type,
+           double target_utilization, double foreground_heap_growth_multiplier, size_t capacity,
+           const std::string& image_file_name,
+           CollectorType foreground_collector_type, CollectorType background_collector_type,
            size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
            size_t long_pause_log_threshold, size_t long_gc_log_threshold,
            bool ignore_max_footprint, bool use_tlab, bool verify_pre_gc_heap,
@@ -95,9 +101,9 @@
       dlmalloc_space_(nullptr),
       main_space_(nullptr),
       collector_type_(kCollectorTypeNone),
-      post_zygote_collector_type_(post_zygote_collector_type),
+      foreground_collector_type_(foreground_collector_type),
       background_collector_type_(background_collector_type),
-      desired_collector_type_(collector_type_),
+      desired_collector_type_(foreground_collector_type_),
       heap_trim_request_lock_(nullptr),
       last_trim_time_(0),
       heap_transition_target_time_(0),
@@ -149,6 +155,7 @@
       min_free_(min_free),
       max_free_(max_free),
       target_utilization_(target_utilization),
+      foreground_heap_growth_multiplier_(foreground_heap_growth_multiplier),
       total_wait_time_(0),
       total_allocation_time_(0),
       verify_object_mode_(kVerifyObjectModeDisabled),
@@ -162,15 +169,11 @@
   // If we aren't the zygote, switch to the default non zygote allocator. This may update the
   // entrypoints.
   if (!is_zygote) {
-    desired_collector_type_ = post_zygote_collector_type_;
     large_object_threshold_ = kDefaultLargeObjectThreshold;
-  } else {
-    if (kMovingCollector) {
-      // We are the zygote, use bump pointer allocation + semi space collector.
-      bool generational = post_zygote_collector_type_ == kCollectorTypeGSS;
-      desired_collector_type_ = generational ? kCollectorTypeGSS : kCollectorTypeSS;
-    } else {
-      desired_collector_type_ = post_zygote_collector_type_;
+    // Background compaction is currently not supported for command line runs.
+    if (background_collector_type_ != foreground_collector_type_) {
+      LOG(WARNING) << "Disabling background compaction for non zygote";
+      background_collector_type_ = foreground_collector_type_;
     }
   }
   ChangeCollector(desired_collector_type_);
@@ -187,73 +190,61 @@
     // isn't going to get in the middle
     byte* oat_file_end_addr = image_space->GetImageHeader().GetOatFileEnd();
     CHECK_GT(oat_file_end_addr, image_space->End());
-    if (oat_file_end_addr > requested_alloc_space_begin) {
-      requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
-    }
+    requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
   }
-  MemMap* malloc_space_mem_map = nullptr;
-  const char* malloc_space_name = is_zygote ? "zygote space" : "alloc space";
   if (is_zygote) {
-    // Allocate a single mem map that is split into the malloc space
-    // and the post zygote non-moving space to put them adjacent.
-    size_t post_zygote_non_moving_space_size = 64 * MB;
-    size_t non_moving_spaces_size = capacity + post_zygote_non_moving_space_size;
+    // Reserve the address range before we create the non moving space to make sure bitmaps don't
+    // take it.
     std::string error_str;
-    malloc_space_mem_map = MemMap::MapAnonymous(malloc_space_name, requested_alloc_space_begin,
-                                                non_moving_spaces_size, PROT_READ | PROT_WRITE,
-                                                true, &error_str);
-    CHECK(malloc_space_mem_map != nullptr) << error_str;
-    post_zygote_non_moving_space_mem_map_.reset(malloc_space_mem_map->RemapAtEnd(
-        malloc_space_mem_map->Begin() + capacity, "post zygote non-moving space",
-        PROT_READ | PROT_WRITE, &error_str));
-    CHECK(post_zygote_non_moving_space_mem_map_.get() != nullptr) << error_str;
-    VLOG(heap) << "malloc space mem map : " << malloc_space_mem_map;
-    VLOG(heap) << "post zygote non-moving space mem map : "
-               << post_zygote_non_moving_space_mem_map_.get();
+    MemMap* mem_map = MemMap::MapAnonymous(
+        "main space", requested_alloc_space_begin + kNonMovingSpaceCapacity, capacity,
+        PROT_READ | PROT_WRITE, true, &error_str);
+    CHECK(mem_map != nullptr) << error_str;
+    // Non moving space is always dlmalloc since we currently don't have support for multiple
+    // rosalloc spaces.
+    non_moving_space_ = space::DlMallocSpace::Create(
+        "zygote / non moving space", initial_size, kNonMovingSpaceCapacity, kNonMovingSpaceCapacity,
+        requested_alloc_space_begin, false);
+    non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
+    CreateMainMallocSpace(mem_map, initial_size, growth_limit, capacity);
   } else {
-    // Allocate a mem map for the malloc space.
     std::string error_str;
-    malloc_space_mem_map = MemMap::MapAnonymous(malloc_space_name, requested_alloc_space_begin,
-                                                capacity, PROT_READ | PROT_WRITE, true, &error_str);
-    CHECK(malloc_space_mem_map != nullptr) << error_str;
-    VLOG(heap) << "malloc space mem map : " << malloc_space_mem_map;
+    MemMap* mem_map = MemMap::MapAnonymous("main/non-moving space", requested_alloc_space_begin,
+                                           capacity, PROT_READ | PROT_WRITE, true, &error_str);
+    CHECK(mem_map != nullptr) << error_str;
+    // Create the main free list space, which doubles as the non moving space. We can do this since
+    // non zygote means that we won't have any background compaction.
+    CreateMainMallocSpace(mem_map, initial_size, growth_limit, capacity);
+    non_moving_space_ = main_space_;
   }
-  CHECK(malloc_space_mem_map != nullptr);
-  space::MallocSpace* malloc_space;
-  if (kUseRosAlloc) {
-    malloc_space = space::RosAllocSpace::CreateFromMemMap(malloc_space_mem_map, malloc_space_name,
-                                                          kDefaultStartingSize, initial_size,
-                                                          growth_limit, capacity, low_memory_mode_);
-    CHECK(malloc_space != nullptr) << "Failed to create rosalloc space";
-  } else {
-    malloc_space = space::DlMallocSpace::CreateFromMemMap(malloc_space_mem_map, malloc_space_name,
-                                                          kDefaultStartingSize, initial_size,
-                                                          growth_limit, capacity);
-    CHECK(malloc_space != nullptr) << "Failed to create dlmalloc space";
-  }
-  VLOG(heap) << "malloc_space : " << malloc_space;
+  CHECK(non_moving_space_ != nullptr);
+
+  // We need to create the bump pointer if the foreground collector is a compacting GC. We only
+  // create the bump pointer space if we are not a moving foreground collector but have a moving
+  // background collector since the heap transition code will create the temp space by recycling
+  // the bitmap from the main space.
   if (kMovingCollector) {
     // TODO: Place bump-pointer spaces somewhere to minimize size of card table.
-    // TODO: Having 3+ spaces as big as the large heap size can cause virtual memory fragmentation
-    // issues.
-    const size_t bump_pointer_space_size = std::min(malloc_space->Capacity(), 128 * MB);
+    // TODO: Don't create all the bump pointer spaces if not necessary (currently only GSS needs
+    // both bump pointer spaces + main space) b/14059466. Divide by 2 as a temporary fix.
+    const size_t bump_pointer_space_capacity = capacity / 2;
     bump_pointer_space_ = space::BumpPointerSpace::Create("Bump pointer space",
-                                                          bump_pointer_space_size, nullptr);
+                                                          bump_pointer_space_capacity, nullptr);
     CHECK(bump_pointer_space_ != nullptr) << "Failed to create bump pointer space";
     AddSpace(bump_pointer_space_);
-    temp_space_ = space::BumpPointerSpace::Create("Bump pointer space 2", bump_pointer_space_size,
-                                                  nullptr);
+    temp_space_ = space::BumpPointerSpace::Create("Bump pointer space 2",
+                                                  bump_pointer_space_capacity, nullptr);
     CHECK(temp_space_ != nullptr) << "Failed to create bump pointer space";
     AddSpace(temp_space_);
-    VLOG(heap) << "bump_pointer_space : " << bump_pointer_space_;
-    VLOG(heap) << "temp_space : " << temp_space_;
   }
-  non_moving_space_ = malloc_space;
-  malloc_space->SetFootprintLimit(malloc_space->Capacity());
-  AddSpace(malloc_space);
+  if (non_moving_space_ != main_space_) {
+    AddSpace(non_moving_space_);
+  }
+  if (main_space_ != nullptr) {
+    AddSpace(main_space_);
+  }
 
   // Allocate the large object space.
-  constexpr bool kUseFreeListSpaceForLOS = false;
   if (kUseFreeListSpaceForLOS) {
     large_object_space_ = space::FreeListSpace::Create("large object space", nullptr, capacity);
   } else {
@@ -268,11 +259,6 @@
   // Relies on the spaces being sorted.
   byte* heap_begin = continuous_spaces_.front()->Begin();
   byte* heap_end = continuous_spaces_.back()->Limit();
-  if (is_zygote) {
-    CHECK(post_zygote_non_moving_space_mem_map_.get() != nullptr);
-    heap_begin = std::min(post_zygote_non_moving_space_mem_map_->Begin(), heap_begin);
-    heap_end = std::max(post_zygote_non_moving_space_mem_map_->End(), heap_end);
-  }
   size_t heap_capacity = heap_end - heap_begin;
 
   // Allocate the card table.
@@ -292,6 +278,12 @@
         new accounting::RememberedSet("Non-moving space remembered set", this, non_moving_space_);
     CHECK(non_moving_space_rem_set != nullptr) << "Failed to create non-moving space remembered set";
     AddRememberedSet(non_moving_space_rem_set);
+    if (main_space_ != nullptr && main_space_ != non_moving_space_) {
+      accounting::RememberedSet* main_space_rem_set =
+          new accounting::RememberedSet("Main space remembered set", this, main_space_);
+      CHECK(main_space_rem_set != nullptr) << "Failed to create main space remembered set";
+      AddRememberedSet(main_space_rem_set);
+    }
   }
 
   // TODO: Count objects in the image space here.
@@ -329,7 +321,7 @@
   }
   if (kMovingCollector) {
     // TODO: Clean this up.
-    bool generational = post_zygote_collector_type_ == kCollectorTypeGSS;
+    bool generational = foreground_collector_type_ == kCollectorTypeGSS;
     semi_space_collector_ = new collector::SemiSpace(this, generational,
                                                      generational ? "generational" : "");
     garbage_collectors_.push_back(semi_space_collector_);
@@ -347,6 +339,37 @@
   }
 }
 
+void Heap::CreateMainMallocSpace(MemMap* mem_map, size_t initial_size, size_t growth_limit,
+                                 size_t capacity) {
+  // Is background compaction enabled? True when only one of the two collectors is a moving GC.
+  bool can_move_objects = IsMovingGc(background_collector_type_) !=
+      IsMovingGc(foreground_collector_type_);
+  // If we are the zygote and don't yet have a zygote space, it means that the zygote fork will
+  // happen in the future. If this happens and we have kCompactZygote enabled we wish to compact
+  // from the main space to the zygote space. If background compaction is enabled, always pass in
+  // that we can move objects.
+  if (kCompactZygote && Runtime::Current()->IsZygote() && !can_move_objects) {
+    // After the zygote we want this to be false if we don't have background compaction enabled so
+    // that getting primitive array elements is faster.
+    can_move_objects = !have_zygote_space_;
+  }
+  if (kUseRosAlloc) {
+    main_space_ = space::RosAllocSpace::CreateFromMemMap(mem_map, "main rosalloc space",
+                                                          kDefaultStartingSize, initial_size,
+                                                          growth_limit, capacity, low_memory_mode_,
+                                                          can_move_objects);
+    CHECK(main_space_ != nullptr) << "Failed to create rosalloc space";
+  } else {
+    main_space_ = space::DlMallocSpace::CreateFromMemMap(mem_map, "main dlmalloc space",
+                                                          kDefaultStartingSize, initial_size,
+                                                          growth_limit, capacity,
+                                                          can_move_objects);
+    CHECK(main_space_ != nullptr) << "Failed to create dlmalloc space";
+  }
+  main_space_->SetFootprintLimit(main_space_->Capacity());
+  VLOG(heap) << "Created main space " << main_space_;
+}
+
 void Heap::ChangeAllocator(AllocatorType allocator) {
   if (current_allocator_ != allocator) {
     // These two allocators are only used internally and don't have any entrypoints.
@@ -360,13 +383,13 @@
 }
 
 void Heap::DisableCompaction() {
-  if (IsCompactingGC(post_zygote_collector_type_)) {
-    post_zygote_collector_type_ = kCollectorTypeCMS;
+  if (IsMovingGc(foreground_collector_type_)) {
+    foreground_collector_type_  = kCollectorTypeCMS;
   }
-  if (IsCompactingGC(background_collector_type_)) {
-    background_collector_type_ = post_zygote_collector_type_;
+  if (IsMovingGc(background_collector_type_)) {
+    background_collector_type_ = foreground_collector_type_;
   }
-  TransitionCollector(post_zygote_collector_type_);
+  TransitionCollector(foreground_collector_type_);
 }
 
 std::string Heap::SafeGetClassDescriptor(mirror::Class* klass) {
@@ -428,14 +451,6 @@
         break;
       }
     }
-    if (space == nullptr) {
-      if (allocator_mem_map_.get() == nullptr || !allocator_mem_map_->HasAddress(obj)) {
-        stream << "obj " << obj << " not a valid heap address";
-        return;
-      } else if (allocator_mem_map_.get() != nullptr) {
-        allocator_mem_map_->Protect(PROT_READ | PROT_WRITE);
-      }
-    }
     // Unprotect all the spaces.
     for (const auto& space : continuous_spaces_) {
       mprotect(space->Begin(), space->Capacity(), PROT_READ | PROT_WRITE);
@@ -478,7 +493,7 @@
   ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
   MutexLock mu(self, *gc_complete_lock_);
   ++disable_moving_gc_count_;
-  if (IsCompactingGC(collector_type_running_)) {
+  if (IsMovingGc(collector_type_running_)) {
     WaitForGcToCompleteLocked(self);
   }
 }
@@ -496,12 +511,12 @@
       // Start at index 1 to avoid "is always false" warning.
       // Have iteration 1 always transition the collector.
       TransitionCollector((((i & 1) == 1) == (process_state_ == kProcessStateJankPerceptible))
-                          ? post_zygote_collector_type_ : background_collector_type_);
+                          ? foreground_collector_type_ : background_collector_type_);
       usleep(kCollectorTransitionStressWait);
     }
     if (process_state_ == kProcessStateJankPerceptible) {
       // Transition back to foreground right away to prevent jank.
-      RequestCollectorTransition(post_zygote_collector_type_, 0);
+      RequestCollectorTransition(foreground_collector_type_, 0);
     } else {
       // Don't delay for debug builds since we may want to stress test the GC.
       RequestCollectorTransition(background_collector_type_, kIsDebugBuild ? 0 :
@@ -568,8 +583,8 @@
     DCHECK(!space->IsDiscontinuousSpace());
     space::ContinuousSpace* continuous_space = space->AsContinuousSpace();
     // Continuous spaces don't necessarily have bitmaps.
-    accounting::SpaceBitmap* live_bitmap = continuous_space->GetLiveBitmap();
-    accounting::SpaceBitmap* mark_bitmap = continuous_space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* live_bitmap = continuous_space->GetLiveBitmap();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = continuous_space->GetMarkBitmap();
     if (live_bitmap != nullptr) {
       DCHECK(mark_bitmap != nullptr);
       live_bitmap_->AddContinuousSpaceBitmap(live_bitmap);
@@ -609,8 +624,8 @@
     DCHECK(!space->IsDiscontinuousSpace());
     space::ContinuousSpace* continuous_space = space->AsContinuousSpace();
     // Continuous spaces don't necessarily have bitmaps.
-    accounting::SpaceBitmap* live_bitmap = continuous_space->GetLiveBitmap();
-    accounting::SpaceBitmap* mark_bitmap = continuous_space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* live_bitmap = continuous_space->GetLiveBitmap();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = continuous_space->GetMarkBitmap();
     if (live_bitmap != nullptr) {
       DCHECK(mark_bitmap != nullptr);
       live_bitmap_->RemoveContinuousSpaceBitmap(live_bitmap);
@@ -626,6 +641,10 @@
     }
     if (continuous_space == main_space_) {
       main_space_ = nullptr;
+    } else if (continuous_space == bump_pointer_space_) {
+      bump_pointer_space_ = nullptr;
+    } else if (continuous_space == temp_space_) {
+      temp_space_ = nullptr;
     }
   } else {
     DCHECK(space->IsDiscontinuousSpace());
@@ -667,7 +686,8 @@
   for (auto& collector : garbage_collectors_) {
     const CumulativeLogger& logger = collector->GetCumulativeTimings();
     const size_t iterations = logger.GetIterations();
-    if (iterations != 0) {
+    const Histogram<uint64_t>& pause_histogram = collector->GetPauseHistogram();
+    if (iterations != 0 && pause_histogram.SampleSize() != 0) {
       os << ConstDumpable<CumulativeLogger>(logger);
       const uint64_t total_ns = logger.GetTotalNs();
       const uint64_t total_pause_ns = collector->GetTotalPausedTimeNs();
@@ -675,8 +695,8 @@
       const uint64_t freed_bytes = collector->GetTotalFreedBytes();
       const uint64_t freed_objects = collector->GetTotalFreedObjects();
       Histogram<uint64_t>::CumulativeData cumulative_data;
-      collector->GetPauseHistogram().CreateHistogram(&cumulative_data);
-      collector->GetPauseHistogram().PrintConfidenceIntervals(os, 0.99, cumulative_data);
+      pause_histogram.CreateHistogram(&cumulative_data);
+      pause_histogram.PrintConfidenceIntervals(os, 0.99, cumulative_data);
       os << collector->GetName() << " total time: " << PrettyDuration(total_ns)
          << " mean time: " << PrettyDuration(total_ns / iterations) << "\n"
          << collector->GetName() << " freed: " << freed_objects
@@ -966,8 +986,10 @@
       managed_reclaimed += alloc_space->Trim();
     }
   }
-  total_alloc_space_allocated = GetBytesAllocated() - large_object_space_->GetBytesAllocated() -
-      bump_pointer_space_->Size();
+  total_alloc_space_allocated = GetBytesAllocated() - large_object_space_->GetBytesAllocated();
+  if (bump_pointer_space_ != nullptr) {
+    total_alloc_space_allocated -= bump_pointer_space_->Size();
+  }
   const float managed_utilization = static_cast<float>(total_alloc_space_allocated) /
       static_cast<float>(total_alloc_space_size);
   uint64_t gc_heap_end_ns = NanoTime();
@@ -1083,8 +1105,8 @@
 
 void Heap::DumpSpaces(std::ostream& stream) {
   for (const auto& space : continuous_spaces_) {
-    accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-    accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
     stream << space << " " << *space << "\n";
     if (live_bitmap != nullptr) {
       stream << live_bitmap << " " << *live_bitmap << "\n";
@@ -1392,14 +1414,13 @@
   VLOG(heap) << "TransitionCollector: " << static_cast<int>(collector_type_)
              << " -> " << static_cast<int>(collector_type);
   uint64_t start_time = NanoTime();
-  uint32_t before_size  = GetTotalMemory();
   uint32_t before_allocated = num_bytes_allocated_.Load();
   ThreadList* tl = Runtime::Current()->GetThreadList();
   Thread* self = Thread::Current();
   ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
   Locks::mutator_lock_->AssertNotHeld(self);
   const bool copying_transition =
-      IsCompactingGC(background_collector_type_) || IsCompactingGC(post_zygote_collector_type_);
+      IsMovingGc(background_collector_type_) || IsMovingGc(foreground_collector_type_);
   // Busy wait until we can GC (StartGC can fail if we have a non-zero
   // compacting_gc_disable_count_, this should rarely occurs).
   for (;;) {
@@ -1430,42 +1451,20 @@
     case kCollectorTypeSS:
       // Fall-through.
     case kCollectorTypeGSS: {
-      mprotect(temp_space_->Begin(), temp_space_->Capacity(), PROT_READ | PROT_WRITE);
-      CHECK(main_space_ != nullptr);
-      Compact(temp_space_, main_space_);
-      DCHECK(allocator_mem_map_.get() == nullptr);
-      allocator_mem_map_.reset(main_space_->ReleaseMemMap());
-      madvise(main_space_->Begin(), main_space_->Size(), MADV_DONTNEED);
-      // RemoveSpace does not delete the removed space.
-      space::Space* old_space = main_space_;
-      RemoveSpace(old_space);
-      delete old_space;
+      if (!IsMovingGc(collector_type_)) {
+        // We are transitioning from non moving GC -> moving GC, since we copied from the bump
+        // pointer space last transition it will be protected.
+        bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+        Compact(bump_pointer_space_, main_space_);
+      }
       break;
     }
     case kCollectorTypeMS:
       // Fall through.
     case kCollectorTypeCMS: {
-      if (IsCompactingGC(collector_type_)) {
-        // TODO: Use mem-map from temp space?
-        MemMap* mem_map = allocator_mem_map_.release();
-        CHECK(mem_map != nullptr);
-        size_t starting_size = kDefaultStartingSize;
-        size_t initial_size = kDefaultInitialSize;
-        mprotect(mem_map->Begin(), initial_size, PROT_READ | PROT_WRITE);
-        CHECK(main_space_ == nullptr);
-        if (kUseRosAlloc) {
-          main_space_ =
-              space::RosAllocSpace::CreateFromMemMap(mem_map, "alloc space", starting_size,
-                                                     initial_size, mem_map->Size(),
-                                                     mem_map->Size(), low_memory_mode_);
-        } else {
-          main_space_ =
-              space::DlMallocSpace::CreateFromMemMap(mem_map, "alloc space", starting_size,
-                                                     initial_size, mem_map->Size(),
-                                                     mem_map->Size());
-        }
-        main_space_->SetFootprintLimit(main_space_->Capacity());
-        AddSpace(main_space_);
+      if (IsMovingGc(collector_type_)) {
+        // Compact to the main space from the bump pointer space, don't need to swap semispaces.
+        main_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
         Compact(main_space_, bump_pointer_space_);
       }
       break;
@@ -1484,16 +1483,10 @@
   uint64_t duration = NanoTime() - start_time;
   GrowForUtilization(semi_space_collector_);
   FinishGC(self, collector::kGcTypeFull);
-  int32_t after_size = GetTotalMemory();
-  int32_t delta_size = before_size - after_size;
   int32_t after_allocated = num_bytes_allocated_.Load();
   int32_t delta_allocated = before_allocated - after_allocated;
-  const std::string saved_bytes_str =
-      delta_size < 0 ? "-" + PrettySize(-delta_size) : PrettySize(delta_size);
   LOG(INFO) << "Heap transition to " << process_state_ << " took "
-      << PrettyDuration(duration) << " " << PrettySize(before_size) << "->"
-      << PrettySize(after_size) << " from " << PrettySize(delta_allocated) << " to "
-      << PrettySize(delta_size) << " saved";
+      << PrettyDuration(duration) << " saved at least " << PrettySize(delta_allocated);
 }
 
 void Heap::ChangeCollector(CollectorType collector_type) {
@@ -1568,9 +1561,9 @@
   // Maps from bin sizes to locations.
   std::multimap<size_t, uintptr_t> bins_;
   // Live bitmap of the space which contains the bins.
-  accounting::SpaceBitmap* bin_live_bitmap_;
+  accounting::ContinuousSpaceBitmap* bin_live_bitmap_;
   // Mark bitmap of the space which contains the bins.
-  accounting::SpaceBitmap* bin_mark_bitmap_;
+  accounting::ContinuousSpaceBitmap* bin_mark_bitmap_;
 
   static void Callback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -1661,11 +1654,12 @@
   VLOG(heap) << "Starting PreZygoteFork";
   // Trim the pages at the end of the non moving space.
   non_moving_space_->Trim();
+  // The end of the non-moving space may be protected, unprotect it so that we can copy the zygote
+  // there.
   non_moving_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
   // Change the collector to the post zygote one.
-  ChangeCollector(post_zygote_collector_type_);
-  // TODO: Delete bump_pointer_space_ and temp_pointer_space_?
-  if (semi_space_collector_ != nullptr) {
+  if (kCompactZygote) {
+    DCHECK(semi_space_collector_ != nullptr);
     // Temporarily disable rosalloc verification because the zygote
     // compaction will mess up the rosalloc internal metadata.
     ScopedDisableRosAllocVerification disable_rosalloc_verif(this);
@@ -1675,18 +1669,47 @@
     space::BumpPointerSpace target_space("zygote bump space", non_moving_space_->End(),
                                          non_moving_space_->Limit());
     // Compact the bump pointer space to a new zygote bump pointer space.
-    temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
-    zygote_collector.SetFromSpace(bump_pointer_space_);
+    bool reset_main_space = false;
+    if (IsMovingGc(collector_type_)) {
+      zygote_collector.SetFromSpace(bump_pointer_space_);
+    } else {
+      CHECK(main_space_ != nullptr);
+      // Copy from the main space.
+      zygote_collector.SetFromSpace(main_space_);
+      reset_main_space = true;
+    }
     zygote_collector.SetToSpace(&target_space);
+
+    Runtime::Current()->GetThreadList()->SuspendAll();
     zygote_collector.Run(kGcCauseCollectorTransition, false);
-    CHECK(temp_space_->IsEmpty());
+    if (IsMovingGc(collector_type_)) {
+      SwapSemiSpaces();
+    }
+    Runtime::Current()->GetThreadList()->ResumeAll();
+
+    if (reset_main_space) {
+      main_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+      madvise(main_space_->Begin(), main_space_->Capacity(), MADV_DONTNEED);
+      MemMap* mem_map = main_space_->ReleaseMemMap();
+      RemoveSpace(main_space_);
+      delete main_space_;
+      main_space_ = nullptr;
+      CreateMainMallocSpace(mem_map, kDefaultInitialSize, mem_map->Size(), mem_map->Size());
+      AddSpace(main_space_);
+    } else {
+      bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+    }
+    if (temp_space_ != nullptr) {
+      CHECK(temp_space_->IsEmpty());
+    }
     total_objects_freed_ever_ += semi_space_collector_->GetFreedObjects();
     total_bytes_freed_ever_ += semi_space_collector_->GetFreedBytes();
     // Update the end and write out image.
     non_moving_space_->SetEnd(target_space.End());
     non_moving_space_->SetLimit(target_space.Limit());
-    VLOG(heap) << "Zygote size " << non_moving_space_->Size() << " bytes";
+    VLOG(heap) << "Zygote space size " << non_moving_space_->Size() << " bytes";
   }
+  ChangeCollector(foreground_collector_type_);
   // Save the old space so that we can remove it after we complete creating the zygote space.
   space::MallocSpace* old_alloc_space = non_moving_space_;
   // Turn the current alloc space into a zygote space and obtain the new alloc space composed of
@@ -1706,18 +1729,12 @@
   }
   space::ZygoteSpace* zygote_space = old_alloc_space->CreateZygoteSpace("alloc space",
                                                                         low_memory_mode_,
-                                                                        &main_space_);
+                                                                        &non_moving_space_);
   delete old_alloc_space;
   CHECK(zygote_space != nullptr) << "Failed creating zygote space";
   AddSpace(zygote_space, false);
-  CHECK(main_space_ != nullptr);
-  if (main_space_->IsRosAllocSpace()) {
-    rosalloc_space_ = main_space_->AsRosAllocSpace();
-  } else if (main_space_->IsDlMallocSpace()) {
-    dlmalloc_space_ = main_space_->AsDlMallocSpace();
-  }
-  main_space_->SetFootprintLimit(main_space_->Capacity());
-  AddSpace(main_space_);
+  non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
+  AddSpace(non_moving_space_);
   have_zygote_space_ = true;
   // Enable large object space allocations.
   large_object_threshold_ = kDefaultLargeObjectThreshold;
@@ -1727,23 +1744,6 @@
   CHECK(mod_union_table != nullptr) << "Failed to create zygote space mod-union table";
   AddModUnionTable(mod_union_table);
   if (collector::SemiSpace::kUseRememberedSet) {
-    // Add a new remembered set for the new main space.
-    accounting::RememberedSet* main_space_rem_set =
-        new accounting::RememberedSet("Main space remembered set", this, main_space_);
-    CHECK(main_space_rem_set != nullptr) << "Failed to create main space remembered set";
-    AddRememberedSet(main_space_rem_set);
-  }
-  // Can't use RosAlloc for non moving space due to thread local buffers.
-  // TODO: Non limited space for non-movable objects?
-  MemMap* mem_map = post_zygote_non_moving_space_mem_map_.release();
-  space::MallocSpace* new_non_moving_space =
-      space::DlMallocSpace::CreateFromMemMap(mem_map, "Non moving dlmalloc space", kPageSize,
-                                             2 * MB, mem_map->Size(), mem_map->Size());
-  AddSpace(new_non_moving_space, false);
-  CHECK(new_non_moving_space != nullptr) << "Failed to create new non-moving space";
-  new_non_moving_space->SetFootprintLimit(new_non_moving_space->Capacity());
-  non_moving_space_ = new_non_moving_space;
-  if (collector::SemiSpace::kUseRememberedSet) {
     // Add a new remembered set for the post-zygote non-moving space.
     accounting::RememberedSet* post_zygote_non_moving_space_rem_set =
         new accounting::RememberedSet("Post-zygote non-moving space remembered set", this,
@@ -1759,8 +1759,8 @@
   allocation_stack_->Reset();
 }
 
-void Heap::MarkAllocStack(accounting::SpaceBitmap* bitmap1,
-                          accounting::SpaceBitmap* bitmap2,
+void Heap::MarkAllocStack(accounting::ContinuousSpaceBitmap* bitmap1,
+                          accounting::ContinuousSpaceBitmap* bitmap2,
                           accounting::ObjectSet* large_objects,
                           accounting::ObjectStack* stack) {
   DCHECK(bitmap1 != nullptr);
@@ -1781,9 +1781,9 @@
 }
 
 void Heap::SwapSemiSpaces() {
-  // Swap the spaces so we allocate into the space which we just evacuated.
+  CHECK(bump_pointer_space_ != nullptr);
+  CHECK(temp_space_ != nullptr);
   std::swap(bump_pointer_space_, temp_space_);
-  bump_pointer_space_->Clear();
 }
 
 void Heap::Compact(space::ContinuousMemMapAllocSpace* target_space,
@@ -1826,7 +1826,7 @@
     MutexLock mu(self, *gc_complete_lock_);
     // Ensure there is only one GC at a time.
     WaitForGcToCompleteLocked(self);
-    compacting_gc = IsCompactingGC(collector_type_);
+    compacting_gc = IsMovingGc(collector_type_);
     // GC can be disabled if someone has a used GetPrimitiveArrayCritical.
     if (compacting_gc && disable_moving_gc_count_ != 0) {
       LOG(WARNING) << "Skipping GC due to disable moving GC count " << disable_moving_gc_count_;
@@ -1881,7 +1881,14 @@
       << "Could not find garbage collector with collector_type="
       << static_cast<size_t>(collector_type_) << " and gc_type=" << gc_type;
   ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), collector->GetName()).c_str());
-  collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
+  if (compacting_gc) {
+    runtime->GetThreadList()->SuspendAll();
+    collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
+    SwapSemiSpaces();
+    runtime->GetThreadList()->ResumeAll();
+  } else {
+    collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
+  }
   total_objects_freed_ever_ += collector->GetFreedObjects();
   total_bytes_freed_ever_ += collector->GetFreedBytes();
   RequestHeapTrim();
@@ -2026,7 +2033,8 @@
           accounting::CardTable::kCardSize);
       LOG(ERROR) << "Card " << reinterpret_cast<void*>(card_addr) << " covers " << cover_begin
           << "-" << cover_end;
-      accounting::SpaceBitmap* bitmap = heap_->GetLiveBitmap()->GetContinuousSpaceBitmap(obj);
+      accounting::ContinuousSpaceBitmap* bitmap =
+          heap_->GetLiveBitmap()->GetContinuousSpaceBitmap(obj);
 
       if (bitmap == nullptr) {
         LOG(ERROR) << "Object " << obj << " has no bitmap";
@@ -2396,9 +2404,11 @@
       WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
       // Swapping bound bitmaps does nothing.
       gc->SwapBitmaps();
+      SwapSemiSpaces();
       if (!VerifyHeapReferences()) {
         LOG(FATAL) << "Pre sweeping " << gc->GetName() << " GC verification failed";
       }
+      SwapSemiSpaces();
       gc->SwapBitmaps();
     }
   }
@@ -2484,25 +2494,11 @@
 
 bool Heap::IsMovableObject(const mirror::Object* obj) const {
   if (kMovingCollector) {
-    DCHECK(!IsInTempSpace(obj));
-    if (bump_pointer_space_->HasAddress(obj)) {
-      return true;
+    space::Space* space = FindContinuousSpaceFromObject(obj, true);
+    if (space != nullptr) {
+      // TODO: Check large object?
+      return space->CanMoveObjects();
     }
-    // TODO: Refactor this logic into the space itself?
-    // Objects in the main space are only copied during background -> foreground transitions or
-    // visa versa.
-    if (main_space_ != nullptr && main_space_->HasAddress(obj) &&
-        (IsCompactingGC(background_collector_type_) ||
-            IsCompactingGC(post_zygote_collector_type_))) {
-      return true;
-    }
-  }
-  return false;
-}
-
-bool Heap::IsInTempSpace(const mirror::Object* obj) const {
-  if (temp_space_->HasAddress(obj) && !temp_space_->Contains(obj)) {
-    return true;
   }
   return false;
 }
@@ -2530,22 +2526,33 @@
   return nullptr;
 }
 
+double Heap::HeapGrowthMultiplier() const {
+  // If we don't care about pause times we are background, so return 1.0.
+  if (!CareAboutPauseTimes() || IsLowMemoryMode()) {
+    return 1.0;
+  }
+  return foreground_heap_growth_multiplier_;
+}
+
 void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran) {
   // We know what our utilization is at this moment.
   // This doesn't actually resize any memory. It just lets the heap grow more when necessary.
-  const size_t bytes_allocated = GetBytesAllocated();
+  const uint64_t bytes_allocated = GetBytesAllocated();
   last_gc_size_ = bytes_allocated;
   last_gc_time_ns_ = NanoTime();
-  size_t target_size;
+  uint64_t target_size;
   collector::GcType gc_type = collector_ran->GetGcType();
   if (gc_type != collector::kGcTypeSticky) {
     // Grow the heap for non sticky GC.
-    target_size = bytes_allocated / GetTargetHeapUtilization();
-    if (target_size > bytes_allocated + max_free_) {
-      target_size = bytes_allocated + max_free_;
-    } else if (target_size < bytes_allocated + min_free_) {
-      target_size = bytes_allocated + min_free_;
-    }
+    const double multiplier = HeapGrowthMultiplier();  // Use the multiplier to grow more for
+    // foreground.
+    intptr_t delta = bytes_allocated / GetTargetHeapUtilization() - bytes_allocated;
+    CHECK_GE(delta, 0);
+    target_size = bytes_allocated + delta * multiplier;
+    target_size = std::min(target_size,
+                           bytes_allocated + static_cast<uint64_t>(max_free_ * multiplier));
+    target_size = std::max(target_size,
+                           bytes_allocated + static_cast<uint64_t>(min_free_ * multiplier));
     native_need_to_run_finalization_ = true;
     next_gc_type_ = collector::kGcTypeSticky;
   } else {
@@ -2570,7 +2577,7 @@
     if (bytes_allocated + max_free_ < max_allowed_footprint_) {
       target_size = bytes_allocated + max_free_;
     } else {
-      target_size = std::max(bytes_allocated, max_allowed_footprint_);
+      target_size = std::max(bytes_allocated, static_cast<uint64_t>(max_allowed_footprint_));
     }
   }
   if (!ignore_max_footprint_) {
@@ -2594,7 +2601,8 @@
       // Start a concurrent GC when we get close to the estimated remaining bytes. When the
       // allocation rate is very high, remaining_bytes could tell us that we should start a GC
       // right away.
-      concurrent_start_bytes_ = std::max(max_allowed_footprint_ - remaining_bytes, bytes_allocated);
+      concurrent_start_bytes_ = std::max(max_allowed_footprint_ - remaining_bytes,
+                                         static_cast<size_t>(bytes_allocated));
     }
   }
 }
@@ -2804,7 +2812,7 @@
       if (IsGcConcurrent()) {
         RequestConcurrentGC(self);
       } else {
-        CollectGarbageInternal(gc_type, kGcCauseForAlloc, false);
+        CollectGarbageInternal(gc_type, kGcCauseForNativeAlloc, false);
       }
     }
   }
@@ -2873,7 +2881,7 @@
 void Heap::ClearMarkedObjects() {
   // Clear all of the spaces' mark bitmaps.
   for (const auto& space : GetContinuousSpaces()) {
-    accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = space->GetMarkBitmap();
     if (space->GetLiveBitmap() != mark_bitmap) {
       mark_bitmap->Clear();
     }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index a8989ec..35724e3 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -132,9 +132,8 @@
   static constexpr size_t kDefaultLongPauseLogThreshold = MsToNs(5);
   static constexpr size_t kDefaultLongGCLogThreshold = MsToNs(100);
   static constexpr size_t kDefaultTLABSize = 256 * KB;
-
-  // Default target utilization.
   static constexpr double kDefaultTargetUtilization = 0.5;
+  static constexpr double kDefaultHeapGrowthMultiplier = 2.0;
 
   // Used so that we don't overflow the allocation time atomic integer.
   static constexpr size_t kTimeAdjust = 1024;
@@ -148,9 +147,10 @@
   // image_file_names names specify Spaces to load based on
   // ImageWriter output.
   explicit Heap(size_t initial_size, size_t growth_limit, size_t min_free,
-                size_t max_free, double target_utilization, size_t capacity,
+                size_t max_free, double target_utilization,
+                double foreground_heap_growth_multiplier, size_t capacity,
                 const std::string& original_image_file_name,
-                CollectorType post_zygote_collector_type, CollectorType background_collector_type,
+                CollectorType foreground_collector_type, CollectorType background_collector_type,
                 size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
                 size_t long_pause_threshold, size_t long_gc_threshold,
                 bool ignore_max_footprint, bool use_tlab, bool verify_pre_gc_heap,
@@ -196,8 +196,6 @@
   void VisitObjects(ObjectCallback callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  void SwapSemiSpaces() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   void CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ThrowOutOfMemoryError(size_t byte_count, bool large_object_allocation);
@@ -249,10 +247,6 @@
   // Returns true if there is any chance that the object (obj) will move.
   bool IsMovableObject(const mirror::Object* obj) const;
 
-  // Returns true if an object is in the temp space, if this happens its usually indicative of
-  // compaction related errors.
-  bool IsInTempSpace(const mirror::Object* obj) const;
-
   // Enables us to compacting GC until objects are released.
   void IncrementDisableMovingGC(Thread* self);
   void DecrementDisableMovingGC(Thread* self);
@@ -357,6 +351,10 @@
     return low_memory_mode_;
   }
 
+  // Returns the heap growth multiplier; this affects how much we grow the heap after a GC.
+  // It scales heap growth, min free, and max free.
+  double HeapGrowthMultiplier() const;
+
   // Freed bytes can be negative in cases where we copy objects from a compacted space to a
   // free-list backed space.
   void RecordFree(ssize_t freed_objects, ssize_t freed_bytes);
@@ -476,7 +474,9 @@
       LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_);
 
   // Mark all the objects in the allocation stack in the specified bitmap.
-  void MarkAllocStack(accounting::SpaceBitmap* bitmap1, accounting::SpaceBitmap* bitmap2,
+  // TODO: Refactor?
+  void MarkAllocStack(accounting::SpaceBitmap<kObjectAlignment>* bitmap1,
+                      accounting::SpaceBitmap<kObjectAlignment>* bitmap2,
                       accounting::ObjectSet* large_objects, accounting::ObjectStack* stack)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -568,7 +568,8 @@
 
  private:
   void Compact(space::ContinuousMemMapAllocSpace* target_space,
-               space::ContinuousMemMapAllocSpace* source_space);
+               space::ContinuousMemMapAllocSpace* source_space)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void FinishGC(Thread* self, collector::GcType gc_type) LOCKS_EXCLUDED(gc_complete_lock_);
 
@@ -580,7 +581,7 @@
   static ALWAYS_INLINE bool AllocatorMayHaveConcurrentGC(AllocatorType allocator_type) {
     return AllocatorHasAllocationStack(allocator_type);
   }
-  static bool IsCompactingGC(CollectorType collector_type) {
+  static bool IsMovingGc(CollectorType collector_type) {
     return collector_type == kCollectorTypeSS || collector_type == kCollectorTypeGSS ||
         collector_type == kCollectorTypeCC;
   }
@@ -609,6 +610,10 @@
                                size_t bytes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Need to do this with mutators paused so that somebody doesn't accidentally allocate into the
+  // wrong space.
+  void SwapSemiSpaces() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Try to allocate a number of bytes, this function never does any GCs. Needs to be inlined so
   // that the switch statement is constant optimized in the entrypoints.
   template <const bool kInstrumented, const bool kGrow>
@@ -668,6 +673,10 @@
   // Find a collector based on GC type.
   collector::GarbageCollector* FindCollectorByGcType(collector::GcType gc_type);
 
+  // Create the main free list space, typically either a RosAlloc space or DlMalloc space.
+  void CreateMainMallocSpace(MemMap* mem_map, size_t initial_size, size_t growth_limit,
+                             size_t capacity);
+
   // Given the current contents of the alloc space, increase the allowed heap footprint to match
   // the target utilization ratio.  This should only be called immediately after a full garbage
   // collection.
@@ -737,17 +746,10 @@
   // A remembered set remembers all of the references from the it's space to the target space.
   SafeMap<space::Space*, accounting::RememberedSet*> remembered_sets_;
 
-  // Keep the free list allocator mem map lying around when we transition to background so that we
-  // don't have to worry about virtual address space fragmentation.
-  UniquePtr<MemMap> allocator_mem_map_;
-
-  // The mem-map which we will use for the non-moving space after the zygote is done forking:
-  UniquePtr<MemMap> post_zygote_non_moving_space_mem_map_;
-
   // The current collector type.
   CollectorType collector_type_;
-  // Which collector we will switch to after zygote fork.
-  CollectorType post_zygote_collector_type_;
+  // Which collector we use when the app is in the foreground.
+  CollectorType foreground_collector_type_;
   // Which collector we will use when the app is notified of a transition to background.
   CollectorType background_collector_type_;
   // Desired collector type, heap trimming daemon transitions the heap if it is != collector_type_.
@@ -929,6 +931,9 @@
   // Target ideal heap utilization ratio
   double target_utilization_;
 
+  // How much more we grow the heap when we are a foreground app instead of background.
+  double foreground_heap_growth_multiplier_;
+
   // Total time which mutators are paused or waiting for GC to complete.
   uint64_t total_wait_time_;
 
diff --git a/runtime/gc/heap_test.cc b/runtime/gc/heap_test.cc
index 07e5088..a85ad4d 100644
--- a/runtime/gc/heap_test.cc
+++ b/runtime/gc/heap_test.cc
@@ -60,13 +60,11 @@
 
 TEST_F(HeapTest, HeapBitmapCapacityTest) {
   byte* heap_begin = reinterpret_cast<byte*>(0x1000);
-  const size_t heap_capacity = accounting::SpaceBitmap::kAlignment * (sizeof(intptr_t) * 8 + 1);
-  UniquePtr<accounting::SpaceBitmap> bitmap(accounting::SpaceBitmap::Create("test bitmap",
-                                                                            heap_begin,
-                                                                            heap_capacity));
+  const size_t heap_capacity = kObjectAlignment * (sizeof(intptr_t) * 8 + 1);
+  UniquePtr<accounting::ContinuousSpaceBitmap> bitmap(
+      accounting::ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
   mirror::Object* fake_end_of_heap_object =
-      reinterpret_cast<mirror::Object*>(&heap_begin[heap_capacity -
-                                                    accounting::SpaceBitmap::kAlignment]);
+      reinterpret_cast<mirror::Object*>(&heap_begin[heap_capacity - kObjectAlignment]);
   bitmap->Set(fake_end_of_heap_object);
 }
 
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index a955cc8..90ffe59 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -38,6 +38,10 @@
   return new BumpPointerSpace(name, mem_map.release());
 }
 
+BumpPointerSpace* BumpPointerSpace::CreateFromMemMap(const std::string& name, MemMap* mem_map) {
+  return new BumpPointerSpace(name, mem_map);
+}
+
 BumpPointerSpace::BumpPointerSpace(const std::string& name, byte* begin, byte* limit)
     : ContinuousMemMapAllocSpace(name, nullptr, begin, begin, limit,
                                  kGcRetentionPolicyAlwaysCollect),
@@ -61,9 +65,6 @@
 void BumpPointerSpace::Clear() {
   // Release the pages back to the operating system.
   CHECK_NE(madvise(Begin(), Limit() - Begin(), MADV_DONTNEED), -1) << "madvise failed";
-}
-
-void BumpPointerSpace::Reset() {
   // Reset the end of the space back to the beginning, we move the end forward as we allocate
   // objects.
   SetEnd(Begin());
@@ -196,7 +197,7 @@
   }
 }
 
-accounting::SpaceBitmap::SweepCallback* BumpPointerSpace::GetSweepCallback() {
+accounting::ContinuousSpaceBitmap::SweepCallback* BumpPointerSpace::GetSweepCallback() {
   LOG(FATAL) << "Unimplemented";
   return nullptr;
 }
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index 3ab5df4..e52a9a3 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -43,6 +43,7 @@
   // guaranteed to be granted, if it is required, the caller should call Begin on the returned
   // space to confirm the request was granted.
   static BumpPointerSpace* Create(const std::string& name, size_t capacity, byte* requested_begin);
+  static BumpPointerSpace* CreateFromMemMap(const std::string& name, MemMap* mem_map);
 
   // Allocate num_bytes, returns nullptr if the space is full.
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
@@ -84,19 +85,16 @@
     return GetMemMap()->Size();
   }
 
-  accounting::SpaceBitmap* GetLiveBitmap() const OVERRIDE {
+  accounting::ContinuousSpaceBitmap* GetLiveBitmap() const OVERRIDE {
     return nullptr;
   }
 
-  accounting::SpaceBitmap* GetMarkBitmap() const OVERRIDE {
+  accounting::ContinuousSpaceBitmap* GetMarkBitmap() const OVERRIDE {
     return nullptr;
   }
 
-  // Madvise the memory back to the OS.
-  void Clear() OVERRIDE;
-
-  // Reset the pointer to the start of the space.
-  void Reset() OVERRIDE LOCKS_EXCLUDED(block_lock_);
+  // Reset the space to empty.
+  void Clear() OVERRIDE LOCKS_EXCLUDED(block_lock_);
 
   void Dump(std::ostream& os) const;
 
@@ -113,6 +111,9 @@
     return Begin() == End();
   }
 
+  bool CanMoveObjects() const OVERRIDE {
+    return true;
+  }
 
   bool Contains(const mirror::Object* obj) const {
     const byte* byte_obj = reinterpret_cast<const byte*>(obj);
@@ -137,7 +138,7 @@
   void Walk(ObjectCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  accounting::SpaceBitmap::SweepCallback* GetSweepCallback() OVERRIDE;
+  accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() OVERRIDE;
 
   // Object alignment within the space.
   static constexpr size_t kAlignment = 8;
diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h
index 02d8b54..4c8a35e 100644
--- a/runtime/gc/space/dlmalloc_space-inl.h
+++ b/runtime/gc/space/dlmalloc_space-inl.h
@@ -52,7 +52,7 @@
 inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/, size_t num_bytes,
                                                                size_t* bytes_allocated,
                                                                size_t* usable_size) {
-  mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_for_alloc_, num_bytes));
+  mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_, num_bytes));
   if (LIKELY(result != NULL)) {
     if (kDebugSpaces) {
       CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 60f566c..41a0458 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
 
-#include "dlmalloc_space.h"
-
 #include "dlmalloc_space-inl.h"
+
 #include "gc/accounting/card_table.h"
+#include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -36,15 +36,19 @@
 template class ValgrindMallocSpace<DlMallocSpace, void*>;
 
 DlMallocSpace::DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin,
-                             byte* end, byte* limit, size_t growth_limit)
-    : MallocSpace(name, mem_map, begin, end, limit, growth_limit),
-      mspace_(mspace), mspace_for_alloc_(mspace) {
+                             byte* end, byte* limit, size_t growth_limit,
+                             bool can_move_objects, size_t starting_size,
+                             size_t initial_size)
+    : MallocSpace(name, mem_map, begin, end, limit, growth_limit, true, can_move_objects,
+                  starting_size, initial_size),
+      mspace_(mspace) {
   CHECK(mspace != NULL);
 }
 
 DlMallocSpace* DlMallocSpace::CreateFromMemMap(MemMap* mem_map, const std::string& name,
                                                size_t starting_size, size_t initial_size,
-                                               size_t growth_limit, size_t capacity) {
+                                               size_t growth_limit, size_t capacity,
+                                               bool can_move_objects) {
   DCHECK(mem_map != nullptr);
   void* mspace = CreateMspace(mem_map->Begin(), starting_size, initial_size);
   if (mspace == nullptr) {
@@ -62,14 +66,17 @@
   byte* begin = mem_map->Begin();
   if (Runtime::Current()->RunningOnValgrind()) {
     return new ValgrindMallocSpace<DlMallocSpace, void*>(
-        name, mem_map, mspace, begin, end, begin + capacity, growth_limit, initial_size);
+        name, mem_map, mspace, begin, end, begin + capacity, growth_limit, initial_size,
+        can_move_objects, starting_size);
   } else {
-    return new DlMallocSpace(name, mem_map, mspace, begin, end, begin + capacity, growth_limit);
+    return new DlMallocSpace(name, mem_map, mspace, begin, end, begin + capacity, growth_limit,
+                             can_move_objects, starting_size, initial_size);
   }
 }
 
-DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
-                                     size_t capacity, byte* requested_begin) {
+DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size,
+                                     size_t growth_limit, size_t capacity, byte* requested_begin,
+                                     bool can_move_objects) {
   uint64_t start_time = 0;
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     start_time = NanoTime();
@@ -93,7 +100,7 @@
     return nullptr;
   }
   DlMallocSpace* space = CreateFromMemMap(mem_map, name, starting_size, initial_size,
-                                          growth_limit, capacity);
+                                          growth_limit, capacity, can_move_objects);
   // We start out with only the initial size possibly containing objects.
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "DlMallocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
@@ -143,8 +150,10 @@
 
 MallocSpace* DlMallocSpace::CreateInstance(const std::string& name, MemMap* mem_map,
                                            void* allocator, byte* begin, byte* end,
-                                           byte* limit, size_t growth_limit) {
-  return new DlMallocSpace(name, mem_map, allocator, begin, end, limit, growth_limit);
+                                           byte* limit, size_t growth_limit,
+                                           bool can_move_objects) {
+  return new DlMallocSpace(name, mem_map, allocator, begin, end, limit, growth_limit,
+                           can_move_objects, starting_size_, initial_size_);
 }
 
 size_t DlMallocSpace::Free(Thread* self, mirror::Object* ptr) {
@@ -280,13 +289,13 @@
 }
 
 void DlMallocSpace::Clear() {
+  size_t footprint_limit = GetFootprintLimit();
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
-  GetLiveBitmap()->Clear();
-  GetMarkBitmap()->Clear();
-}
-
-void DlMallocSpace::Reset() {
-  // TODO: Delete and create new mspace here.
+  live_bitmap_->Clear();
+  mark_bitmap_->Clear();
+  end_ = Begin() + starting_size_;
+  mspace_ = CreateMspace(mem_map_->Begin(), starting_size_, initial_size_);
+  SetFootprintLimit(footprint_limit);
 }
 
 #ifndef NDEBUG
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index 76c4489..accd26b 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -36,14 +36,15 @@
   // Create a DlMallocSpace from an existing mem_map.
   static DlMallocSpace* CreateFromMemMap(MemMap* mem_map, const std::string& name,
                                          size_t starting_size, size_t initial_size,
-                                         size_t growth_limit, size_t capacity);
+                                         size_t growth_limit, size_t capacity,
+                                         bool can_move_objects);
 
   // Create a DlMallocSpace with the requested sizes. The requested
   // base address is not guaranteed to be granted, if it is required,
   // the caller should call Begin on the returned space to confirm the
   // request was granted.
   static DlMallocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
-                               size_t capacity, byte* requested_begin);
+                               size_t capacity, byte* requested_begin, bool can_move_objects);
 
   // Virtual to allow ValgrindMallocSpace to intercept.
   virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated,
@@ -107,13 +108,13 @@
   void SetFootprintLimit(size_t limit) OVERRIDE;
 
   MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
-                              byte* begin, byte* end, byte* limit, size_t growth_limit);
+                              byte* begin, byte* end, byte* limit, size_t growth_limit,
+                              bool can_move_objects);
 
   uint64_t GetBytesAllocated() OVERRIDE;
   uint64_t GetObjectsAllocated() OVERRIDE;
 
-  void Clear() OVERRIDE;
-  void Reset() OVERRIDE;
+  virtual void Clear() OVERRIDE;
 
   bool IsDlMallocSpace() const OVERRIDE {
     return true;
@@ -125,7 +126,8 @@
 
  protected:
   DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin, byte* end,
-                byte* limit, size_t growth_limit);
+                byte* limit, size_t growth_limit, bool can_move_objects, size_t starting_size,
+                size_t initial_size);
 
  private:
   mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated,
@@ -142,11 +144,7 @@
   static const size_t kChunkOverhead = kWordSize;
 
   // Underlying malloc space.
-  void* const mspace_;
-
-  // An mspace pointer used for allocation. Equals  mspace_ or nullptr after InvalidateAllocator()
-  // is called.
-  void* mspace_for_alloc_;
+  void* mspace_;
 
   friend class collector::MarkSweep;
 
diff --git a/runtime/gc/space/dlmalloc_space_base_test.cc b/runtime/gc/space/dlmalloc_space_base_test.cc
index 508d869..129eace 100644
--- a/runtime/gc/space/dlmalloc_space_base_test.cc
+++ b/runtime/gc/space/dlmalloc_space_base_test.cc
@@ -23,7 +23,7 @@
 
 MallocSpace* CreateDlMallocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                  size_t capacity, byte* requested_begin) {
-  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin);
+  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin, false);
 }
 
 TEST_SPACE_CREATE_FN_BASE(DlMallocSpace, CreateDlMallocSpace)
diff --git a/runtime/gc/space/dlmalloc_space_random_test.cc b/runtime/gc/space/dlmalloc_space_random_test.cc
index 43a1bf0..c4f8bae 100644
--- a/runtime/gc/space/dlmalloc_space_random_test.cc
+++ b/runtime/gc/space/dlmalloc_space_random_test.cc
@@ -23,7 +23,7 @@
 
 MallocSpace* CreateDlMallocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                  size_t capacity, byte* requested_begin) {
-  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin);
+  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin, false);
 }
 
 TEST_SPACE_CREATE_FN_RANDOM(DlMallocSpace, CreateDlMallocSpace)
diff --git a/runtime/gc/space/dlmalloc_space_static_test.cc b/runtime/gc/space/dlmalloc_space_static_test.cc
index 4fbc81e..edaa198 100644
--- a/runtime/gc/space/dlmalloc_space_static_test.cc
+++ b/runtime/gc/space/dlmalloc_space_static_test.cc
@@ -23,7 +23,7 @@
 
 MallocSpace* CreateDlMallocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                  size_t capacity, byte* requested_begin) {
-  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin);
+  return DlMallocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin, false);
 }
 
 TEST_SPACE_CREATE_FN_STATIC(DlMallocSpace, CreateDlMallocSpace)
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index faa539f..91d8820 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -35,7 +35,7 @@
 Atomic<uint32_t> ImageSpace::bitmap_index_(0);
 
 ImageSpace::ImageSpace(const std::string& name, MemMap* mem_map,
-                       accounting::SpaceBitmap* live_bitmap)
+                       accounting::ContinuousSpaceBitmap* live_bitmap)
     : MemMapSpace(name, mem_map, mem_map->Begin(), mem_map->End(), mem_map->End(),
                   kGcRetentionPolicyNeverCollect) {
   DCHECK(live_bitmap != nullptr);
@@ -197,10 +197,10 @@
   uint32_t bitmap_index = bitmap_index_.FetchAndAdd(1);
   std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_file_name,
                                        bitmap_index));
-  UniquePtr<accounting::SpaceBitmap> bitmap(
-      accounting::SpaceBitmap::CreateFromMemMap(bitmap_name, image_map.release(),
-                                                reinterpret_cast<byte*>(map->Begin()),
-                                                map->Size()));
+  UniquePtr<accounting::ContinuousSpaceBitmap> bitmap(
+      accounting::ContinuousSpaceBitmap::CreateFromMemMap(bitmap_name, image_map.release(),
+                                                          reinterpret_cast<byte*>(map->Begin()),
+                                                          map->Size()));
   if (bitmap.get() == nullptr) {
     *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str());
     return nullptr;
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 116c498..f6daf89 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_GC_SPACE_IMAGE_SPACE_H_
 #define ART_RUNTIME_GC_SPACE_IMAGE_SPACE_H_
 
+#include "gc/accounting/space_bitmap.h"
 #include "space.h"
 
 namespace art {
@@ -59,11 +60,11 @@
     return GetName();
   }
 
-  accounting::SpaceBitmap* GetLiveBitmap() const {
+  accounting::ContinuousSpaceBitmap* GetLiveBitmap() const OVERRIDE {
     return live_bitmap_.get();
   }
 
-  accounting::SpaceBitmap* GetMarkBitmap() const {
+  accounting::ContinuousSpaceBitmap* GetMarkBitmap() const OVERRIDE {
     // ImageSpaces have the same bitmap for both live and marked. This helps reduce the number of
     // special cases to test against.
     return live_bitmap_.get();
@@ -75,6 +76,10 @@
   void Sweep(bool /* swap_bitmaps */, size_t* /* freed_objects */, size_t* /* freed_bytes */) {
   }
 
+  bool CanMoveObjects() const OVERRIDE {
+    return false;
+  }
+
  private:
   // Tries to initialize an ImageSpace from the given image path,
   // returning NULL on error.
@@ -96,9 +101,10 @@
 
   static Atomic<uint32_t> bitmap_index_;
 
-  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
+  UniquePtr<accounting::ContinuousSpaceBitmap> live_bitmap_;
 
-  ImageSpace(const std::string& name, MemMap* mem_map, accounting::SpaceBitmap* live_bitmap);
+  ImageSpace(const std::string& name, MemMap* mem_map,
+             accounting::ContinuousSpaceBitmap* live_bitmap);
 
   // The OatFile associated with the image during early startup to
   // reserve space contiguous to the image. It is later released to
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index eb01325..18e518f 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -75,6 +75,10 @@
 
   void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
 
+  virtual bool CanMoveObjects() const OVERRIDE {
+    return false;
+  }
+
  protected:
   explicit LargeObjectSpace(const std::string& name);
 
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index dac043e..eaf14fb 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -37,24 +37,26 @@
 
 MallocSpace::MallocSpace(const std::string& name, MemMap* mem_map,
                          byte* begin, byte* end, byte* limit, size_t growth_limit,
-                         bool create_bitmaps)
+                         bool create_bitmaps, bool can_move_objects, size_t starting_size,
+                         size_t initial_size)
     : ContinuousMemMapAllocSpace(name, mem_map, begin, end, limit, kGcRetentionPolicyAlwaysCollect),
       recent_free_pos_(0), lock_("allocation space lock", kAllocSpaceLock),
-      growth_limit_(growth_limit) {
+      growth_limit_(growth_limit), can_move_objects_(can_move_objects),
+      starting_size_(starting_size), initial_size_(initial_size) {
   if (create_bitmaps) {
     size_t bitmap_index = bitmap_index_++;
     static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
     CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
     CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
-    live_bitmap_.reset(accounting::SpaceBitmap::Create(
+    live_bitmap_.reset(accounting::ContinuousSpaceBitmap::Create(
         StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
         Begin(), Capacity()));
-    DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #"
+    DCHECK(live_bitmap_.get() != nullptr) << "could not create allocspace live bitmap #"
         << bitmap_index;
-    mark_bitmap_.reset(accounting::SpaceBitmap::Create(
+    mark_bitmap_.reset(accounting::ContinuousSpaceBitmap::Create(
         StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
         Begin(), Capacity()));
-    DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #"
+    DCHECK(mark_bitmap_.get() != nullptr) << "could not create allocspace mark bitmap #"
         << bitmap_index;
   }
   for (auto& freed : recent_freed_objects_) {
@@ -178,11 +180,6 @@
              << "GrowthLimit " << growth_limit_ << "\n"
              << "Capacity " << Capacity();
   SetGrowthLimit(RoundUp(size, kPageSize));
-  SetFootprintLimit(RoundUp(size, kPageSize));
-
-  // TODO: Not hardcode these in?
-  const size_t starting_size = kPageSize;
-  const size_t initial_size = 2 * MB;
   // FIXME: Do we need reference counted pointers here?
   // Make the two spaces share the same mark bitmaps since the bitmaps span both of the spaces.
   VLOG(heap) << "Creating new AllocSpace: ";
@@ -194,14 +191,14 @@
   UniquePtr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
                                                     PROT_READ | PROT_WRITE, &error_msg));
   CHECK(mem_map.get() != nullptr) << error_msg;
-  void* allocator = CreateAllocator(end_, starting_size, initial_size, capacity, low_memory_mode);
+  void* allocator = CreateAllocator(end_, starting_size_, initial_size_, capacity, low_memory_mode);
   // Protect memory beyond the initial size.
-  byte* end = mem_map->Begin() + starting_size;
-  if (capacity - initial_size > 0) {
-    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), alloc_space_name);
+  byte* end = mem_map->Begin() + starting_size_;
+  if (capacity > initial_size_) {
+    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size_, PROT_NONE), alloc_space_name);
   }
   *out_malloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator, end_, end,
-                                     limit_, growth_limit);
+                                     limit_, growth_limit, CanMoveObjects());
   SetLimit(End());
   live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
   CHECK_EQ(live_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
@@ -236,7 +233,7 @@
   // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
   // the bitmaps as an optimization.
   if (!context->swap_bitmaps) {
-    accounting::SpaceBitmap* bitmap = space->GetLiveBitmap();
+    accounting::ContinuousSpaceBitmap* bitmap = space->GetLiveBitmap();
     for (size_t i = 0; i < num_ptrs; ++i) {
       bitmap->Clear(ptrs[i]);
     }
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index fbcee5f..d24016c 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -114,7 +114,8 @@
   void SetGrowthLimit(size_t growth_limit);
 
   virtual MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
-                                      byte* begin, byte* end, byte* limit, size_t growth_limit) = 0;
+                                      byte* begin, byte* end, byte* limit, size_t growth_limit,
+                                      bool can_move_objects) = 0;
 
   // Splits ourself into a zygote space and new malloc space which has our unused memory. When true,
   // the low memory mode argument specifies that the heap wishes the created space to be more
@@ -127,9 +128,14 @@
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
+  bool CanMoveObjects() const OVERRIDE {
+    return can_move_objects_;
+  }
+
  protected:
   MallocSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end,
-              byte* limit, size_t growth_limit, bool create_bitmaps = true);
+              byte* limit, size_t growth_limit, bool create_bitmaps, bool can_move_objects,
+              size_t starting_size, size_t initial_size);
 
   static MemMap* CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
                               size_t* growth_limit, size_t* capacity, byte* requested_begin);
@@ -143,7 +149,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
-  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
+  virtual accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() {
     return &SweepCallback;
   }
 
@@ -167,6 +173,13 @@
   // one time by a call to ClearGrowthLimit.
   size_t growth_limit_;
 
+  // True if objects in the space are movable.
+  const bool can_move_objects_;
+
+  // Starting and initial sizes, used when you reset the space.
+  const size_t starting_size_;
+  const size_t initial_size_;
+
  private:
   static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 5c5e7f8..5a7d941 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -15,10 +15,10 @@
  * limitations under the License.
  */
 
-#include "rosalloc_space.h"
-
 #include "rosalloc_space-inl.h"
+
 #include "gc/accounting/card_table.h"
+#include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -34,19 +34,23 @@
 
 static constexpr bool kPrefetchDuringRosAllocFreeList = true;
 
-template class ValgrindMallocSpace<RosAllocSpace, allocator::RosAlloc*>;
+// TODO: Fix
+// template class ValgrindMallocSpace<RosAllocSpace, allocator::RosAlloc*>;
 
 RosAllocSpace::RosAllocSpace(const std::string& name, MemMap* mem_map,
                              art::gc::allocator::RosAlloc* rosalloc, byte* begin, byte* end,
-                             byte* limit, size_t growth_limit)
-    : MallocSpace(name, mem_map, begin, end, limit, growth_limit), rosalloc_(rosalloc) {
-  CHECK(rosalloc != NULL);
+                             byte* limit, size_t growth_limit, bool can_move_objects,
+                             size_t starting_size, size_t initial_size, bool low_memory_mode)
+    : MallocSpace(name, mem_map, begin, end, limit, growth_limit, true, can_move_objects,
+                  starting_size, initial_size),
+      rosalloc_(rosalloc), low_memory_mode_(low_memory_mode) {
+  CHECK(rosalloc != nullptr);
 }
 
 RosAllocSpace* RosAllocSpace::CreateFromMemMap(MemMap* mem_map, const std::string& name,
                                                size_t starting_size, size_t initial_size,
                                                size_t growth_limit, size_t capacity,
-                                               bool low_memory_mode) {
+                                               bool low_memory_mode, bool can_move_objects) {
   DCHECK(mem_map != nullptr);
   allocator::RosAlloc* rosalloc = CreateRosAlloc(mem_map->Begin(), starting_size, initial_size,
                                                  capacity, low_memory_mode);
@@ -66,10 +70,10 @@
   // TODO: Fix RosAllocSpace to support valgrind. There is currently some issues with
   // AllocationSize caused by redzones. b/12944686
   if (false && Runtime::Current()->GetHeap()->RunningOnValgrind()) {
-    return new ValgrindMallocSpace<RosAllocSpace, allocator::RosAlloc*>(
-        name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit, initial_size);
+    LOG(FATAL) << "Unimplemented";
   } else {
-    return new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit);
+    return new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit,
+                             can_move_objects, starting_size, initial_size, low_memory_mode);
   }
 }
 
@@ -79,7 +83,7 @@
 
 RosAllocSpace* RosAllocSpace::Create(const std::string& name, size_t initial_size,
                                      size_t growth_limit, size_t capacity, byte* requested_begin,
-                                     bool low_memory_mode) {
+                                     bool low_memory_mode, bool can_move_objects) {
   uint64_t start_time = 0;
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     start_time = NanoTime();
@@ -104,7 +108,8 @@
   }
 
   RosAllocSpace* space = CreateFromMemMap(mem_map, name, starting_size, initial_size,
-                                          growth_limit, capacity, low_memory_mode);
+                                          growth_limit, capacity, low_memory_mode,
+                                          can_move_objects);
   // We start out with only the initial size possibly containing objects.
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "RosAllocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
@@ -113,7 +118,8 @@
   return space;
 }
 
-allocator::RosAlloc* RosAllocSpace::CreateRosAlloc(void* begin, size_t morecore_start, size_t initial_size,
+allocator::RosAlloc* RosAllocSpace::CreateRosAlloc(void* begin, size_t morecore_start,
+                                                   size_t initial_size,
                                                    size_t maximum_size, bool low_memory_mode) {
   // clear errno to allow PLOG on error
   errno = 0;
@@ -154,9 +160,11 @@
 }
 
 MallocSpace* RosAllocSpace::CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
-                                           byte* begin, byte* end, byte* limit, size_t growth_limit) {
+                                           byte* begin, byte* end, byte* limit, size_t growth_limit,
+                                           bool can_move_objects) {
   return new RosAllocSpace(name, mem_map, reinterpret_cast<allocator::RosAlloc*>(allocator),
-                           begin, end, limit, growth_limit);
+                           begin, end, limit, growth_limit, can_move_objects, starting_size_,
+                           initial_size_, low_memory_mode_);
 }
 
 size_t RosAllocSpace::Free(Thread* self, mirror::Object* ptr) {
@@ -333,13 +341,15 @@
 }
 
 void RosAllocSpace::Clear() {
+  size_t footprint_limit = GetFootprintLimit();
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
-  GetLiveBitmap()->Clear();
-  GetMarkBitmap()->Clear();
-}
-
-void RosAllocSpace::Reset() {
-  // TODO: Delete and create new mspace here.
+  live_bitmap_->Clear();
+  mark_bitmap_->Clear();
+  end_ = begin_ + starting_size_;
+  delete rosalloc_;
+  rosalloc_ = CreateRosAlloc(mem_map_->Begin(), starting_size_, initial_size_, Capacity(),
+                             low_memory_mode_);
+  SetFootprintLimit(footprint_limit);
 }
 
 }  // namespace space
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index 900e7a9..a156738 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -39,11 +39,12 @@
   // the caller should call Begin on the returned space to confirm the
   // request was granted.
   static RosAllocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
-                               size_t capacity, byte* requested_begin, bool low_memory_mode);
+                               size_t capacity, byte* requested_begin, bool low_memory_mode,
+                               bool can_move_objects);
   static RosAllocSpace* CreateFromMemMap(MemMap* mem_map, const std::string& name,
                                          size_t starting_size, size_t initial_size,
                                          size_t growth_limit, size_t capacity,
-                                         bool low_memory_mode);
+                                         bool low_memory_mode, bool can_move_objects);
 
   mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated,
                                   size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_);
@@ -80,9 +81,10 @@
   void SetFootprintLimit(size_t limit) OVERRIDE;
 
   void Clear() OVERRIDE;
-  void Reset() OVERRIDE;
+
   MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
-                              byte* begin, byte* end, byte* limit, size_t growth_limit);
+                              byte* begin, byte* end, byte* limit, size_t growth_limit,
+                              bool can_move_objects) OVERRIDE;
 
   uint64_t GetBytesAllocated() OVERRIDE;
   uint64_t GetObjectsAllocated() OVERRIDE;
@@ -110,7 +112,8 @@
 
  protected:
   RosAllocSpace(const std::string& name, MemMap* mem_map, allocator::RosAlloc* rosalloc,
-                byte* begin, byte* end, byte* limit, size_t growth_limit);
+                byte* begin, byte* end, byte* limit, size_t growth_limit, bool can_move_objects,
+                size_t starting_size, size_t initial_size, bool low_memory_mode);
 
  private:
   mirror::Object* AllocCommon(Thread* self, size_t num_bytes, size_t* bytes_allocated,
@@ -132,7 +135,9 @@
       LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_);
 
   // Underlying rosalloc.
-  allocator::RosAlloc* const rosalloc_;
+  allocator::RosAlloc* rosalloc_;
+
+  const bool low_memory_mode_;
 
   friend class collector::MarkSweep;
 
diff --git a/runtime/gc/space/rosalloc_space_base_test.cc b/runtime/gc/space/rosalloc_space_base_test.cc
index df42076..c3157fa 100644
--- a/runtime/gc/space/rosalloc_space_base_test.cc
+++ b/runtime/gc/space/rosalloc_space_base_test.cc
@@ -23,7 +23,7 @@
 MallocSpace* CreateRosAllocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                  size_t capacity, byte* requested_begin) {
   return RosAllocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin,
-                               Runtime::Current()->GetHeap()->IsLowMemoryMode());
+                               Runtime::Current()->GetHeap()->IsLowMemoryMode(), false);
 }
 
 TEST_SPACE_CREATE_FN_BASE(RosAllocSpace, CreateRosAllocSpace)
diff --git a/runtime/gc/space/rosalloc_space_random_test.cc b/runtime/gc/space/rosalloc_space_random_test.cc
index 4d37c9e..864bbc9 100644
--- a/runtime/gc/space/rosalloc_space_random_test.cc
+++ b/runtime/gc/space/rosalloc_space_random_test.cc
@@ -23,7 +23,7 @@
 MallocSpace* CreateRosAllocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                  size_t capacity, byte* requested_begin) {
   return RosAllocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin,
-                               Runtime::Current()->GetHeap()->IsLowMemoryMode());
+                               Runtime::Current()->GetHeap()->IsLowMemoryMode(), false);
 }
 
 TEST_SPACE_CREATE_FN_RANDOM(RosAllocSpace, CreateRosAllocSpace)
diff --git a/runtime/gc/space/rosalloc_space_static_test.cc b/runtime/gc/space/rosalloc_space_static_test.cc
index 9f11fd0..c0e2ac8 100644
--- a/runtime/gc/space/rosalloc_space_static_test.cc
+++ b/runtime/gc/space/rosalloc_space_static_test.cc
@@ -23,7 +23,7 @@
 MallocSpace* CreateRosAllocSpace(const std::string& name, size_t initial_size, size_t growth_limit,
                                  size_t capacity, byte* requested_begin) {
   return RosAllocSpace::Create(name, initial_size, growth_limit, capacity, requested_begin,
-                               Runtime::Current()->GetHeap()->IsLowMemoryMode());
+                               Runtime::Current()->GetHeap()->IsLowMemoryMode(), false);
 }
 
 TEST_SPACE_CREATE_FN_STATIC(RosAllocSpace, CreateRosAllocSpace)
diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc
index 4af65a9..01e8b04 100644
--- a/runtime/gc/space/space.cc
+++ b/runtime/gc/space/space.cc
@@ -18,6 +18,7 @@
 
 #include "base/logging.h"
 #include "gc/accounting/heap_bitmap.h"
+#include "gc/accounting/space_bitmap-inl.h"
 #include "runtime.h"
 #include "thread-inl.h"
 
@@ -77,8 +78,8 @@
 void ContinuousMemMapAllocSpace::Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes) {
   DCHECK(freed_objects != nullptr);
   DCHECK(freed_bytes != nullptr);
-  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = GetMarkBitmap();
+  accounting::ContinuousSpaceBitmap* live_bitmap = GetLiveBitmap();
+  accounting::ContinuousSpaceBitmap* mark_bitmap = GetMarkBitmap();
   // If the bitmaps are bound then sweeping this space clearly won't do anything.
   if (live_bitmap == mark_bitmap) {
     return;
@@ -94,11 +95,9 @@
     std::swap(live_bitmap, mark_bitmap);
   }
   // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
-  accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap,
-                                     reinterpret_cast<uintptr_t>(Begin()),
-                                     reinterpret_cast<uintptr_t>(End()),
-                                     GetSweepCallback(),
-                                     reinterpret_cast<void*>(&scc));
+  accounting::ContinuousSpaceBitmap::SweepWalk(
+      *live_bitmap, *mark_bitmap, reinterpret_cast<uintptr_t>(Begin()),
+      reinterpret_cast<uintptr_t>(End()), GetSweepCallback(), reinterpret_cast<void*>(&scc));
   *freed_objects += scc.freed_objects;
   *freed_bytes += scc.freed_bytes;
 }
@@ -106,9 +105,9 @@
 // Returns the old mark bitmap.
 void ContinuousMemMapAllocSpace::BindLiveToMarkBitmap() {
   CHECK(!HasBoundBitmaps());
-  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
+  accounting::ContinuousSpaceBitmap* live_bitmap = GetLiveBitmap();
   if (live_bitmap != mark_bitmap_.get()) {
-    accounting::SpaceBitmap* mark_bitmap = mark_bitmap_.release();
+    accounting::ContinuousSpaceBitmap* mark_bitmap = mark_bitmap_.release();
     Runtime::Current()->GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
     temp_bitmap_.reset(mark_bitmap);
     mark_bitmap_.reset(live_bitmap);
@@ -122,7 +121,7 @@
 void ContinuousMemMapAllocSpace::UnBindBitmaps() {
   CHECK(HasBoundBitmaps());
   // At this point, the temp_bitmap holds our old mark bitmap.
-  accounting::SpaceBitmap* new_bitmap = temp_bitmap_.release();
+  accounting::ContinuousSpaceBitmap* new_bitmap = temp_bitmap_.release();
   Runtime::Current()->GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap_.get(), new_bitmap);
   CHECK_EQ(mark_bitmap_.release(), live_bitmap_.get());
   mark_bitmap_.reset(new_bitmap);
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 37d7c80..2b27f87 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -34,10 +34,6 @@
 
 namespace gc {
 
-namespace accounting {
-  class SpaceBitmap;
-}  // namespace accounting
-
 class Heap;
 
 namespace space {
@@ -160,6 +156,9 @@
   }
   virtual ContinuousMemMapAllocSpace* AsContinuousMemMapAllocSpace();
 
+  // Returns true if objects in the space are movable.
+  virtual bool CanMoveObjects() const = 0;
+
   virtual ~Space() {}
 
  protected:
@@ -265,8 +264,8 @@
     return End() - Begin();
   }
 
-  virtual accounting::SpaceBitmap* GetLiveBitmap() const = 0;
-  virtual accounting::SpaceBitmap* GetMarkBitmap() const = 0;
+  virtual accounting::ContinuousSpaceBitmap* GetLiveBitmap() const = 0;
+  virtual accounting::ContinuousSpaceBitmap* GetMarkBitmap() const = 0;
 
   // Maximum which the mapped space can grow to.
   virtual size_t Capacity() const {
@@ -396,27 +395,24 @@
   // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
   void SwapBitmaps();
 
-  // Free all memory associated with this space.
+  // Clear the space back to an empty space.
   virtual void Clear() = 0;
 
-  // Reset the space back to an empty space.
-  virtual void Reset() = 0;
-
-  accounting::SpaceBitmap* GetLiveBitmap() const {
+  accounting::ContinuousSpaceBitmap* GetLiveBitmap() const {
     return live_bitmap_.get();
   }
 
-  accounting::SpaceBitmap* GetMarkBitmap() const {
+  accounting::ContinuousSpaceBitmap* GetMarkBitmap() const {
     return mark_bitmap_.get();
   }
 
   void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
-  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() = 0;
+  virtual accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() = 0;
 
  protected:
-  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
+  UniquePtr<accounting::ContinuousSpaceBitmap> live_bitmap_;
+  UniquePtr<accounting::ContinuousSpaceBitmap> mark_bitmap_;
+  UniquePtr<accounting::ContinuousSpaceBitmap> temp_bitmap_;
 
   ContinuousMemMapAllocSpace(const std::string& name, MemMap* mem_map, byte* begin,
                              byte* end, byte* limit, GcRetentionPolicy gc_retention_policy)
diff --git a/runtime/gc/space/valgrind_malloc_space-inl.h b/runtime/gc/space/valgrind_malloc_space-inl.h
index ed97e60..966c276 100644
--- a/runtime/gc/space/valgrind_malloc_space-inl.h
+++ b/runtime/gc/space/valgrind_malloc_space-inl.h
@@ -95,8 +95,10 @@
 ValgrindMallocSpace<S, A>::ValgrindMallocSpace(const std::string& name, MemMap* mem_map,
                                                A allocator, byte* begin,
                                                byte* end, byte* limit, size_t growth_limit,
-                                               size_t initial_size) :
-    S(name, mem_map, allocator, begin, end, limit, growth_limit) {
+                                               size_t initial_size,
+                                               bool can_move_objects, size_t starting_size) :
+    S(name, mem_map, allocator, begin, end, limit, growth_limit, can_move_objects, starting_size,
+      initial_size) {
   VALGRIND_MAKE_MEM_UNDEFINED(mem_map->Begin() + initial_size, mem_map->Size() - initial_size);
 }
 
diff --git a/runtime/gc/space/valgrind_malloc_space.h b/runtime/gc/space/valgrind_malloc_space.h
index 6b755c4..200ad83 100644
--- a/runtime/gc/space/valgrind_malloc_space.h
+++ b/runtime/gc/space/valgrind_malloc_space.h
@@ -48,7 +48,7 @@
 
   ValgrindMallocSpace(const std::string& name, MemMap* mem_map, AllocatorType allocator,
                       byte* begin, byte* end, byte* limit, size_t growth_limit,
-                      size_t initial_size);
+                      size_t initial_size, bool can_move_objects, size_t starting_size);
   virtual ~ValgrindMallocSpace() {}
 
  private:
diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc
index d1c3d03..1b06b63 100644
--- a/runtime/gc/space/zygote_space.cc
+++ b/runtime/gc/space/zygote_space.cc
@@ -40,8 +40,8 @@
 };
 
 ZygoteSpace* ZygoteSpace::Create(const std::string& name, MemMap* mem_map,
-                                 accounting::SpaceBitmap* live_bitmap,
-                                 accounting::SpaceBitmap* mark_bitmap) {
+                                 accounting::ContinuousSpaceBitmap* live_bitmap,
+                                 accounting::ContinuousSpaceBitmap* mark_bitmap) {
   DCHECK(live_bitmap != nullptr);
   DCHECK(mark_bitmap != nullptr);
   size_t objects_allocated = 0;
@@ -61,10 +61,6 @@
   LOG(FATAL) << "Unimplemented";
 }
 
-void ZygoteSpace::Reset() {
-  LOG(FATAL) << "Unimplemented";
-}
-
 ZygoteSpace::ZygoteSpace(const std::string& name, MemMap* mem_map, size_t objects_allocated)
     : ContinuousMemMapAllocSpace(name, mem_map, mem_map->Begin(), mem_map->End(), mem_map->End(),
                                  kGcRetentionPolicyFullCollect),
@@ -109,7 +105,7 @@
   // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
   // the bitmaps as an optimization.
   if (!context->swap_bitmaps) {
-    accounting::SpaceBitmap* bitmap = zygote_space->GetLiveBitmap();
+    accounting::ContinuousSpaceBitmap* bitmap = zygote_space->GetLiveBitmap();
     for (size_t i = 0; i < num_ptrs; ++i) {
       bitmap->Clear(ptrs[i]);
     }
diff --git a/runtime/gc/space/zygote_space.h b/runtime/gc/space/zygote_space.h
index 8880548..50fc62b 100644
--- a/runtime/gc/space/zygote_space.h
+++ b/runtime/gc/space/zygote_space.h
@@ -17,16 +17,13 @@
 #ifndef ART_RUNTIME_GC_SPACE_ZYGOTE_SPACE_H_
 #define ART_RUNTIME_GC_SPACE_ZYGOTE_SPACE_H_
 
+#include "gc/accounting/space_bitmap.h"
 #include "malloc_space.h"
 #include "mem_map.h"
 
 namespace art {
 namespace gc {
 
-namespace accounting {
-class SpaceBitmap;
-}
-
 namespace space {
 
 // An zygote space is a space which you cannot allocate into or free from.
@@ -34,8 +31,8 @@
  public:
   // Returns the remaining storage in the out_map field.
   static ZygoteSpace* Create(const std::string& name, MemMap* mem_map,
-                             accounting::SpaceBitmap* live_bitmap,
-                             accounting::SpaceBitmap* mark_bitmap)
+                             accounting::ContinuousSpaceBitmap* live_bitmap,
+                             accounting::ContinuousSpaceBitmap* mark_bitmap)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void Dump(std::ostream& os) const;
@@ -72,10 +69,13 @@
   }
 
   void Clear() OVERRIDE;
-  void Reset() OVERRIDE;
+
+  bool CanMoveObjects() const OVERRIDE {
+    return false;
+  }
 
  protected:
-  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
+  virtual accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() {
     return &SweepCallback;
   }
 
diff --git a/runtime/globals.h b/runtime/globals.h
index 7e85231..e3c54b8 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -50,9 +50,6 @@
 // Required stack alignment
 static constexpr size_t kStackAlignment = 16;
 
-// Required object alignment
-static constexpr size_t kObjectAlignment = 8;
-
 // ARM instruction alignment. ARM processors require code to be 4-byte aligned,
 // but ARM ELF requires 8..
 static constexpr size_t kArmAlignment = 8;
@@ -72,6 +69,10 @@
 // compile-time constant so the compiler can generate better code.
 static constexpr int kPageSize = 4096;
 
+// Required object alignment
+static constexpr size_t kObjectAlignment = 8;
+static constexpr size_t kLargeObjectAlignment = kPageSize;
+
 // Whether or not this is a debug build. Useful in conditionals where NDEBUG isn't.
 #if defined(NDEBUG)
 static constexpr bool kIsDebugBuild = false;
diff --git a/runtime/instruction_set.cc b/runtime/instruction_set.cc
index c964629..73d4279 100644
--- a/runtime/instruction_set.cc
+++ b/runtime/instruction_set.cc
@@ -16,8 +16,78 @@
 
 #include "instruction_set.h"
 
+#include "globals.h"
+#include "base/logging.h"  // Logging is required for FATAL in the helper functions.
+
 namespace art {
 
+size_t GetInstructionSetPointerSize(InstructionSet isa) {  // Maps an ISA to its pointer size.
+  switch (isa) {
+    case kArm:
+      // Fall-through: kArm and kThumb2 share kArmPointerSize.
+    case kThumb2:
+      return kArmPointerSize;
+    case kArm64:
+      return kArm64PointerSize;
+    case kX86:
+      return kX86PointerSize;
+    case kX86_64:
+      return kX86_64PointerSize;
+    case kMips:
+      return kMipsPointerSize;
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have pointer size.";
+      return 0;  // Fallback value; presumably unreachable if LOG(FATAL) aborts.
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      return 0;  // Fallback value; presumably unreachable if LOG(FATAL) aborts.
+  }
+}
+
+size_t GetInstructionSetAlignment(InstructionSet isa) {  // Maps an ISA to its alignment constant.
+  switch (isa) {
+    case kArm:
+      // Fall-through: kArm and kThumb2 share kArmAlignment.
+    case kThumb2:
+      return kArmAlignment;
+    case kArm64:
+      return kArm64Alignment;
+    case kX86:
+      // Fall-through: kX86 and kX86_64 share kX86Alignment.
+    case kX86_64:
+      return kX86Alignment;
+    case kMips:
+      return kMipsAlignment;
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have alignment.";
+      return 0;  // Fallback value; presumably unreachable if LOG(FATAL) aborts.
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      return 0;  // Fallback value; presumably unreachable if LOG(FATAL) aborts.
+  }
+}
+
+bool Is64BitInstructionSet(InstructionSet isa) {  // True only for kArm64 and kX86_64.
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+    case kX86:
+    case kMips:
+      return false;
+
+    case kArm64:
+    case kX86_64:
+      return true;
+
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have bit width.";
+      return false;  // Fallback value; presumably unreachable if LOG(FATAL) aborts.
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      return false;  // Fallback value; presumably unreachable if LOG(FATAL) aborts.
+  }
+}
+
 std::string InstructionSetFeatures::GetFeatureString() const {
   std::string result;
   if ((mask_ & kHwDiv) != 0) {
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index a08becf..c746e06 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -35,6 +35,10 @@
 };
 std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
 
+size_t GetInstructionSetPointerSize(InstructionSet isa);
+size_t GetInstructionSetAlignment(InstructionSet isa);
+bool Is64BitInstructionSet(InstructionSet isa);
+
 #if defined(__arm__)
 static constexpr InstructionSet kRuntimeISA = kArm;
 #elif defined(__aarch64__)
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 525e2b3..bcde9e5 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -63,6 +63,7 @@
       interpret_only_(false), forced_interpret_only_(false),
       have_method_entry_listeners_(false), have_method_exit_listeners_(false),
       have_method_unwind_listeners_(false), have_dex_pc_listeners_(false),
+      have_field_read_listeners_(false), have_field_write_listeners_(false),
       have_exception_caught_listeners_(false),
       deoptimized_methods_lock_("deoptimized methods lock"),
       deoptimization_enabled_(false),
@@ -373,6 +374,14 @@
     dex_pc_listeners_.push_back(listener);
     have_dex_pc_listeners_ = true;
   }
+  if ((events & kFieldRead) != 0) {
+    field_read_listeners_.push_back(listener);
+    have_field_read_listeners_ = true;
+  }
+  if ((events & kFieldWritten) != 0) {
+    field_write_listeners_.push_back(listener);
+    have_field_write_listeners_ = true;
+  }
   if ((events & kExceptionCaught) != 0) {
     exception_caught_listeners_.push_back(listener);
     have_exception_caught_listeners_ = true;
@@ -410,6 +419,22 @@
     }
     have_dex_pc_listeners_ = dex_pc_listeners_.size() > 0;
   }
+  if ((events & kFieldRead) != 0) {
+    bool contains = std::find(field_read_listeners_.begin(), field_read_listeners_.end(),
+                              listener) != field_read_listeners_.end();
+    if (contains) {
+      field_read_listeners_.remove(listener);
+    }
+    have_field_read_listeners_ = field_read_listeners_.size() > 0;
+  }
+  if ((events & kFieldWritten) != 0) {
+    bool contains = std::find(field_write_listeners_.begin(), field_write_listeners_.end(),
+                              listener) != field_write_listeners_.end();
+    if (contains) {
+      field_write_listeners_.remove(listener);
+    }
+    have_field_write_listeners_ = field_write_listeners_.size() > 0;
+  }
   if ((events & kExceptionCaught) != 0) {
     exception_caught_listeners_.remove(listener);
     have_exception_caught_listeners_ = exception_caught_listeners_.size() > 0;
@@ -743,6 +768,30 @@
   }
 }
 
+void Instrumentation::FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
+                                         mirror::ArtMethod* method, uint32_t dex_pc,
+                                         mirror::ArtField* field) const {
+  if (have_field_read_listeners_) {
+    // TODO: same comment as DexPcMovedEventImpl. Note that we iterate over a copy of the list.
+    std::list<InstrumentationListener*> copy(field_read_listeners_);
+    for (InstrumentationListener* listener : copy) {
+      listener->FieldRead(thread, this_object, method, dex_pc, field);
+    }
+  }
+}
+
+void Instrumentation::FieldWriteEventImpl(Thread* thread, mirror::Object* this_object,
+                                         mirror::ArtMethod* method, uint32_t dex_pc,
+                                         mirror::ArtField* field, const JValue& field_value) const {
+  if (have_field_write_listeners_) {
+    // TODO: same comment as DexPcMovedEventImpl. Note that we iterate over a copy of the list.
+    std::list<InstrumentationListener*> copy(field_write_listeners_);
+    for (InstrumentationListener* listener : copy) {
+      listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
+    }
+  }
+}
+
 void Instrumentation::ExceptionCaughtEvent(Thread* thread, const ThrowLocation& throw_location,
                                            mirror::ArtMethod* catch_method,
                                            uint32_t catch_dex_pc,
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 2a9c35f..3de0728 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -28,6 +28,7 @@
 
 namespace art {
 namespace mirror {
+  class ArtField;
   class ArtMethod;
   class Class;
   class Object;
@@ -78,6 +79,14 @@
                           mirror::ArtMethod* method, uint32_t new_dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
+  // Call-back for when we read from a field.
+  virtual void FieldRead(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                         uint32_t dex_pc, mirror::ArtField* field) = 0;
+
+  // Call-back for when we write into a field.
+  virtual void FieldWritten(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
+                            uint32_t dex_pc, mirror::ArtField* field, const JValue& field_value) = 0;
+
   // Call-back when an exception is caught.
   virtual void ExceptionCaught(Thread* thread, const ThrowLocation& throw_location,
                                mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
@@ -92,11 +101,13 @@
 class Instrumentation {
  public:
   enum InstrumentationEvent {
-    kMethodEntered = 1,
-    kMethodExited = 2,
-    kMethodUnwind = 4,
-    kDexPcMoved = 8,
-    kExceptionCaught = 16
+    kMethodEntered =   1 << 0,
+    kMethodExited =    1 << 1,
+    kMethodUnwind =    1 << 2,
+    kDexPcMoved =      1 << 3,
+    kFieldRead =       1 << 4,
+    kFieldWritten =    1 << 5,
+    kExceptionCaught = 1 << 6,
   };
 
   Instrumentation();
@@ -217,6 +228,14 @@
     return have_dex_pc_listeners_;
   }
 
+  bool HasFieldReadListeners() const {
+    return have_field_read_listeners_;
+  }
+
+  bool HasFieldWriteListeners() const {
+    return have_field_write_listeners_;
+  }
+
   bool IsActive() const {
     return have_dex_pc_listeners_ || have_method_entry_listeners_ || have_method_exit_listeners_ ||
         have_exception_caught_listeners_ || have_method_unwind_listeners_;
@@ -256,6 +275,26 @@
     }
   }
 
+  // Inform listeners that we read a field (only supported by the interpreter).
+  void FieldReadEvent(Thread* thread, mirror::Object* this_object,
+                      mirror::ArtMethod* method, uint32_t dex_pc,
+                      mirror::ArtField* field) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (UNLIKELY(HasFieldReadListeners())) {
+      FieldReadEventImpl(thread, this_object, method, dex_pc, field);
+    }
+  }
+
+  // Inform listeners that we write a field (only supported by the interpreter).
+  void FieldWriteEvent(Thread* thread, mirror::Object* this_object,
+                       mirror::ArtMethod* method, uint32_t dex_pc,
+                       mirror::ArtField* field, const JValue& field_value) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (UNLIKELY(HasFieldWriteListeners())) {
+      FieldWriteEventImpl(thread, this_object, method, dex_pc, field, field_value);
+    }
+  }
+
   // Inform listeners that an exception was caught.
   void ExceptionCaughtEvent(Thread* thread, const ThrowLocation& throw_location,
                             mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
@@ -313,6 +352,14 @@
   void DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
                            mirror::ArtMethod* method, uint32_t dex_pc) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
+                           mirror::ArtMethod* method, uint32_t dex_pc,
+                           mirror::ArtField* field) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FieldWriteEventImpl(Thread* thread, mirror::Object* this_object,
+                           mirror::ArtMethod* method, uint32_t dex_pc,
+                           mirror::ArtField* field, const JValue& field_value) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Have we hijacked ArtMethod::code_ so that it calls instrumentation/interpreter code?
   bool instrumentation_stubs_installed_;
@@ -345,6 +392,14 @@
   // instrumentation_lock_.
   bool have_dex_pc_listeners_;
 
+  // Do we have any listeners for field read events? Short-cut to avoid taking the
+  // instrumentation_lock_.
+  bool have_field_read_listeners_;
+
+  // Do we have any listeners for field write events? Short-cut to avoid taking the
+  // instrumentation_lock_.
+  bool have_field_write_listeners_;
+
   // Do we have any exception caught listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_exception_caught_listeners_;
 
@@ -353,6 +408,8 @@
   std::list<InstrumentationListener*> method_exit_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_unwind_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> dex_pc_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> field_read_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> field_write_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> exception_caught_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
   // The set of methods being deoptimized (by the debugger) which must be executed with interpreter
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index 66ebb96..1477324 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -197,6 +197,17 @@
      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
+   * A field of interest has been accessed or modified. This is used for field access and field
+   * modification events.
+   *
+   * "fieldValue" is non-null for field modification events only.
+   * "is_modification" is true for field modification, false for field access.
+   */
+  bool PostFieldEvent(const JdwpLocation* pLoc, RefTypeId typeId, FieldId fieldId,
+                      ObjectId thisPtr, const JValue* fieldValue, bool is_modification)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  /*
    * An exception has been thrown.
    *
    * Pass in a zeroed-out "*pCatchLoc" if the exception wasn't caught.
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 9b3ea2e..6908047 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -121,26 +121,14 @@
   /* nothing for StepOnly -- handled differently */
 };
 
-/*
- * Dump an event to the log file.
- */
-static void dumpEvent(const JdwpEvent* pEvent) {
-  LOG(INFO) << StringPrintf("Event id=0x%4x %p (prev=%p next=%p):", pEvent->requestId, pEvent, pEvent->prev, pEvent->next);
-  LOG(INFO) << "  kind=" << pEvent->eventKind << " susp=" << pEvent->suspend_policy << " modCount=" << pEvent->modCount;
-
-  for (int i = 0; i < pEvent->modCount; i++) {
-    const JdwpEventMod* pMod = &pEvent->mods[i];
-    LOG(INFO) << "  " << pMod->modKind;
-    /* TODO - show details */
-  }
-}
-
 static bool NeedsFullDeoptimization(JdwpEventKind eventKind) {
   switch (eventKind) {
       case EK_METHOD_ENTRY:
       case EK_METHOD_EXIT:
       case EK_METHOD_EXIT_WITH_RETURN_VALUE:
       case EK_SINGLE_STEP:
+      case EK_FIELD_ACCESS:
+      case EK_FIELD_MODIFICATION:
         return true;
       default:
         return false;
@@ -177,9 +165,6 @@
       if (status != ERR_NONE) {
         return status;
       }
-    } else if (pMod->modKind == MK_FIELD_ONLY) {
-      /* should be for EK_FIELD_ACCESS or EK_FIELD_MODIFICATION */
-      dumpEvent(pEvent);  /* TODO - need for field watches */
     }
   }
   if (NeedsFullDeoptimization(pEvent->eventKind)) {
@@ -422,6 +407,9 @@
     case MK_COUNT:
       CHECK_GT(pMod->count.count, 0);
       pMod->count.count--;
+      if (pMod->count.count > 0) {
+        return false;
+      }
       break;
     case MK_CONDITIONAL:
       CHECK(false);  // should not be getting these
@@ -843,6 +831,86 @@
   return match_count != 0;
 }
 
+bool JdwpState::PostFieldEvent(const JdwpLocation* pLoc, RefTypeId typeId, FieldId fieldId,
+                               ObjectId thisPtr, const JValue* fieldValue, bool is_modification) {
+  DCHECK(pLoc != nullptr);
+  DCHECK(!is_modification || fieldValue != nullptr);  // Modification events carry the new value.
+  // Check this first so no basket is built for an event that will not be posted.
+  if (InvokeInProgress()) {
+    VLOG(jdwp) << "Not posting field event during invoke";
+    return false;
+  }
+
+  ModBasket basket;
+  basket.pLoc = pLoc;
+  basket.classId = pLoc->class_id;
+  basket.thisPtr = thisPtr;
+  basket.threadId = Dbg::GetThreadSelfId();
+  basket.className = Dbg::GetClassName(pLoc->class_id);
+  basket.field = fieldId;
+
+  // Get field's reference type tag.
+  JDWP::JdwpTypeTag type_tag;
+  uint32_t class_status;  // unused here.
+  JdwpError error = Dbg::GetClassInfo(typeId, &type_tag, &class_status, NULL);
+  if (error != ERR_NONE) {
+    return false;
+  }
+
+  // Get instance type tag.
+  uint8_t tag;
+  error = Dbg::GetObjectTag(thisPtr, tag);
+  if (error != ERR_NONE) {
+    return false;
+  }
+
+  int match_count = 0;
+  ExpandBuf* pReq = NULL;
+  JdwpSuspendPolicy suspend_policy = SP_NONE;
+  {
+    MutexLock mu(Thread::Current(), event_list_lock_);
+    JdwpEvent** match_list = AllocMatchList(event_list_size_);
+
+    const JdwpEventKind kind = is_modification ? EK_FIELD_MODIFICATION : EK_FIELD_ACCESS;
+    FindMatchingEvents(kind, &basket, match_list, &match_count);
+    if (match_count != 0) {
+      VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
+                 << basket.className << "." << Dbg::GetMethodName(pLoc->method_id)
+                 << StringPrintf(" thread=%#" PRIx64 "  dex_pc=%#" PRIx64 ")",
+                                 basket.threadId, pLoc->dex_pc);
+
+      suspend_policy = scanSuspendPolicy(match_list, match_count);
+      VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
+
+      pReq = eventPrep();
+      expandBufAdd1(pReq, suspend_policy);
+      expandBufAdd4BE(pReq, match_count);
+
+      for (int i = 0; i < match_count; i++) {
+        expandBufAdd1(pReq, match_list[i]->eventKind);
+        expandBufAdd4BE(pReq, match_list[i]->requestId);
+        expandBufAdd8BE(pReq, basket.threadId);
+        expandBufAddLocation(pReq, *pLoc);
+        expandBufAdd1(pReq, type_tag);
+        expandBufAddRefTypeId(pReq, typeId);
+        expandBufAddFieldId(pReq, fieldId);
+        expandBufAdd1(pReq, tag);
+        expandBufAddObjectId(pReq, thisPtr);
+        if (is_modification) {
+          Dbg::OutputFieldValue(fieldId, fieldValue, pReq);
+        }
+      }
+    }
+
+    CleanupMatchList(match_list, match_count);
+  }
+
+  Dbg::ManageDeoptimization();
+
+  SendRequestAndPossiblySuspend(pReq, suspend_policy, basket.threadId);
+  return match_count != 0;
+}
+
 /*
  * A thread is starting or stopping.
  *
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index c2a2b54..5ffe753 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -372,7 +372,7 @@
   expandBufAdd1(reply, false);   // canAddMethod
   expandBufAdd1(reply, false);   // canUnrestrictedlyRedefineClasses
   expandBufAdd1(reply, false);   // canPopFrames
-  expandBufAdd1(reply, false);   // canUseInstanceFilters
+  expandBufAdd1(reply, true);    // canUseInstanceFilters
   expandBufAdd1(reply, false);   // canGetSourceDebugExtension
   expandBufAdd1(reply, false);   // canRequestVMDeathEvent
   expandBufAdd1(reply, false);   // canSetDefaultStratum
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 5fc0228..8e22c1d 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -237,55 +237,41 @@
   Locks::mutator_lock_->AssertNotHeld(self);
   UniquePtr<JdwpState> state(new JdwpState(options));
   switch (options->transport) {
-  case kJdwpTransportSocket:
-    InitSocketTransport(state.get(), options);
-    break;
+    case kJdwpTransportSocket:
+      InitSocketTransport(state.get(), options);
+      break;
 #ifdef HAVE_ANDROID_OS
-  case kJdwpTransportAndroidAdb:
-    InitAdbTransport(state.get(), options);
-    break;
+    case kJdwpTransportAndroidAdb:
+      InitAdbTransport(state.get(), options);
+      break;
 #endif
-  default:
-    LOG(FATAL) << "Unknown transport: " << options->transport;
+    default:
+      LOG(FATAL) << "Unknown transport: " << options->transport;
   }
 
-  if (!options->suspend) {
+  {
     /*
      * Grab a mutex before starting the thread.  This ensures they
      * won't signal the cond var before we're waiting.
      */
     MutexLock thread_start_locker(self, state->thread_start_lock_);
+
     /*
      * We have bound to a port, or are trying to connect outbound to a
      * debugger.  Create the JDWP thread and let it continue the mission.
      */
-    CHECK_PTHREAD_CALL(pthread_create, (&state->pthread_, NULL, StartJdwpThread, state.get()), "JDWP thread");
+    CHECK_PTHREAD_CALL(pthread_create, (&state->pthread_, nullptr, StartJdwpThread, state.get()),
+                       "JDWP thread");
 
     /*
      * Wait until the thread finishes basic initialization.
-     * TODO: cond vars should be waited upon in a loop
      */
-    state->thread_start_cond_.Wait(self);
-  } else {
-    {
-      /*
-       * Grab a mutex before starting the thread.  This ensures they
-       * won't signal the cond var before we're waiting.
-       */
-      MutexLock thread_start_locker(self, state->thread_start_lock_);
-      /*
-       * We have bound to a port, or are trying to connect outbound to a
-       * debugger.  Create the JDWP thread and let it continue the mission.
-       */
-      CHECK_PTHREAD_CALL(pthread_create, (&state->pthread_, NULL, StartJdwpThread, state.get()), "JDWP thread");
-
-      /*
-       * Wait until the thread finishes basic initialization.
-       * TODO: cond vars should be waited upon in a loop
-       */
+    while (!state->debug_thread_started_) {
       state->thread_start_cond_.Wait(self);
     }
+  }
 
+  if (options->suspend) {
     /*
      * For suspend=y, wait for the debugger to connect to us or for us to
      * connect to the debugger.
@@ -481,11 +467,8 @@
     /* process requests until the debugger drops */
     bool first = true;
     while (!Dbg::IsDisposed()) {
-      {
-        // sanity check -- shouldn't happen?
-        MutexLock mu(thread_, *Locks::thread_suspend_count_lock_);
-        CHECK_EQ(thread_->GetState(), kWaitingInMainDebuggerLoop);
-      }
+      // sanity check -- shouldn't happen?
+      CHECK_EQ(thread_->GetState(), kWaitingInMainDebuggerLoop);
 
       if (!netState->ProcessIncoming()) {
         /* blocking read */
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index bc8f51f..57d32bb 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -131,6 +131,7 @@
   heap_min_free_ = gc::Heap::kDefaultMinFree;
   heap_max_free_ = gc::Heap::kDefaultMaxFree;
   heap_target_utilization_ = gc::Heap::kDefaultTargetUtilization;
+  foreground_heap_growth_multiplier_ = gc::Heap::kDefaultHeapGrowthMultiplier;
   heap_growth_limit_ = 0;  // 0 means no growth limit .
   // Default to number of processors minus one since the main GC thread also does work.
   parallel_gc_threads_ = sysconf(_SC_NPROCESSORS_CONF) - 1;
@@ -314,6 +315,10 @@
       if (!ParseDouble(option, '=', 0.1, 0.9, &heap_target_utilization_)) {
         return false;
       }
+    } else if (StartsWith(option, "-XX:ForegroundHeapGrowthMultiplier=")) {
+      if (!ParseDouble(option, '=', 0.1, 10.0, &foreground_heap_growth_multiplier_)) {
+        return false;  // Multiplier must lie within [0.1, 10.0].
+      }
     } else if (StartsWith(option, "-XX:ParallelGCThreads=")) {
       if (!ParseUnsignedInteger(option, '=', &parallel_gc_threads_)) {
         return false;
@@ -584,10 +589,10 @@
         Usage("Unknown -Xverify option %s", verify_mode.c_str());
         return false;
       }
-    } else if (StartsWith(option, "-ea:") ||
-               StartsWith(option, "-da:") ||
-               StartsWith(option, "-enableassertions:") ||
-               StartsWith(option, "-disableassertions:") ||
+    } else if (StartsWith(option, "-ea") ||
+               StartsWith(option, "-da") ||
+               StartsWith(option, "-enableassertions") ||
+               StartsWith(option, "-disableassertions") ||
                (option == "--runtime-arg") ||
                (option == "-esa") ||
                (option == "-dsa") ||
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index 126096a..770e4ae 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -54,6 +54,7 @@
   size_t heap_min_free_;
   size_t heap_max_free_;
   double heap_target_utilization_;
+  double foreground_heap_growth_multiplier_;
   unsigned int parallel_gc_threads_;
   unsigned int conc_gc_threads_;
   gc::CollectorType collector_type_;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 5c31d35..eb0522a 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -564,6 +564,7 @@
                        options->heap_min_free_,
                        options->heap_max_free_,
                        options->heap_target_utilization_,
+                       options->foreground_heap_growth_multiplier_,
                        options->heap_maximum_size_,
                        options->image_,
                        options->collector_type_,
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 131e2b6..b3d14f0 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1094,7 +1094,7 @@
     if (lock != nullptr) {
       SirtRef<mirror::Object> sirt_obj(self, lock);
       ObjectLock<mirror::Object> locker(self, &sirt_obj);
-      locker.Notify();
+      locker.NotifyAll();
     }
   }
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 8dad419..270deb0 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -617,7 +617,7 @@
   DCHECK(pReq != NULL);
   if (pReq->invoke_needed) {
     // Clear this before signaling.
-    pReq->invoke_needed = false;
+    pReq->Clear();
 
     VLOG(jdwp) << "invoke complete, signaling";
     MutexLock mu(self, pReq->lock);
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 1f24478..b85eb7e 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -549,6 +549,21 @@
   LOG(ERROR) << "Unexpected dex PC event in tracing " << PrettyMethod(method) << " " << new_dex_pc;
 };
 
+void Trace::FieldRead(Thread* /*thread*/, mirror::Object* this_object,
+                       mirror::ArtMethod* method, uint32_t dex_pc, mirror::ArtField* field)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  // We're not registered to listen to this kind of event, so complain.
+  LOG(ERROR) << "Unexpected field read event in tracing " << PrettyMethod(method) << " " << dex_pc;
+}
+
+void Trace::FieldWritten(Thread* /*thread*/, mirror::Object* this_object,
+                          mirror::ArtMethod* method, uint32_t dex_pc, mirror::ArtField* field,
+                          const JValue& field_value)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  // We're not registered to listen to this kind of event, so complain.
+  LOG(ERROR) << "Unexpected field write event in tracing " << PrettyMethod(method) << " " << dex_pc;
+}
+
 void Trace::MethodEntered(Thread* thread, mirror::Object* this_object,
                           mirror::ArtMethod* method, uint32_t dex_pc) {
   uint32_t thread_clock_diff = 0;
diff --git a/runtime/trace.h b/runtime/trace.h
index 1af1283..bf4995a 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -32,6 +32,7 @@
 namespace art {
 
 namespace mirror {
+  class ArtField;
   class ArtMethod;
 }  // namespace mirror
 class Thread;
@@ -54,7 +55,7 @@
   kSampleProfilingActive,
 };
 
-class Trace : public instrumentation::InstrumentationListener {
+class Trace FINAL : public instrumentation::InstrumentationListener {
  public:
   enum TraceFlag {
     kTraceCountAllocs = 1,
@@ -78,23 +79,31 @@
   void CompareAndUpdateStackTrace(Thread* thread, std::vector<mirror::ArtMethod*>* stack_trace)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  virtual void MethodEntered(Thread* thread, mirror::Object* this_object,
-                             mirror::ArtMethod* method, uint32_t dex_pc)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void MethodExited(Thread* thread, mirror::Object* this_object,
-                            mirror::ArtMethod* method, uint32_t dex_pc,
-                            const JValue& return_value)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
-                            mirror::ArtMethod* method, uint32_t dex_pc)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
-                          mirror::ArtMethod* method, uint32_t new_dex_pc)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void ExceptionCaught(Thread* thread, const ThrowLocation& throw_location,
-                               mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
-                               mirror::Throwable* exception_object)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // InstrumentationListener implementation.
+  void MethodEntered(Thread* thread, mirror::Object* this_object,
+                     mirror::ArtMethod* method, uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
+  void MethodExited(Thread* thread, mirror::Object* this_object,
+                    mirror::ArtMethod* method, uint32_t dex_pc,
+                    const JValue& return_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
+  void MethodUnwind(Thread* thread, mirror::Object* this_object,
+                    mirror::ArtMethod* method, uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
+  void DexPcMoved(Thread* thread, mirror::Object* this_object,
+                  mirror::ArtMethod* method, uint32_t new_dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
+  void FieldRead(Thread* thread, mirror::Object* this_object,
+                 mirror::ArtMethod* method, uint32_t dex_pc, mirror::ArtField* field)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
+  void FieldWritten(Thread* thread, mirror::Object* this_object,
+                    mirror::ArtMethod* method, uint32_t dex_pc, mirror::ArtField* field,
+                    const JValue& field_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
+  void ExceptionCaught(Thread* thread, const ThrowLocation& throw_location,
+                       mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
+                       mirror::Throwable* exception_object)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
 
   // Reuse an old stack trace if it exists, otherwise allocate a new one.
   static std::vector<mirror::ArtMethod*>* AllocStackTrace();
diff --git a/runtime/utils.h b/runtime/utils.h
index dbc3ab7..5def66b 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -62,12 +62,20 @@
   return IsAligned<n>(reinterpret_cast<const uintptr_t>(x));
 }
 
+template<typename T>
+static inline bool IsAlignedParam(T x, int n) {
+  return (x & (n - 1)) == 0;  // Mask test: only meaningful when n is a power of two.
+}
+
 #define CHECK_ALIGNED(value, alignment) \
   CHECK(::art::IsAligned<alignment>(value)) << reinterpret_cast<const void*>(value)
 
 #define DCHECK_ALIGNED(value, alignment) \
   DCHECK(::art::IsAligned<alignment>(value)) << reinterpret_cast<const void*>(value)
 
+#define DCHECK_ALIGNED_PARAM(value, alignment) \
+  DCHECK(::art::IsAlignedParam(value, alignment)) << reinterpret_cast<const void*>(value)
+
 // Check whether an N-bit two's-complement representation can hold value.
 static inline bool IsInt(int N, word value) {
   CHECK_LT(0, N);
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 86a03ab..55ecf69 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -97,6 +97,7 @@
   }
 
   static int start;
+  private static int[] negIndex = { -100000 };
   public static void test_String_indexOf() {
     String str0 = "";
     String str1 = "/";
@@ -125,6 +126,7 @@
     Assert.assertEquals(str0.indexOf('a',0), -1);
     Assert.assertEquals(str0.indexOf('a',-1), -1);
     Assert.assertEquals(str1.indexOf('/',++start), -1);
+    Assert.assertEquals(str1.indexOf('a',negIndex[0]), -1);
     Assert.assertEquals(str3.indexOf('a',0), 0);
     Assert.assertEquals(str3.indexOf('a',1), -1);
     Assert.assertEquals(str3.indexOf('a',1234), -1);
diff --git a/test/403-optimizing-long/expected.txt b/test/403-optimizing-long/expected.txt
new file mode 100644
index 0000000..dff83cf
--- /dev/null
+++ b/test/403-optimizing-long/expected.txt
@@ -0,0 +1 @@
+Long: 42
diff --git a/test/403-optimizing-long/info.txt b/test/403-optimizing-long/info.txt
new file mode 100644
index 0000000..dc2d668
--- /dev/null
+++ b/test/403-optimizing-long/info.txt
@@ -0,0 +1 @@
+Tests long support on optimizing compiler.
diff --git a/test/403-optimizing-long/src/Main.java b/test/403-optimizing-long/src/Main.java
new file mode 100644
index 0000000..21af4e1
--- /dev/null
+++ b/test/403-optimizing-long/src/Main.java
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Note that $opt$ is a marker for the optimizing compiler to ensure
+// it does compile the method.
+
+public class Main {
+  public static void expectEquals(long expected, long result) {  // Throws Error on mismatch.
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void main(String[] args) {
+    long l = $opt$ReturnLong();
+    expectEquals(42, l);
+    System.out.println("Long: " + l);  // The only output; expected.txt holds "Long: 42".
+
+    l = $opt$TakeOneLong1(42);
+    expectEquals(42, l);
+
+    l = $opt$TakeOneLong2(0, 42);
+    expectEquals(42, l);
+
+    l = $opt$TakeOneLong3(0, 1, 42);
+    expectEquals(42, l);
+
+    l = $opt$TakeOneLong4(0, 1, 2, 42);
+    expectEquals(42, l);
+
+    l = $opt$AddTwoLongs(42, 41);
+    expectEquals(83, l);
+
+    l = $opt$SubTwoLongs(42, 41);
+    expectEquals(1, l);
+
+    l = $opt$MakeCallsWithLongs1();
+    expectEquals(57, l);  // 42 + 1 + 2 + 3 + 4 + 5.
+
+    l = $opt$MakeCallsWithLongs2();
+    expectEquals(900000000006L, l);  // Sum of the three constants passed to AddThreeLongs.
+
+    l = $opt$SubTwoLongs(-600000000006L, -200000000002L);
+    expectEquals(-400000000004L, l);
+
+    l = $opt$AddTwoLongs(-600000000006L, -200000000002L);
+    expectEquals(-800000000008L, l);
+  }
+
+  static long $opt$MakeCallsWithLongs1() {
+    long l = $opt$SubTwoLongs(-600000000006L, -200000000002L);
+    expectEquals(-400000000004L, l);
+
+    l = $opt$AddTwoLongs(-600000000006L, -200000000002L);
+    expectEquals(-800000000008L, l);
+
+    return $opt$ReturnLong() + $opt$TakeOneLong1(1) + $opt$TakeOneLong2(0, 2)
+        + $opt$TakeOneLong3(0, 0, 3) + $opt$TakeOneLong4(0, 0, 0, 4)
+        // Test invoke-range.
+        + $opt$TakeOneLong5(0, 0, 0, 0, 5);
+  }
+
+  static long $opt$MakeCallsWithLongs2() {
+    return $opt$AddThreeLongs(400000000003L, 200000000002L, 300000000001L);
+  }
+
+  static long $opt$ReturnLong() {
+    return 42;
+  }
+
+  static long $opt$TakeOneLong1(long l) {
+    return l;
+  }
+
+  static long $opt$TakeOneLong2(int a, long l) {
+    return l;
+  }
+
+  static long $opt$TakeOneLong3(int a, int b, long l) {
+    return l;
+  }
+
+  static long $opt$TakeOneLong4(int a, int b, int c, long l) {
+    return l;
+  }
+
+  static long $opt$TakeOneLong5(int a, int b, int c,int d,  long l) {
+    return l;
+  }
+
+  static long $opt$AddTwoLongs(long a, long b) {
+    return a + b;
+  }
+
+  static long $opt$AddThreeLongs(long a, long b, long c) {
+    return a + b + c;
+  }
+
+  static long $opt$SubTwoLongs(long a, long b) {
+    return a - b;
+  }
+}
diff --git a/test/Android.mk b/test/Android.mk
index 5879039..334df1f 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -188,7 +188,7 @@
 # Expand all tests.
 TEST_ART_RUN_TESTS := $(wildcard $(LOCAL_PATH)/[0-9]*)
 TEST_ART_RUN_TESTS := $(subst $(LOCAL_PATH)/,, $(TEST_ART_RUN_TESTS))
-TEST_ART_TIMING_SENSITIVE_RUN_TESTS := 055-enum-performance
+TEST_ART_TIMING_SENSITIVE_RUN_TESTS := 053-wait-some 055-enum-performance
 ifdef dist_goal # disable timing sensitive tests on "dist" builds.
   $(foreach test, $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), \
     $(info Skipping $(test)) \