am 26c6dce5: Merge branch 'dalvik-dev' of persistent-https://googleplex-android.git.corp.google.com/platform/art into merge-goog-dalvik-dev-to-aosp-master

* commit '26c6dce526a90a7345600f5f82050a42740d12f8': (75 commits)
  Fix dumpsys meminfo <pid>.
  Minor field name cleanup in debugger.
  Faster Signature::operator==(const StringPiece& rhs).
  Add -Xgc: MS, CMS, SS options to specify which GC to use.
  Fix a crash with -XX:DumpGCPerformanceOnShutdown.
  Add developer option for dumping GC cumulative timings on shutdown.
  Compact zygote.
  Fix a libartd.so boot crash when kMovingCollector is true.
  Inline RosAlloc::Alloc().
  Make verifier log hard failures by default.
  Refactor intrinsic CAS, prepare for 64-bit version.
  Search for miranda methods in virtual methods instead of interface methods.
  Fix concurrent GC to properly handle no zygote.
  Fix a libartd.so boot crash in Heap::AllocObjectWithAllocator()
  Add histogram for GC pause times.
  Remove sleep workaround in thread pool.
  Avoid some string allocations.
  Fix histogram test.
  Improve histogram and timing logger dumping.
  Fix memory leak caused by not adding large objects to allocation stack.
  ...
diff --git a/Android.mk b/Android.mk
index 3112ab0..76fb411 100644
--- a/Android.mk
+++ b/Android.mk
@@ -18,6 +18,7 @@
 
 art_path := $(LOCAL_PATH)
 art_build_path := $(art_path)/build
+include $(art_build_path)/Android.common.mk
 
 ########################################################################
 # clean-oat targets
@@ -146,14 +147,9 @@
 test-art-host-gtest: $(ART_HOST_TEST_TARGETS)
 	@echo test-art-host-gtest PASSED
 
-define run-host-gtests-with
-  $(foreach file,$(sort $(ART_HOST_TEST_EXECUTABLES)),$(1) $(file) &&) true
-endef
-
 # "mm valgrind-test-art-host-gtest" to build and run the host gtests under valgrind.
 .PHONY: valgrind-test-art-host-gtest
-valgrind-test-art-host-gtest: test-art-host-dependencies
-	$(call run-host-gtests-with,valgrind --leak-check=full)
+valgrind-test-art-host-gtest: $(ART_HOST_VALGRIND_TEST_TARGETS)
 	@echo valgrind-test-art-host-gtest PASSED
 
 .PHONY: test-art-host-oat-default
@@ -305,6 +301,8 @@
 ########################################################################
 # oatdump targets
 
+ART_DUMP_OAT_PATH ?= $(OUT_DIR)
+
 .PHONY: dump-oat
 dump-oat: dump-oat-core dump-oat-boot
 
@@ -314,29 +312,29 @@
 .PHONY: dump-oat-core-host
 ifeq ($(ART_BUILD_HOST),true)
 dump-oat-core-host: $(HOST_CORE_IMG_OUT) $(OATDUMP)
-	$(OATDUMP) --image=$(HOST_CORE_IMG_OUT) --output=/tmp/core.host.oatdump.txt --host-prefix=""
-	@echo Output in /tmp/core.host.oatdump.txt
+	$(OATDUMP) --image=$(HOST_CORE_IMG_OUT) --output=$(ART_DUMP_OAT_PATH)/core.host.oatdump.txt --host-prefix=""
+	@echo Output in $(ART_DUMP_OAT_PATH)/core.host.oatdump.txt
 endif
 
 .PHONY: dump-oat-core-target
 ifeq ($(ART_BUILD_TARGET),true)
 dump-oat-core-target: $(TARGET_CORE_IMG_OUT) $(OATDUMP)
-	$(OATDUMP) --image=$(TARGET_CORE_IMG_OUT) --output=/tmp/core.target.oatdump.txt
-	@echo Output in /tmp/core.target.oatdump.txt
+	$(OATDUMP) --image=$(TARGET_CORE_IMG_OUT) --output=$(ART_DUMP_OAT_PATH)/core.target.oatdump.txt
+	@echo Output in $(ART_DUMP_OAT_PATH)/core.target.oatdump.txt
 endif
 
 .PHONY: dump-oat-boot
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
 dump-oat-boot: $(TARGET_BOOT_IMG_OUT) $(OATDUMP)
-	$(OATDUMP) --image=$(TARGET_BOOT_IMG_OUT) --output=/tmp/boot.oatdump.txt
-	@echo Output in /tmp/boot.oatdump.txt
+	$(OATDUMP) --image=$(TARGET_BOOT_IMG_OUT) --output=$(ART_DUMP_OAT_PATH)/boot.oatdump.txt
+	@echo Output in $(ART_DUMP_OAT_PATH)/boot.oatdump.txt
 endif
 
 .PHONY: dump-oat-Calculator
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
 dump-oat-Calculator: $(TARGET_OUT_APPS)/Calculator.odex $(TARGET_BOOT_IMG_OUT) $(OATDUMP)
-	$(OATDUMP) --oat-file=$< --output=/tmp/Calculator.oatdump.txt
-	@echo Output in /tmp/Calculator.oatdump.txt
+	$(OATDUMP) --oat-file=$< --output=$(ART_DUMP_OAT_PATH)/Calculator.oatdump.txt
+	@echo Output in $(ART_DUMP_OAT_PATH)/Calculator.oatdump.txt
 endif
 
 ########################################################################
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 655c7dd..bed48ba 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -22,6 +22,7 @@
 	compiler/elf_writer_test.cc \
 	compiler/image_test.cc \
 	compiler/jni/jni_compiler_test.cc \
+	compiler/leb128_encoder_test.cc \
 	compiler/oat_test.cc \
 	compiler/output_stream_test.cc \
 	compiler/utils/dedupe_set_test.cc \
@@ -80,6 +81,7 @@
 ART_HOST_TEST_EXECUTABLES :=
 ART_TARGET_TEST_EXECUTABLES :=
 ART_HOST_TEST_TARGETS :=
+ART_HOST_VALGRIND_TEST_TARGETS :=
 ART_TARGET_TEST_TARGETS :=
 
 ART_TEST_CFLAGS :=
@@ -170,6 +172,13 @@
 	@echo $$@ PASSED
 
 ART_HOST_TEST_TARGETS += $$(art_gtest_target)
+
+.PHONY: valgrind-$$(art_gtest_target)
+valgrind-$$(art_gtest_target): $$(art_gtest_exe) test-art-host-dependencies
+	valgrind --leak-check=full --error-exitcode=1 $$<
+	@echo $$@ PASSED
+
+ART_HOST_VALGRIND_TEST_TARGETS += valgrind-$$(art_gtest_target)
 endif
 endef
 
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index f964346..c04b38b 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -26,7 +26,7 @@
 
 # By default, do not rerun dex2oat if the tool changes.
 # Comment out the | to force dex2oat to rerun after all changes.
-DEX2OAT_DEPENDENCY := #|
+DEX2OAT_DEPENDENCY := |
 DEX2OAT_DEPENDENCY += $(DEX2OAT)
 DEX2OAT_DEPENDENCY += $(LIBART_COMPILER)
 
diff --git a/compiler/Android.mk b/compiler/Android.mk
index fc2f02b..b7dc9f6 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -23,6 +23,7 @@
 	dex/local_value_numbering.cc \
 	dex/arena_allocator.cc \
 	dex/arena_bit_vector.cc \
+	dex/quick/arm/arm_dex_file_method_inliner.cc \
 	dex/quick/arm/assemble_arm.cc \
 	dex/quick/arm/call_arm.cc \
 	dex/quick/arm/fp_arm.cc \
@@ -30,6 +31,8 @@
 	dex/quick/arm/target_arm.cc \
 	dex/quick/arm/utility_arm.cc \
 	dex/quick/codegen_util.cc \
+	dex/quick/dex_file_method_inliner.cc \
+	dex/quick/dex_file_to_method_inliner_map.cc \
 	dex/quick/gen_common.cc \
 	dex/quick/gen_invoke.cc \
 	dex/quick/gen_loadstore.cc \
@@ -38,6 +41,7 @@
 	dex/quick/mips/call_mips.cc \
 	dex/quick/mips/fp_mips.cc \
 	dex/quick/mips/int_mips.cc \
+	dex/quick/mips/mips_dex_file_method_inliner.cc \
 	dex/quick/mips/target_mips.cc \
 	dex/quick/mips/utility_mips.cc \
 	dex/quick/mir_to_lir.cc \
@@ -48,6 +52,7 @@
 	dex/quick/x86/int_x86.cc \
 	dex/quick/x86/target_x86.cc \
 	dex/quick/x86/utility_x86.cc \
+	dex/quick/x86/x86_dex_file_method_inliner.cc \
 	dex/portable/mir_to_gbc.cc \
 	dex/dex_to_dex_compiler.cc \
 	dex/mir_dataflow.cc \
diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h
index fd46975..3798b45 100644
--- a/compiler/dex/compiler_ir.h
+++ b/compiler/dex/compiler_ir.h
@@ -20,7 +20,6 @@
 #include <vector>
 #include <llvm/IR/Module.h>
 #include "arena_allocator.h"
-#include "backend.h"
 #include "compiler_enums.h"
 #include "dex/quick/mir_to_lir.h"
 #include "dex_instruction.h"
@@ -39,39 +38,14 @@
 }  // namespace llvm
 
 struct ArenaMemBlock;
+class Backend;
 struct Memstats;
 class MIRGraph;
 class Mir2Lir;
 
 struct CompilationUnit {
-  explicit CompilationUnit(ArenaPool* pool)
-    : compiler_driver(NULL),
-      class_linker(NULL),
-      dex_file(NULL),
-      class_loader(NULL),
-      class_def_idx(0),
-      method_idx(0),
-      code_item(NULL),
-      access_flags(0),
-      invoke_type(kDirect),
-      shorty(NULL),
-      disable_opt(0),
-      enable_debug(0),
-      verbose(false),
-      compiler_backend(kNoBackend),
-      instruction_set(kNone),
-      num_dalvik_registers(0),
-      insns(NULL),
-      num_ins(0),
-      num_outs(0),
-      num_regs(0),
-      num_compiler_temps(0),
-      compiler_flip_match(false),
-      arena(pool),
-      mir_graph(NULL),
-      cg(NULL),
-      timings("QuickCompiler", true, false) {
-      }
+  explicit CompilationUnit(ArenaPool* pool);
+  ~CompilationUnit();
 
   void StartTimingSplit(const char* label);
   void NewTimingSplit(const char* label);
@@ -120,7 +94,7 @@
 
   UniquePtr<MIRGraph> mir_graph;   // MIR container.
   UniquePtr<Backend> cg;           // Target-specific codegen.
-  base::TimingLogger timings;
+  TimingLogger timings;
 };
 
 }  // namespace art
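
The hunk above moves CompilationUnit's constructor out of line and forward-declares Backend instead of pulling in backend.h. A likely reason (my inference, not stated in the change): the UniquePtr<Backend> cg member needs Backend to be a complete type wherever CompilationUnit's destructor is generated, so the constructor and new destructor must live in a .cc file that sees the full definition. A minimal sketch of the pattern, using std::unique_ptr as a stand-in for art's UniquePtr and illustrative names throughout:

#include <memory>

class Backend;  // forward declaration; full definition not visible here

struct CompilationUnitSketch {
  CompilationUnitSketch();   // defined out of line, as the patch does
  ~CompilationUnitSketch();  // must not be implicitly generated at this point
  std::unique_ptr<Backend> cg;
};

// In the .cc file, once Backend is complete, the special members can be
// defined: std::default_delete<Backend> now sees a full type.
class Backend {
 public:
  virtual ~Backend() {}
};

CompilationUnitSketch::CompilationUnitSketch() {}
CompilationUnitSketch::~CompilationUnitSketch() {}

int main() {
  CompilationUnitSketch cu;  // destruction is well-formed here
  return 0;
}
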
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 2f8521f..e53d636 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -31,6 +31,8 @@
 #include "llvm/llvm_compilation_unit.h"
 #endif
 
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+
 namespace {
 #if !defined(ART_USE_PORTABLE_COMPILER)
   pthread_once_t llvm_multi_init = PTHREAD_ONCE_INIT;
@@ -61,14 +63,20 @@
 LLVMInfo::~LLVMInfo() {
 }
 
+QuickCompilerContext::QuickCompilerContext(CompilerDriver& compiler)
+  : inliner_map_(new DexFileToMethodInlinerMap(&compiler)) {
+}
+
+QuickCompilerContext::~QuickCompilerContext() {
+}
+
 extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver& compiler) {
   CHECK(compiler.GetCompilerContext() == NULL);
-  LLVMInfo* llvm_info = new LLVMInfo();
-  compiler.SetCompilerContext(llvm_info);
+  compiler.SetCompilerContext(new QuickCompilerContext(compiler));
 }
 
 extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver& compiler) {
-  delete reinterpret_cast<LLVMInfo*>(compiler.GetCompilerContext());
+  delete reinterpret_cast<QuickCompilerContext*>(compiler.GetCompilerContext());
   compiler.SetCompilerContext(NULL);
 }
 
@@ -84,6 +92,7 @@
   // (1 << kBBOpt) |
   // (1 << kMatch) |
   // (1 << kPromoteCompilerTemps) |
+  // (1 << kSuppressExceptionEdges) |
   0;
 
 static uint32_t kCompilerDebugFlags = 0 |     // Enable debug/testing modes
@@ -108,6 +117,38 @@
   // (1 << kDebugTimings) |
   0;
 
+CompilationUnit::CompilationUnit(ArenaPool* pool)
+  : compiler_driver(NULL),
+    class_linker(NULL),
+    dex_file(NULL),
+    class_loader(NULL),
+    class_def_idx(0),
+    method_idx(0),
+    code_item(NULL),
+    access_flags(0),
+    invoke_type(kDirect),
+    shorty(NULL),
+    disable_opt(0),
+    enable_debug(0),
+    verbose(false),
+    compiler_backend(kNoBackend),
+    instruction_set(kNone),
+    num_dalvik_registers(0),
+    insns(NULL),
+    num_ins(0),
+    num_outs(0),
+    num_regs(0),
+    num_compiler_temps(0),
+    compiler_flip_match(false),
+    arena(pool),
+    mir_graph(NULL),
+    cg(NULL),
+    timings("QuickCompiler", true, false) {
+}
+
+CompilationUnit::~CompilationUnit() {
+}
+
 // TODO: Add a cumulative version of logging, and combine with dex2oat --dump-timing
 void CompilationUnit::StartTimingSplit(const char* label) {
   if (enable_debug & (1 << kDebugTimings)) {
@@ -125,7 +166,7 @@
   if (enable_debug & (1 << kDebugTimings)) {
     timings.EndSplit();
     LOG(INFO) << "TIMINGS " << PrettyMethod(method_idx, *dex_file);
-    LOG(INFO) << Dumpable<base::TimingLogger>(timings);
+    LOG(INFO) << Dumpable<TimingLogger>(timings);
   }
 }
 
@@ -180,7 +221,9 @@
 
   if (compiler_backend == kPortable) {
     // Fused long branches not currently useful in bitcode.
-    cu.disable_opt |= (1 << kBranchFusing);
+    cu.disable_opt |=
+        (1 << kBranchFusing) |
+        (1 << kSuppressExceptionEdges);
   }
 
   if (cu.instruction_set == kMips) {
diff --git a/compiler/dex/frontend.h b/compiler/dex/frontend.h
index 43f6855..4a863f5 100644
--- a/compiler/dex/frontend.h
+++ b/compiler/dex/frontend.h
@@ -56,6 +56,7 @@
   kMatch,
   kPromoteCompilerTemps,
   kBranchFusing,
+  kSuppressExceptionEdges,
 };
 
 // Force code generation paths for testing.
@@ -81,6 +82,9 @@
   kDebugTimings
 };
 
+class DexFileToMethodInlinerMap;
+class CompilerDriver;
+
 class LLVMInfo {
   public:
     LLVMInfo();
@@ -109,6 +113,19 @@
     UniquePtr<art::llvm::IRBuilder> ir_builder_;
 };
 
+class QuickCompilerContext {
+  public:
+    explicit QuickCompilerContext(CompilerDriver& compiler);
+    ~QuickCompilerContext();
+
+    DexFileToMethodInlinerMap* GetInlinerMap() {
+      return inliner_map_.get();
+    }
+
+  private:
+    UniquePtr<DexFileToMethodInlinerMap> inliner_map_;
+};
+
 struct CompilationUnit;
 struct BasicBlock;
 
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index 35d2923..75883b7 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -380,7 +380,9 @@
           }
           mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK;
         }
-        mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        if (mir->meta.throw_insn != NULL) {
+          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        }
         // Use side effect to note range check completed.
         (void)LookupValue(ARRAY_REF, array, index, NO_VALUE);
         // Establish value number for loaded register. Note use of memory version.
@@ -419,7 +421,9 @@
           }
           mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK;
         }
-        mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        if (mir->meta.throw_insn != NULL) {
+          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        }
         // Use side effect to note range check completed.
         (void)LookupValue(ARRAY_REF, array, index, NO_VALUE);
         // Rev the memory version
@@ -443,7 +447,9 @@
         } else {
           null_checked_.insert(base);
         }
-        mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        if (mir->meta.throw_insn != NULL) {
+          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        }
         uint16_t field_ref = mir->dalvikInsn.vC;
         uint16_t memory_version = GetMemoryVersion(base, field_ref);
         if (opcode == Instruction::IGET_WIDE) {
@@ -473,7 +479,9 @@
         } else {
           null_checked_.insert(base);
         }
-        mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        if (mir->meta.throw_insn != NULL) {
+          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        }
         uint16_t field_ref = mir->dalvikInsn.vC;
         AdvanceMemoryVersion(base, field_ref);
       }
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index 11e19dc..d359ee2 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -1243,12 +1243,13 @@
     if (mir->ssa_rep == NULL) {
       continue;
     }
-    // Each level of nesting adds *16 to count, up to 3 levels deep.
-    uint32_t weight = std::min(3U, static_cast<uint32_t>(bb->nesting_depth) * 4);
+    // Each level of nesting adds *100 to count, up to 3 levels deep.
+    uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth));
+    uint32_t weight = std::max(1U, depth * 100);
     for (int i = 0; i < mir->ssa_rep->num_uses; i++) {
       int s_reg = mir->ssa_rep->uses[i];
       raw_use_counts_.Increment(s_reg);
-      use_counts_.Put(s_reg, use_counts_.Get(s_reg) + (1 << weight));
+      use_counts_.Put(s_reg, use_counts_.Get(s_reg) + weight);
     }
     if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) {
       int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
@@ -1267,7 +1268,7 @@
         }
         if (uses_method_star) {
           raw_use_counts_.Increment(method_sreg_);
-          use_counts_.Put(method_sreg_, use_counts_.Get(method_sreg_) + (1 << weight));
+          use_counts_.Put(method_sreg_, use_counts_.Get(method_sreg_) + weight);
         }
       }
     }
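
To see how the reweighting above behaves: the old code capped the shift amount at 3, so a use at any nesting level added 1 << 3 = 8 (not the *16 its comment claimed), whereas the new code adds a linear 100 per level up to depth 3. A standalone comparison, illustrative only:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  for (uint32_t nesting_depth = 0; nesting_depth <= 4; ++nesting_depth) {
    // Old scheme: shift amount capped at 3, so increments are 1 or 8.
    uint32_t old_weight = std::min(3U, nesting_depth * 4);
    uint32_t old_increment = 1u << old_weight;
    // New scheme: depth capped at 3, weight linear in depth, minimum 1.
    uint32_t depth = std::min(3U, nesting_depth);
    uint32_t new_increment = std::max(1U, depth * 100);
    printf("depth %u: old +%u, new +%u\n", nesting_depth, old_increment, new_increment);
  }
  return 0;  // prints: +1/+1, +8/+100, +8/+200, +8/+300, +8/+300
}
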
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index cf758fc..deaf2ff 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -365,8 +365,8 @@
 }
 
 /* Process instructions with the kSwitch flag */
-void MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
-                                int flags) {
+BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset,
+                                       int width, int flags) {
   const uint16_t* switch_data =
       reinterpret_cast<const uint16_t*>(GetCurrentInsns() + cur_offset + insn->dalvikInsn.vB);
   int size;
@@ -437,6 +437,7 @@
                                             /* create */ true, /* immed_pred_block_p */ NULL);
   cur_block->fall_through = fallthrough_block->id;
   fallthrough_block->predecessors->Insert(cur_block->id);
+  return cur_block;
 }
 
 /* Process instructions with the kThrow flag */
@@ -444,6 +445,9 @@
                                       int width, int flags, ArenaBitVector* try_block_addr,
                                       const uint16_t* code_ptr, const uint16_t* code_end) {
   bool in_try_block = try_block_addr->IsBitSet(cur_offset);
+  bool is_throw = (insn->dalvikInsn.opcode == Instruction::THROW);
+  bool build_all_edges =
+      (cu_->disable_opt & (1 << kSuppressExceptionEdges)) || is_throw || in_try_block;
 
   /* In try block */
   if (in_try_block) {
@@ -473,7 +477,7 @@
       cur_block->successor_blocks->Insert(successor_block_info);
       catch_block->predecessors->Insert(cur_block->id);
     }
-  } else {
+  } else if (build_all_edges) {
     BasicBlock *eh_block = NewMemBB(kExceptionHandling, num_blocks_++);
     cur_block->taken = eh_block->id;
     block_list_.Insert(eh_block);
@@ -481,7 +485,7 @@
     eh_block->predecessors->Insert(cur_block->id);
   }
 
-  if (insn->dalvikInsn.opcode == Instruction::THROW) {
+  if (is_throw) {
     cur_block->explicit_throw = true;
     if (code_ptr < code_end) {
       // Force creation of new block following THROW via side-effect
@@ -494,6 +498,16 @@
     }
   }
 
+  if (!build_all_edges) {
+    /*
+     * Even though there is an exception edge here, control cannot return to this
+     * method.  Thus, for the purposes of dataflow analysis and optimization, we can
+     * ignore the edge.  Doing this reduces compile time, and increases the scope
+     * of the basic-block level optimization pass.
+     */
+    return cur_block;
+  }
+
   /*
    * Split the potentially-throwing instruction into two parts.
    * The first half will be a pseudo-op that captures the exception
@@ -695,7 +709,7 @@
       cur_block = ProcessCanThrow(cur_block, insn, current_offset_, width, flags, try_block_addr_,
                                   code_ptr, code_end);
     } else if (flags & Instruction::kSwitch) {
-      ProcessCanSwitch(cur_block, insn, current_offset_, width, flags);
+      cur_block = ProcessCanSwitch(cur_block, insn, current_offset_, width, flags);
     }
     current_offset_ += width;
     BasicBlock *next_block = FindBlock(current_offset_, /* split */ false, /* create */
@@ -1100,6 +1114,7 @@
 void MIRGraph::DumpMIRGraph() {
   BasicBlock* bb;
   const char* block_type_names[] = {
+    "Null Block",
     "Entry Block",
     "Code Block",
     "Exit Block",
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index a69dde0..8c20728 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -698,8 +698,8 @@
   void ProcessTryCatchBlocks();
   BasicBlock* ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
                                int flags, const uint16_t* code_ptr, const uint16_t* code_end);
-  void ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
-                        int flags);
+  BasicBlock* ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
+                               int flags);
   BasicBlock* ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
                               int flags, ArenaBitVector* try_block_addr, const uint16_t* code_ptr,
                               const uint16_t* code_end);
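
The ProcessCanThrow() change above collapses the edge-building decision into one predicate: exception-handling blocks are created only when the kSuppressExceptionEdges optimization is disabled, when the instruction is an explicit THROW, or when it sits inside a try block; otherwise the edge is skipped because control cannot return to the method through it anyway. A self-contained restatement (the enum bit index below is illustrative; the real value is the flag's position in the frontend.h enum):

#include <cstdint>

enum { kSuppressExceptionEdges = 17 };  // illustrative bit index only

// Mirrors build_all_edges from ProcessCanThrow(): materialize exception
// edges when suppression is off, for explicit THROWs, and inside try blocks.
bool BuildAllExceptionEdges(uint32_t disable_opt, bool is_throw, bool in_try_block) {
  return (disable_opt & (1u << kSuppressExceptionEdges)) != 0 || is_throw || in_try_block;
}

int main() {
  // A plain potentially-throwing instruction outside any try block gets no
  // explicit exception edge when suppression is enabled (bit clear).
  bool built = BuildAllExceptionEdges(0u, /*is_throw=*/false, /*in_try_block=*/false);
  return built ? 1 : 0;
}
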
diff --git a/compiler/dex/portable/mir_to_gbc.cc b/compiler/dex/portable/mir_to_gbc.cc
index 07bd2aa..963cbeb 100644
--- a/compiler/dex/portable/mir_to_gbc.cc
+++ b/compiler/dex/portable/mir_to_gbc.cc
@@ -1970,7 +1970,7 @@
 
     ::llvm::OwningPtr< ::llvm::tool_output_file> out_file(
         new ::llvm::tool_output_file(fname.c_str(), errmsg,
-                                   ::llvm::sys::fs::F_Binary));
+                                   ::llvm::raw_fd_ostream::F_Binary));
 
     if (!errmsg.empty()) {
       LOG(ERROR) << "Failed to create bitcode output file: " << errmsg;
diff --git a/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc b/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc
new file mode 100644
index 0000000..257b2c4
--- /dev/null
+++ b/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "dex/compiler_enums.h"
+
+#include "arm_dex_file_method_inliner.h"
+
+namespace art {
+
+const DexFileMethodInliner::IntrinsicDef ArmDexFileMethodInliner::kIntrinsicMethods[] = {
+#define INTRINSIC(c, n, p, o, d) \
+    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, d } }
+
+    INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0),
+    INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0),
+    INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0),
+    INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, 0),
+
+    INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, kWord),
+    INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, kLong),
+    INTRINSIC(JavaLangShort, ReverseBytes, S_S, kIntrinsicReverseBytes, kSignedHalf),
+
+    INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
+    INTRINSIC(JavaLangMath,       Abs, J_J, kIntrinsicAbsLong, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, J_J, kIntrinsicAbsLong, 0),
+    INTRINSIC(JavaLangMath,       Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangStrictMath, Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangMath,       Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangStrictMath, Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangMath,       Sqrt, D_D, kIntrinsicSqrt, 0),
+    INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0),
+
+    INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0),
+    INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0),
+    INTRINSIC(JavaLangString, IsEmpty, _Z, kIntrinsicIsEmptyOrLength, kIntrinsicFlagIsEmpty),
+    INTRINSIC(JavaLangString, IndexOf, II_I, kIntrinsicIndexOf, kIntrinsicFlagNone),
+    INTRINSIC(JavaLangString, IndexOf, I_I, kIntrinsicIndexOf, kIntrinsicFlagBase0),
+    INTRINSIC(JavaLangString, Length, _I, kIntrinsicIsEmptyOrLength, kIntrinsicFlagLength),
+
+    INTRINSIC(JavaLangThread, CurrentThread, _Thread, kIntrinsicCurrentThread, 0),
+
+    INTRINSIC(LibcoreIoMemory, PeekByte, J_B, kIntrinsicPeek, kSignedByte),
+    INTRINSIC(LibcoreIoMemory, PeekIntNative, J_I, kIntrinsicPeek, kWord),
+    INTRINSIC(LibcoreIoMemory, PeekLongNative, J_J, kIntrinsicPeek, kLong),
+    INTRINSIC(LibcoreIoMemory, PeekShortNative, J_S, kIntrinsicPeek, kSignedHalf),
+    INTRINSIC(LibcoreIoMemory, PokeByte, JB_V, kIntrinsicPoke, kSignedByte),
+    INTRINSIC(LibcoreIoMemory, PokeIntNative, JI_V, kIntrinsicPoke, kWord),
+    INTRINSIC(LibcoreIoMemory, PokeLongNative, JJ_V, kIntrinsicPoke, kLong),
+    INTRINSIC(LibcoreIoMemory, PokeShortNative, JS_V, kIntrinsicPoke, kSignedHalf),
+
+    INTRINSIC(SunMiscUnsafe, CompareAndSwapInt, ObjectJII_Z, kIntrinsicCas,
+              kIntrinsicFlagNone),
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapLong, ObjectJJJ_Z, kIntrinsicCas,
+    //           kIntrinsicFlagIsLong),
+    INTRINSIC(SunMiscUnsafe, CompareAndSwapObject, ObjectJObjectObject_Z, kIntrinsicCas,
+              kIntrinsicFlagIsObject),
+
+#define UNSAFE_GET_PUT(type, code, type_flags) \
+    INTRINSIC(SunMiscUnsafe, Get ## type, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              type_flags & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Get ## type ## Volatile, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              (type_flags | kIntrinsicFlagIsVolatile) & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Put ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags), \
+    INTRINSIC(SunMiscUnsafe, Put ## type ## Volatile, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsVolatile), \
+    INTRINSIC(SunMiscUnsafe, PutOrdered ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsOrdered)
+
+    UNSAFE_GET_PUT(Int, I, kIntrinsicFlagNone),
+    UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong),
+    UNSAFE_GET_PUT(Object, Object, kIntrinsicFlagIsObject),
+#undef UNSAFE_GET_PUT
+
+#undef INTRINSIC
+};
+
+ArmDexFileMethodInliner::ArmDexFileMethodInliner() {
+}
+
+ArmDexFileMethodInliner::~ArmDexFileMethodInliner() {
+}
+
+void ArmDexFileMethodInliner::FindIntrinsics(const DexFile* dex_file) {
+  IndexCache cache;
+  DoFindIntrinsics(dex_file, &cache, kIntrinsicMethods, arraysize(kIntrinsicMethods));
+}
+
+}  // namespace art
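
To make the table mechanics concrete, here is a compilable miniature of the INTRINSIC token-pasting pattern used above; the struct layout and enum values are stand-ins, not the real definitions from dex_file_method_inliner.h:

#include <cstdio>

enum ClassCache { kClassCacheJavaLangMath };
enum NameCache  { kNameCacheSqrt };
enum ProtoCache { kProtoCacheD_D };

struct MethodRef { ClassCache klass; NameCache name; ProtoCache proto; };
struct Intrinsic { int opcode; unsigned data; };
struct IntrinsicDef { MethodRef method; Intrinsic intrinsic; };

// Same pasting scheme as the patch: c/n/p are glued onto the cache prefixes.
#define INTRINSIC(c, n, p, o, d) \
    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, d } }

const IntrinsicDef kDemo[] = {
    // Expands to { { kClassCacheJavaLangMath, kNameCacheSqrt, kProtoCacheD_D },
    //              { 7, 0 } } -- 7 stands in for kIntrinsicSqrt here.
    INTRINSIC(JavaLangMath, Sqrt, D_D, 7, 0),
};
#undef INTRINSIC

int main() {
  printf("opcode=%d data=%u\n", kDemo[0].intrinsic.opcode, kDemo[0].intrinsic.data);
  return 0;
}
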
diff --git a/compiler/dex/quick/arm/arm_dex_file_method_inliner.h b/compiler/dex/quick/arm/arm_dex_file_method_inliner.h
new file mode 100644
index 0000000..3428391
--- /dev/null
+++ b/compiler/dex/quick/arm/arm_dex_file_method_inliner.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_ARM_ARM_DEX_FILE_METHOD_INLINER_H_
+#define ART_COMPILER_DEX_QUICK_ARM_ARM_DEX_FILE_METHOD_INLINER_H_
+
+#include "dex/quick/dex_file_method_inliner.h"
+
+namespace art {
+
+class ArmDexFileMethodInliner : public DexFileMethodInliner {
+  public:
+    ArmDexFileMethodInliner();
+    ~ArmDexFileMethodInliner();
+
+    void FindIntrinsics(const DexFile* dex_file);
+
+  private:
+    static const IntrinsicDef kIntrinsicMethods[];
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_ARM_ARM_DEX_FILE_METHOD_INLINER_H_
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index ffaaf84..8cd7c94 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -334,7 +334,7 @@
   kThumb2VcvtDF,     // vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12] [10111100] vm[3..0].
   kThumb2Vsqrts,     // vsqrt.f32 vd, vm [1110111010110001] vd[15..12] [10101100] vm[3..0].
   kThumb2Vsqrtd,     // vsqrt.f64 vd, vm [1110111010110001] vd[15..12] [10111100] vm[3..0].
-  kThumb2MovImmShift,  // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8.
+  kThumb2MovI8M,     // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8.
   kThumb2MovImm16,   // mov(T3) rd, #<const> [11110] i [0010100] imm4 [0] imm3 rd[11..8] imm8.
   kThumb2StrRRI12,   // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
   kThumb2LdrRRI12,   // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
@@ -346,14 +346,14 @@
   kThumb2MovRR,      // mov rd, rm [11101010010011110000] rd[11..8] [0000] rm[3..0].
   kThumb2Vmovs,      // vmov.f32 vd, vm [111011101] D [110000] vd[15..12] 101001] M [0] vm[3..0].
   kThumb2Vmovd,      // vmov.f64 vd, vm [111011101] D [110000] vd[15..12] 101101] M [0] vm[3..0].
-  kThumb2Ldmia,      // ldmia  [111010001001[ rn[19..16] mask[15..0].
-  kThumb2Stmia,      // stmia  [111010001000[ rn[19..16] mask[15..0].
+  kThumb2Ldmia,      // ldmia  [111010001001] rn[19..16] mask[15..0].
+  kThumb2Stmia,      // stmia  [111010001000] rn[19..16] mask[15..0].
   kThumb2AddRRR,     // add [111010110000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2SubRRR,     // sub [111010111010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2SbcRRR,     // sbc [111010110110] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2CmpRR,      // cmp [111010111011] rn[19..16] [0000] [1111] [0000] rm[3..0].
-  kThumb2SubRRI12,   // sub rd, rn, #imm12 [11110] i [01010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2MvnImm12,   // mov(T2) rd, #<const> [11110] i [00011011110] imm3 rd[11..8] imm8.
+  kThumb2SubRRI12,   // sub rd, rn, #imm12 [11110] i [101010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2MvnI8M,     // mov(T2) rd, #<const> [11110] i [00011011110] imm3 rd[11..8] imm8.
   kThumb2Sel,        // sel rd, rn, rm [111110101010] rn[19-16] rd[11-8] rm[3-0].
   kThumb2Ubfx,       // ubfx rd,rn,#lsb,#width [111100111100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0].
   kThumb2Sbfx,       // ubfx rd,rn,#lsb,#width [111100110100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0].
@@ -373,7 +373,8 @@
   kThumb2StrbRRI12,  // strb rt,[rn,#imm12] [111110001000] rt[15..12] rn[19..16] imm12[11..0].
   kThumb2Pop,        // pop   [1110100010111101] list[15-0]*/
   kThumb2Push,       // push  [1110100100101101] list[15-0]*/
-  kThumb2CmpRI12,    // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
+  kThumb2CmpRI8M,    // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
+  kThumb2CmnRI8M,    // cmn rn, #<const> [11110] i [010001] rn[19-16] [0] imm3 [1111] imm8[7..0].
   kThumb2AdcRRR,     // adc [111010110101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2AndRRR,     // and [111010100000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2BicRRR,     // bic [111010100010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
@@ -383,7 +384,7 @@
   kThumb2SdivRRR,    // sdiv [111110111001] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
   kThumb2UdivRRR,    // udiv [111110111011] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
   kThumb2MnvRR,      // mvn [11101010011011110] rd[11-8] [0000] rm[3..0].
-  kThumb2RsubRRI8,   // rsub [111100011100] rn[19..16] [0000] rd[11..8] imm8[7..0].
+  kThumb2RsubRRI8M,  // rsb rd, rn, #<const> [11110] i [011101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
   kThumb2NegRR,      // actually rsub rd, rn, #0.
   kThumb2OrrRRR,     // orr [111010100100] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2TstRR,      // tst [111010100001] rn[19..16] [0000] [1111] [0000] rm[3..0].
@@ -395,14 +396,14 @@
   kThumb2LsrRRI5,    // lsr [11101010010011110] imm[14.12] rd[11..8] [01] rm[3..0].
   kThumb2AsrRRI5,    // asr [11101010010011110] imm[14.12] rd[11..8] [10] rm[3..0].
   kThumb2RorRRI5,    // ror [11101010010011110] imm[14.12] rd[11..8] [11] rm[3..0].
-  kThumb2BicRRI8,    // bic [111100000010] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2AndRRI8,    // bic [111100000000] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2OrrRRI8,    // orr [111100000100] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2EorRRI8,    // eor [111100001000] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2AddRRI8,    // add [111100001000] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2AdcRRI8,    // adc [111100010101] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2SubRRI8,    // sub [111100011011] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2SbcRRI8,    // sbc [111100010111] rn[19..16] [0] imm3 rd[11..8] imm8.
+  kThumb2BicRRI8M,   // bic rd, rn, #<const> [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2AndRRI8M,   // and rd, rn, #<const> [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2OrrRRI8M,   // orr rd, rn, #<const> [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2EorRRI8M,   // eor rd, rn, #<const> [11110] i [001000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2AddRRI8M,   // add rd, rn, #<const> [11110] i [010001] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2AdcRRI8M,   // adc rd, rn, #<const> [11110] i [010101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2SubRRI8M,   // sub rd, rn, #<const> [11110] i [011011] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2SbcRRI8M,   // sub rd, rn, #<const> [11110] i [010111] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
   kThumb2RevRR,      // rev [111110101001] rm[19..16] [1111] rd[11..8] 1000 rm[3..0]
   kThumb2RevshRR,    // rev [111110101001] rm[19..16] [1111] rd[11..8] 1011 rm[3..0]
   kThumb2It,         // it [10111111] firstcond[7-4] mask[3-0].
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 3d0f263..1c81a5a 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -489,7 +489,7 @@
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
                  "vsqrt.f64 ", "!0S, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MovImmShift, 0xf04f0000, /* no setflags encoding */
+    ENCODING_MAP(kThumb2MovI8M, 0xf04f0000, /* no setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
                  "mov", "!0C, #!1m", 4, kFixupNone),
@@ -573,8 +573,8 @@
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
                  "sub", "!0C,!1C,#!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MvnImm12,  0xf06f0000, /* no setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
+    ENCODING_MAP(kThumb2MvnI8M,  0xf06f0000, /* no setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
                  "mvn", "!0C, #!1n", 4, kFixupNone),
     ENCODING_MAP(kThumb2Sel,       0xfaa0f080,
@@ -656,11 +656,16 @@
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
                  | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4, kFixupPushPop),
-    ENCODING_MAP(kThumb2CmpRI12, 0xf1b00f00,
+    ENCODING_MAP(kThumb2CmpRI8M, 0xf1b00f00,
                  kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_USE0 | SETS_CCODES,
                  "cmp", "!0C, #!1m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2CmnRI8M, 0xf1100f00,
+                 kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_USE0 | SETS_CCODES,
+                 "cmn", "!0C, #!1m", 4, kFixupNone),
     ENCODING_MAP(kThumb2AdcRRR,  0xeb500000, /* setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1,
@@ -699,11 +704,11 @@
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "mvn", "!0C, !1C, shift !2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RsubRRI8,       0xf1d00000,
+    ENCODING_MAP(kThumb2RsubRRI8M,       0xf1d00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "rsb", "!0C,!1C,#!2m", 4, kFixupNone),
+                 "rsbs", "!0C,!1C,#!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2NegRR,       0xf1d00000, /* instance of rsub */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
@@ -750,38 +755,38 @@
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "ror", "!0C, !1C, #!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2BicRRI8,  0xf0200000,
+    ENCODING_MAP(kThumb2BicRRI8M,  0xf0200000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "bic", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AndRRI8,  0xf0000000,
+    ENCODING_MAP(kThumb2AndRRI8M,  0xf0000000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "and", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2OrrRRI8,  0xf0400000,
+    ENCODING_MAP(kThumb2OrrRRI8M,  0xf0400000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "orr", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2EorRRI8,  0xf0800000,
+    ENCODING_MAP(kThumb2EorRRI8M,  0xf0800000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "eor", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AddRRI8,  0xf1100000,
+    ENCODING_MAP(kThumb2AddRRI8M,  0xf1100000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
                  "adds", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AdcRRI8,  0xf1500000,
+    ENCODING_MAP(kThumb2AdcRRI8M,  0xf1500000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
                  "adcs", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SubRRI8,  0xf1b00000,
+    ENCODING_MAP(kThumb2SubRRI8M,  0xf1b00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
                  "subs", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SbcRRI8,  0xf1700000,
+    ENCODING_MAP(kThumb2SbcRRI8M,  0xf1700000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 15355be..de3223a 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -104,7 +104,7 @@
     void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                   RegLocation rl_src2);
     void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
-    bool GenInlinedCas32(CallInfo* info, bool need_write_barrier);
+    bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
     bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index 480e021..1575ece 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -274,7 +274,7 @@
   NewLIR0(kThumb2Fmstat);
 
   OpIT((default_result == -1) ? kCondGt : kCondMi, "");
-  NewLIR2(kThumb2MovImmShift, rl_result.low_reg,
+  NewLIR2(kThumb2MovI8M, rl_result.low_reg,
           ModifiedImmediate(-default_result));  // Must not alter ccodes
   GenBarrier();
 
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 42bf3d4..9727179 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -97,7 +97,7 @@
   LIR* branch3 = OpCondBranch(kCondEq, NULL);
 
   OpIT(kCondHi, "E");
-  NewLIR2(kThumb2MovImmShift, t_reg, ModifiedImmediate(-1));
+  NewLIR2(kThumb2MovI8M, t_reg, ModifiedImmediate(-1));
   LoadConstant(t_reg, 1);
   GenBarrier();
 
@@ -299,7 +299,6 @@
 LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, int reg, int check_value,
                                 LIR* target) {
   LIR* branch;
-  int mod_imm;
   ArmConditionCode arm_cond = ArmConditionEncoding(cond);
   /*
    * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
@@ -317,16 +316,7 @@
     branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
                      reg, 0);
   } else {
-    mod_imm = ModifiedImmediate(check_value);
-    if (ARM_LOWREG(reg) && ((check_value & 0xff) == check_value)) {
-      NewLIR2(kThumbCmpRI8, reg, check_value);
-    } else if (mod_imm >= 0) {
-      NewLIR2(kThumb2CmpRI12, reg, mod_imm);
-    } else {
-      int t_reg = AllocTemp();
-      LoadConstant(t_reg, check_value);
-      OpRegReg(kOpCmp, reg, t_reg);
-    }
+    OpRegImm(kOpCmp, reg, check_value);
     branch = NewLIR2(kThumbBCond, 0, arm_cond);
   }
   branch->target = target;
@@ -570,14 +560,15 @@
   LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm";
 }
 
-bool ArmMir2Lir::GenInlinedCas32(CallInfo* info, bool need_write_barrier) {
+bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
+  DCHECK(!is_long);  // not supported yet
   DCHECK_EQ(cu_->instruction_set, kThumb2);
   // Unused - RegLocation rl_src_unsafe = info->args[0];
-  RegLocation rl_src_obj= info->args[1];  // Object - known non-null
-  RegLocation rl_src_offset= info->args[2];  // long low
+  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
+  RegLocation rl_src_offset = info->args[2];  // long low
   rl_src_offset.wide = 0;  // ignore high half in info->args[3]
-  RegLocation rl_src_expected= info->args[4];  // int or Object
-  RegLocation rl_src_new_value= info->args[5];  // int or Object
+  RegLocation rl_src_expected = info->args[4];  // int, long or Object
+  RegLocation rl_src_new_value = info->args[5];  // int, long or Object
   RegLocation rl_dest = InlineTarget(info);  // boolean place for result
 
 
@@ -587,7 +578,7 @@
   RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
   RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
 
-  if (need_write_barrier && !mir_graph_->IsConstantNullRef(rl_new_value)) {
+  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
     // Mark card for object assuming new value is stored.
     MarkGCCard(rl_new_value.low_reg, rl_object.low_reg);
   }
@@ -1124,8 +1115,8 @@
   switch (opcode) {
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
-      NewLIR3(kThumb2AddRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
-      NewLIR3(kThumb2AdcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
+      NewLIR3(kThumb2AddRRI8M, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
+      NewLIR3(kThumb2AdcRRI8M, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
       break;
     case Instruction::OR_LONG:
     case Instruction::OR_LONG_2ADDR:
@@ -1152,8 +1143,8 @@
       break;
     case Instruction::SUB_LONG_2ADDR:
     case Instruction::SUB_LONG:
-      NewLIR3(kThumb2SubRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
-      NewLIR3(kThumb2SbcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
+      NewLIR3(kThumb2SubRRI8M, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
+      NewLIR3(kThumb2SbcRRI8M, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
       break;
     default:
       LOG(FATAL) << "Unexpected opcode " << opcode;
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index d631cf7..8a8b168 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -184,12 +184,12 @@
   /* Check Modified immediate special cases */
   mod_imm = ModifiedImmediate(value);
   if (mod_imm >= 0) {
-    res = NewLIR2(kThumb2MovImmShift, r_dest, mod_imm);
+    res = NewLIR2(kThumb2MovI8M, r_dest, mod_imm);
     return res;
   }
   mod_imm = ModifiedImmediate(~value);
   if (mod_imm >= 0) {
-    res = NewLIR2(kThumb2MvnImm12, r_dest, mod_imm);
+    res = NewLIR2(kThumb2MvnI8M, r_dest, mod_imm);
     return res;
   }
   /* 16-bit immediate? */
@@ -446,7 +446,6 @@
   ArmOpcode alt_opcode = kThumbBkpt;
   bool all_low_regs = (ARM_LOWREG(r_dest) && ARM_LOWREG(r_src1));
   int32_t mod_imm = ModifiedImmediate(value);
-  int32_t mod_imm_neg = ModifiedImmediate(-value);
 
   switch (op) {
     case kOpLsl:
@@ -482,47 +481,55 @@
         else
           opcode = (neg) ? kThumbAddRRI3 : kThumbSubRRI3;
         return NewLIR3(opcode, r_dest, r_src1, abs_value);
-      } else if ((abs_value & 0xff) == abs_value) {
+      } else if ((abs_value & 0x3ff) == abs_value) {
         if (op == kOpAdd)
           opcode = (neg) ? kThumb2SubRRI12 : kThumb2AddRRI12;
         else
           opcode = (neg) ? kThumb2AddRRI12 : kThumb2SubRRI12;
         return NewLIR3(opcode, r_dest, r_src1, abs_value);
       }
-      if (mod_imm_neg >= 0) {
-        op = (op == kOpAdd) ? kOpSub : kOpAdd;
-        mod_imm = mod_imm_neg;
+      if (mod_imm < 0) {
+        mod_imm = ModifiedImmediate(-value);
+        if (mod_imm >= 0) {
+          op = (op == kOpAdd) ? kOpSub : kOpAdd;
+        }
       }
       if (op == kOpSub) {
-        opcode = kThumb2SubRRI8;
+        opcode = kThumb2SubRRI8M;
         alt_opcode = kThumb2SubRRR;
       } else {
-        opcode = kThumb2AddRRI8;
+        opcode = kThumb2AddRRI8M;
         alt_opcode = kThumb2AddRRR;
       }
       break;
     case kOpRsub:
-      opcode = kThumb2RsubRRI8;
+      opcode = kThumb2RsubRRI8M;
       alt_opcode = kThumb2RsubRRR;
       break;
     case kOpAdc:
-      opcode = kThumb2AdcRRI8;
+      opcode = kThumb2AdcRRI8M;
       alt_opcode = kThumb2AdcRRR;
       break;
     case kOpSbc:
-      opcode = kThumb2SbcRRI8;
+      opcode = kThumb2SbcRRI8M;
       alt_opcode = kThumb2SbcRRR;
       break;
     case kOpOr:
-      opcode = kThumb2OrrRRI8;
+      opcode = kThumb2OrrRRI8M;
       alt_opcode = kThumb2OrrRRR;
       break;
     case kOpAnd:
-      opcode = kThumb2AndRRI8;
+      if (mod_imm < 0) {
+        mod_imm = ModifiedImmediate(~value);
+        if (mod_imm >= 0) {
+          return NewLIR3(kThumb2BicRRI8M, r_dest, r_src1, mod_imm);
+        }
+      }
+      opcode = kThumb2AndRRI8M;
       alt_opcode = kThumb2AndRRR;
       break;
     case kOpXor:
-      opcode = kThumb2EorRRI8;
+      opcode = kThumb2EorRRI8M;
       alt_opcode = kThumb2EorRRR;
       break;
     case kOpMul:
@@ -531,15 +538,19 @@
       alt_opcode = kThumb2MulRRR;
       break;
     case kOpCmp: {
-      int mod_imm = ModifiedImmediate(value);
       LIR* res;
       if (mod_imm >= 0) {
-        res = NewLIR2(kThumb2CmpRI12, r_src1, mod_imm);
+        res = NewLIR2(kThumb2CmpRI8M, r_src1, mod_imm);
       } else {
-        int r_tmp = AllocTemp();
-        res = LoadConstant(r_tmp, value);
-        OpRegReg(kOpCmp, r_src1, r_tmp);
-        FreeTemp(r_tmp);
+        mod_imm = ModifiedImmediate(-value);
+        if (mod_imm >= 0) {
+          res = NewLIR2(kThumb2CmnRI8M, r_src1, mod_imm);
+        } else {
+          int r_tmp = AllocTemp();
+          res = LoadConstant(r_tmp, value);
+          OpRegReg(kOpCmp, r_src1, r_tmp);
+          FreeTemp(r_tmp);
+        }
       }
       return res;
     }
@@ -585,13 +596,10 @@
       }
       break;
     case kOpCmp:
-      if (ARM_LOWREG(r_dest_src1) && short_form) {
-        opcode = (short_form) ?  kThumbCmpRI8 : kThumbCmpRR;
-      } else if (ARM_LOWREG(r_dest_src1)) {
-        opcode = kThumbCmpRR;
+      if (!neg && short_form) {
+        opcode = kThumbCmpRI8;
       } else {
         short_form = false;
-        opcode = kThumbCmpHL;
       }
       break;
     default:
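
Several hunks in this file pick an instruction by probing whether value, -value, or ~value is encodable as a Thumb-2 modified immediate (ModifiedImmediate(x) >= 0): mov vs. mvn in the constant-load hunk, cmp vs. cmn under kOpCmp, and and vs. bic under kOpAnd. Below is a hedged, self-contained approximation of that encodability test as I read the Thumb-2 rules (four byte-repeat patterns, or an 8-bit constant with its top bit set, rotated right by 8 to 31); it is a sketch, not ART's actual ModifiedImmediate():

#include <cstdint>
#include <cstdio>

bool IsThumb2ModifiedImmediate(uint32_t value) {
  uint32_t b = value & 0xff;
  if (value <= 0xffu) return true;                                    // 000000ab
  if (value == ((b << 16) | b)) return true;                          // 00ab00ab
  if (value == ((b << 24) | (b << 16) | (b << 8) | b)) return true;   // abababab
  b = (value >> 8) & 0xff;
  if (value == ((b << 24) | (b << 8))) return true;                   // ab00ab00
  // Rotated form: value == imm8 ROR rot with imm8 in [0x80, 0xff] and
  // rot in [8, 31]; rotating value left by rot must recover such an imm8.
  for (uint32_t rot = 8; rot < 32; ++rot) {
    uint32_t imm8 = (value << rot) | (value >> (32 - rot));
    if (imm8 >= 0x80u && imm8 <= 0xffu) return true;
  }
  return false;
}

int main() {
  printf("%d %d %d\n",
         IsThumb2ModifiedImmediate(0x00550055u),   // repeat pattern -> 1
         IsThumb2ModifiedImmediate(0xff000000u),   // 0xff ROR 8     -> 1
         IsThumb2ModifiedImmediate(0x12345678u));  // not encodable  -> 0
  return 0;
}
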
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index dfbc887..4bc0b35 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -920,7 +920,8 @@
       core_spill_mask_(0),
       fp_spill_mask_(0),
       first_lir_insn_(NULL),
-      last_lir_insn_(NULL) {
+      last_lir_insn_(NULL),
+      inliner_(nullptr) {
   promotion_map_ = static_cast<PromotionMap*>
       (arena_->Alloc((cu_->num_dalvik_registers  + cu_->num_compiler_temps + 1) *
                       sizeof(promotion_map_[0]), ArenaAllocator::kAllocRegAlloc));
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
new file mode 100644
index 0000000..6c0328e
--- /dev/null
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -0,0 +1,350 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include "base/macros.h"
+#include "dex/mir_graph.h"
+
+#include "dex_file_method_inliner.h"
+
+namespace art {
+
+const char* DexFileMethodInliner::kClassCacheNames[] = {
+    "Z",                       // kClassCacheBoolean
+    "B",                       // kClassCacheByte
+    "C",                       // kClassCacheChar
+    "S",                       // kClassCacheShort
+    "I",                       // kClassCacheInt
+    "J",                       // kClassCacheLong
+    "F",                       // kClassCacheFloat
+    "D",                       // kClassCacheDouble
+    "V",                       // kClassCacheVoid
+    "Ljava/lang/Object;",      // kClassCacheJavaLangObject
+    "Ljava/lang/String;",      // kClassCacheJavaLangString
+    "Ljava/lang/Double;",      // kClassCacheJavaLangDouble
+    "Ljava/lang/Float;",       // kClassCacheJavaLangFloat
+    "Ljava/lang/Integer;",     // kClassCacheJavaLangInteger
+    "Ljava/lang/Long;",        // kClassCacheJavaLangLong
+    "Ljava/lang/Short;",       // kClassCacheJavaLangShort
+    "Ljava/lang/Math;",        // kClassCacheJavaLangMath
+    "Ljava/lang/StrictMath;",  // kClassCacheJavaLangStrictMath
+    "Ljava/lang/Thread;",      // kClassCacheJavaLangThread
+    "Llibcore/io/Memory;",     // kClassCacheLibcoreIoMemory
+    "Lsun/misc/Unsafe;",       // kClassCacheSunMiscUnsafe
+};
+
+const char* DexFileMethodInliner::kNameCacheNames[] = {
+    "reverseBytes",          // kNameCacheReverseBytes
+    "doubleToRawLongBits",   // kNameCacheDoubleToRawLongBits
+    "longBitsToDouble",      // kNameCacheLongBitsToDouble
+    "floatToRawIntBits",     // kNameCacheFloatToRawIntBits
+    "intBitsToFloat",        // kNameCacheIntBitsToFloat
+    "abs",                   // kNameCacheAbs
+    "max",                   // kNameCacheMax
+    "min",                   // kNameCacheMin
+    "sqrt",                  // kNameCacheSqrt
+    "charAt",                // kNameCacheCharAt
+    "compareTo",             // kNameCacheCompareTo
+    "isEmpty",               // kNameCacheIsEmpty
+    "indexOf",               // kNameCacheIndexOf
+    "length",                // kNameCacheLength
+    "currentThread",         // kNameCacheCurrentThread
+    "peekByte",              // kNameCachePeekByte
+    "peekIntNative",         // kNameCachePeekIntNative
+    "peekLongNative",        // kNameCachePeekLongNative
+    "peekShortNative",       // kNameCachePeekShortNative
+    "pokeByte",              // kNameCachePokeByte
+    "pokeIntNative",         // kNameCachePokeIntNative
+    "pokeLongNative",        // kNameCachePokeLongNative
+    "pokeShortNative",       // kNameCachePokeShortNative
+    "compareAndSwapInt",     // kNameCacheCompareAndSwapInt
+    "compareAndSwapLong",    // kNameCacheCompareAndSwapLong
+    "compareAndSwapObject",  // kNameCacheCompareAndSwapObject
+    "getInt",                // kNameCacheGetInt
+    "getIntVolatile",        // kNameCacheGetIntVolatile
+    "putInt",                // kNameCachePutInt
+    "putIntVolatile",        // kNameCachePutIntVolatile
+    "putOrderedInt",         // kNameCachePutOrderedInt
+    "getLong",               // kNameCacheGetLong
+    "getLongVolatile",       // kNameCacheGetLongVolatile
+    "putLong",               // kNameCachePutLong
+    "putLongVolatile",       // kNameCachePutLongVolatile
+    "putOrderedLong",        // kNameCachePutOrderedLong
+    "getObject",             // kNameCacheGetObject
+    "getObjectVolatile",     // kNameCacheGetObjectVolatile
+    "putObject",             // kNameCachePutObject
+    "putObjectVolatile",     // kNameCachePutObjectVolatile
+    "putOrderedObject",      // kNameCachePutOrderedObject
+};
+
+const DexFileMethodInliner::ProtoDef DexFileMethodInliner::kProtoCacheDefs[] = {
+    // kProtoCacheI_I
+    { kClassCacheInt, 1, { kClassCacheInt } },
+    // kProtoCacheJ_J
+    { kClassCacheLong, 1, { kClassCacheLong } },
+    // kProtoCacheS_S
+    { kClassCacheShort, 1, { kClassCacheShort } },
+    // kProtoCacheD_D
+    { kClassCacheDouble, 1, { kClassCacheDouble } },
+    // kProtoCacheD_J
+    { kClassCacheLong, 1, { kClassCacheDouble } },
+    // kProtoCacheJ_D
+    { kClassCacheDouble, 1, { kClassCacheLong } },
+    // kProtoCacheF_I
+    { kClassCacheInt, 1, { kClassCacheFloat } },
+    // kProtoCacheI_F
+    { kClassCacheFloat, 1, { kClassCacheInt } },
+    // kProtoCacheII_I
+    { kClassCacheInt, 2, { kClassCacheInt, kClassCacheInt } },
+    // kProtoCacheI_C
+    { kClassCacheChar, 1, { kClassCacheInt } },
+    // kProtoCacheString_I
+    { kClassCacheInt, 1, { kClassCacheJavaLangString } },
+    // kProtoCache_Z
+    { kClassCacheBoolean, 0, { } },
+    // kProtoCache_I
+    { kClassCacheInt, 0, { } },
+    // kProtoCache_Thread
+    { kClassCacheJavaLangThread, 0, { } },
+    // kProtoCacheJ_B
+    { kClassCacheByte, 1, { kClassCacheLong } },
+    // kProtoCacheJ_I
+    { kClassCacheInt, 1, { kClassCacheLong } },
+    // kProtoCacheJ_S
+    { kClassCacheShort, 1, { kClassCacheLong } },
+    // kProtoCacheJB_V
+    { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheByte } },
+    // kProtoCacheJI_V
+    { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheInt } },
+    // kProtoCacheJJ_V
+    { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheLong } },
+    // kProtoCacheJS_V
+    { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheShort } },
+    // kProtoCacheObjectJII_Z
+    { kClassCacheBoolean, 4, { kClassCacheJavaLangObject, kClassCacheLong,
+        kClassCacheInt, kClassCacheInt } },
+    // kProtoCacheObjectJJJ_Z
+    { kClassCacheBoolean, 4, { kClassCacheJavaLangObject, kClassCacheLong,
+        kClassCacheLong, kClassCacheLong } },
+    // kProtoCacheObjectJObjectObject_Z
+    { kClassCacheBoolean, 4, { kClassCacheJavaLangObject, kClassCacheLong,
+        kClassCacheJavaLangObject, kClassCacheJavaLangObject } },
+    // kProtoCacheObjectJ_I
+    { kClassCacheInt, 2, { kClassCacheJavaLangObject, kClassCacheLong } },
+    // kProtoCacheObjectJI_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheInt } },
+    // kProtoCacheObjectJ_J
+    { kClassCacheLong, 2, { kClassCacheJavaLangObject, kClassCacheLong } },
+    // kProtoCacheObjectJJ_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheLong } },
+    // kProtoCacheObjectJ_Object
+    { kClassCacheJavaLangObject, 2, { kClassCacheJavaLangObject, kClassCacheLong } },
+    // kProtoCacheObjectJObject_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong,
+        kClassCacheJavaLangObject } },
+};
+
+DexFileMethodInliner::~DexFileMethodInliner() {
+}
+
+DexFileMethodInliner::DexFileMethodInliner()
+    : dex_file_(NULL) {
+  COMPILE_ASSERT(kClassCacheFirst == 0, kClassCacheFirst_not_0);
+  COMPILE_ASSERT(arraysize(kClassCacheNames) == kClassCacheLast, bad_arraysize_kClassCacheNames);
+  COMPILE_ASSERT(kNameCacheFirst == 0, kNameCacheFirst_not_0);
+  COMPILE_ASSERT(arraysize(kNameCacheNames) == kNameCacheLast, bad_arraysize_kNameCacheNames);
+  COMPILE_ASSERT(kProtoCacheFirst == 0, kProtoCacheFirst_not_0);
+  COMPILE_ASSERT(arraysize(kProtoCacheDefs) == kProtoCacheLast, bad_arraysize_kProtoCacheDefs);
+}
+
+bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index) const {
+  return intrinsics_.find(method_index) != intrinsics_.end();
+}
+
+bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) const {
+  auto it = intrinsics_.find(info->index);
+  if (it == intrinsics_.end()) {
+    return false;
+  }
+  const Intrinsic& intrinsic = it->second;
+  switch (intrinsic.opcode) {
+    case kIntrinsicDoubleCvt:
+      return backend->GenInlinedDoubleCvt(info);
+    case kIntrinsicFloatCvt:
+      return backend->GenInlinedFloatCvt(info);
+    case kIntrinsicReverseBytes:
+      return backend->GenInlinedReverseBytes(info, static_cast<OpSize>(intrinsic.data));
+    case kIntrinsicAbsInt:
+      return backend->GenInlinedAbsInt(info);
+    case kIntrinsicAbsLong:
+      return backend->GenInlinedAbsLong(info);
+    case kIntrinsicMinMaxInt:
+      return backend->GenInlinedMinMaxInt(info, intrinsic.data & kIntrinsicFlagMin);
+    case kIntrinsicSqrt:
+      return backend->GenInlinedSqrt(info);
+    case kIntrinsicCharAt:
+      return backend->GenInlinedCharAt(info);
+    case kIntrinsicCompareTo:
+      return backend->GenInlinedStringCompareTo(info);
+    case kIntrinsicIsEmptyOrLength:
+      return backend->GenInlinedStringIsEmptyOrLength(info, intrinsic.data & kIntrinsicFlagIsEmpty);
+    case kIntrinsicIndexOf:
+      return backend->GenInlinedIndexOf(info, intrinsic.data & kIntrinsicFlagBase0);
+    case kIntrinsicCurrentThread:
+      return backend->GenInlinedCurrentThread(info);
+    case kIntrinsicPeek:
+      return backend->GenInlinedPeek(info, static_cast<OpSize>(intrinsic.data));
+    case kIntrinsicPoke:
+      return backend->GenInlinedPoke(info, static_cast<OpSize>(intrinsic.data));
+    case kIntrinsicCas:
+      return backend->GenInlinedCas(info, intrinsic.data & kIntrinsicFlagIsLong,
+                                    intrinsic.data & kIntrinsicFlagIsObject);
+    case kIntrinsicUnsafeGet:
+      return backend->GenInlinedUnsafeGet(info, intrinsic.data & kIntrinsicFlagIsLong,
+                                          intrinsic.data & kIntrinsicFlagIsVolatile);
+    case kIntrinsicUnsafePut:
+      return backend->GenInlinedUnsafePut(info, intrinsic.data & kIntrinsicFlagIsLong,
+                                          intrinsic.data & kIntrinsicFlagIsObject,
+                                          intrinsic.data & kIntrinsicFlagIsVolatile,
+                                          intrinsic.data & kIntrinsicFlagIsOrdered);
+    default:
+      LOG(FATAL) << "Unexpected intrinsic opcode: " << intrinsic.opcode;
+      return false;  // avoid warning "control reaches end of non-void function"
+  }
+}
+
+uint32_t DexFileMethodInliner::FindClassIndex(const DexFile* dex_file, IndexCache* cache,
+                                              ClassCacheIndex index) {
+  uint32_t* class_index = &cache->class_indexes[index];
+  if (*class_index != kIndexUnresolved) {
+    return *class_index;
+  }
+
+  const DexFile::StringId* string_id = dex_file->FindStringId(kClassCacheNames[index]);
+  if (string_id == nullptr) {
+    *class_index = kIndexNotFound;
+    return *class_index;
+  }
+  uint32_t string_index = dex_file->GetIndexForStringId(*string_id);
+
+  const DexFile::TypeId* type_id = dex_file->FindTypeId(string_index);
+  if (type_id == nullptr) {
+    *class_index = kIndexNotFound;
+    return *class_index;
+  }
+  *class_index = dex_file->GetIndexForTypeId(*type_id);
+  return *class_index;
+}
+
+uint32_t DexFileMethodInliner::FindNameIndex(const DexFile* dex_file, IndexCache* cache,
+                                             NameCacheIndex index) {
+  uint32_t* name_index = &cache->name_indexes[index];
+  if (*name_index != kIndexUnresolved) {
+    return *name_index;
+  }
+
+  const DexFile::StringId* string_id = dex_file->FindStringId(kNameCacheNames[index]);
+  if (string_id == nullptr) {
+    *name_index = kIndexNotFound;
+    return *name_index;
+  }
+  *name_index = dex_file->GetIndexForStringId(*string_id);
+  return *name_index;
+}
+
+uint32_t DexFileMethodInliner::FindProtoIndex(const DexFile* dex_file, IndexCache* cache,
+                                              ProtoCacheIndex index) {
+  uint32_t* proto_index = &cache->proto_indexes[index];
+  if (*proto_index != kIndexUnresolved) {
+    return *proto_index;
+  }
+
+  const ProtoDef& proto_def = kProtoCacheDefs[index];
+  uint32_t return_index = FindClassIndex(dex_file, cache, proto_def.return_type);
+  if (return_index == kIndexNotFound) {
+    *proto_index = kIndexNotFound;
+    return *proto_index;
+  }
+  uint16_t return_type = static_cast<uint16_t>(return_index);
+  DCHECK_EQ(static_cast<uint32_t>(return_type), return_index);
+
+  uint32_t signature_length = proto_def.param_count;
+  uint16_t signature_type_idxs[kProtoMaxParams];
+  for (uint32_t i = 0; i != signature_length; ++i) {
+    uint32_t param_index = FindClassIndex(dex_file, cache, proto_def.params[i]);
+    if (param_index == kIndexNotFound) {
+      *proto_index = kIndexNotFound;
+      return *proto_index;
+    }
+    signature_type_idxs[i] = static_cast<uint16_t>(param_index);
+    DCHECK_EQ(static_cast<uint32_t>(signature_type_idxs[i]), param_index);
+  }
+
+  const DexFile::ProtoId* proto_id = dex_file->FindProtoId(return_type, signature_type_idxs,
+                                                           signature_length);
+  if (proto_id == nullptr) {
+    *proto_index = kIndexNotFound;
+    return *proto_index;
+  }
+  *proto_index = dex_file->GetIndexForProtoId(*proto_id);
+  return *proto_index;
+}
+
+uint32_t DexFileMethodInliner::FindMethodIndex(const DexFile* dex_file, IndexCache* cache,
+                                               const MethodDef& method_def) {
+  uint32_t declaring_class_index = FindClassIndex(dex_file, cache, method_def.declaring_class);
+  if (declaring_class_index == kIndexNotFound) {
+    return kIndexNotFound;
+  }
+  uint32_t name_index = FindNameIndex(dex_file, cache, method_def.name);
+  if (name_index == kIndexNotFound) {
+    return kIndexNotFound;
+  }
+  uint32_t proto_index = FindProtoIndex(dex_file, cache, method_def.proto);
+  if (proto_index == kIndexNotFound) {
+    return kIndexNotFound;
+  }
+  const DexFile::MethodId* method_id =
+      dex_file->FindMethodId(dex_file->GetTypeId(declaring_class_index),
+                             dex_file->GetStringId(name_index),
+                             dex_file->GetProtoId(proto_index));
+  if (method_id == nullptr) {
+    return kIndexNotFound;
+  }
+  return dex_file->GetIndexForMethodId(*method_id);
+}
+
+DexFileMethodInliner::IndexCache::IndexCache() {
+  std::fill_n(class_indexes, arraysize(class_indexes), kIndexUnresolved);
+  std::fill_n(name_indexes, arraysize(name_indexes), kIndexUnresolved);
+  std::fill_n(proto_indexes, arraysize(proto_indexes), kIndexUnresolved);
+}
+
+void DexFileMethodInliner::DoFindIntrinsics(const DexFile* dex_file, IndexCache* cache,
+                                            const IntrinsicDef* defs, uint32_t def_count) {
+  DCHECK(dex_file != nullptr);
+  DCHECK(dex_file_ == nullptr);
+  for (uint32_t i = 0u; i != def_count; ++i) {
+    uint32_t method_id = FindMethodIndex(dex_file, cache, defs[i].method_def);
+    if (method_id != kIndexNotFound) {
+      DCHECK(intrinsics_.find(method_id) == intrinsics_.end());
+      intrinsics_[method_id] = defs[i].intrinsic;
+    }
+  }
+  dex_file_ = dex_file;
+}
+
+}  // namespace art
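
The three Find*Index() helpers above share one memoization idiom: a cache slot starts at kIndexUnresolved, a failed lookup is latched as kIndexNotFound so it is never retried, and a success stores the real index. A minimal self-contained sketch of that idiom, with a hypothetical ResolveSlowly() standing in for the DexFile::Find*Id() calls:

    #include <cstdint>

    static constexpr uint32_t kIndexNotFound = static_cast<uint32_t>(-1);
    static constexpr uint32_t kIndexUnresolved = static_cast<uint32_t>(-2);

    // Hypothetical slow path; returns kIndexNotFound when the item is absent.
    uint32_t ResolveSlowly(int key);

    uint32_t FindCached(uint32_t* slot, int key) {
      if (*slot != kIndexUnresolved) {
        return *slot;  // Hit: a real index or a previously latched miss.
      }
      *slot = ResolveSlowly(key);  // Latches kIndexNotFound on failure, too.
      return *slot;
    }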
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
new file mode 100644
index 0000000..bc00513
--- /dev/null
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -0,0 +1,318 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_DEX_FILE_METHOD_INLINER_H_
+#define ART_COMPILER_DEX_QUICK_DEX_FILE_METHOD_INLINER_H_
+
+#include <stdint.h>
+#include <map>
+
+namespace art {
+
+class CallInfo;
+class DexFile;
+class Mir2Lir;
+
+enum IntrinsicOpcode {
+  kIntrinsicDoubleCvt,
+  kIntrinsicFloatCvt,
+  kIntrinsicReverseBytes,
+  kIntrinsicAbsInt,
+  kIntrinsicAbsLong,
+  kIntrinsicMinMaxInt,
+  kIntrinsicSqrt,
+  kIntrinsicCharAt,
+  kIntrinsicCompareTo,
+  kIntrinsicIsEmptyOrLength,
+  kIntrinsicIndexOf,
+  kIntrinsicCurrentThread,
+  kIntrinsicPeek,
+  kIntrinsicPoke,
+  kIntrinsicCas,
+  kIntrinsicUnsafeGet,
+  kIntrinsicUnsafePut,
+};
+
+enum IntrinsicFlags {
+  kIntrinsicFlagNone = 0,
+
+  // kIntrinsicMinMaxInt
+  kIntrinsicFlagMax = kIntrinsicFlagNone,
+  kIntrinsicFlagMin = 1,
+
+  // kIntrinsicIsEmptyOrLength
+  kIntrinsicFlagLength  = kIntrinsicFlagNone,
+  kIntrinsicFlagIsEmpty = 1,
+
+  // kIntrinsicIndexOf
+  kIntrinsicFlagBase0 = 1,
+
+  // kIntrinsicUnsafeGet, kIntrinsicUnsafePut, kIntrinsicCas
+  kIntrinsicFlagIsLong     = 1,
+  // kIntrinsicUnsafeGet, kIntrinsicUnsafePut
+  kIntrinsicFlagIsVolatile = 2,
+  // kIntrinsicUnsafePut, kIntrinsicCas
+  kIntrinsicFlagIsObject   = 4,
+  // kIntrinsicUnsafePut
+  kIntrinsicFlagIsOrdered  = 8,
+};
+
+struct Intrinsic {
+  IntrinsicOpcode opcode;
+  uint32_t data;
+};
+
+/**
+ * Handles inlining of methods from a particular DexFile.
+ *
+ * Intrinsics are a special case of inline methods. The DexFile indices for
+ * all the supported intrinsic methods are looked up once by the FindIntrinsics
+ * function and cached by this class for quick lookup by the method index.
+ *
+ * TODO: Detect short methods (at least getters, setters and empty functions)
+ * from the verifier and mark them for inlining. Inline these methods early
+ * during compilation to allow further optimizations. Similarly, provide
+ * additional information about intrinsics to the early phases of compilation.
+ */
+class DexFileMethodInliner {
+  public:
+    virtual ~DexFileMethodInliner();
+
+    /**
+     * Find all known intrinsic methods in the dex_file and cache their indices.
+     */
+    virtual void FindIntrinsics(const DexFile* dex_file) = 0;
+
+    /**
+     * Check whether a particular method index corresponds to an intrinsic function.
+     */
+    bool IsIntrinsic(uint32_t method_index) const;
+
+    /**
+     * Generate code for an intrinsic function invocation.
+     *
+     * TODO: This should be target-specific. For the time being,
+     * it's shared since it dispatches everything to the backend.
+     */
+    bool GenIntrinsic(Mir2Lir* backend, CallInfo* info) const;
+
+  protected:
+    DexFileMethodInliner();
+
+    /**
+     * To avoid multiple lookups of a class by its descriptor, we cache its
+     * type index in the IndexCache. These are the indexes into the IndexCache
+     * class_indexes array.
+     */
+    enum ClassCacheIndex : uint8_t {  // uint8_t to save space, make larger if needed
+      kClassCacheFirst = 0,
+      kClassCacheBoolean = kClassCacheFirst,
+      kClassCacheByte,
+      kClassCacheChar,
+      kClassCacheShort,
+      kClassCacheInt,
+      kClassCacheLong,
+      kClassCacheFloat,
+      kClassCacheDouble,
+      kClassCacheVoid,
+      kClassCacheJavaLangObject,
+      kClassCacheJavaLangString,
+      kClassCacheJavaLangDouble,
+      kClassCacheJavaLangFloat,
+      kClassCacheJavaLangInteger,
+      kClassCacheJavaLangLong,
+      kClassCacheJavaLangShort,
+      kClassCacheJavaLangMath,
+      kClassCacheJavaLangStrictMath,
+      kClassCacheJavaLangThread,
+      kClassCacheLibcoreIoMemory,
+      kClassCacheSunMiscUnsafe,
+      kClassCacheLast
+    };
+
+    /**
+     * To avoid multiple lookups of a method name string, we cache its string
+     * index in the IndexCache. These are the indexes into the IndexCache
+     * name_indexes array.
+     */
+    enum NameCacheIndex : uint8_t {  // uint8_t to save space, make larger if needed
+      kNameCacheFirst = 0,
+      kNameCacheReverseBytes = kNameCacheFirst,
+      kNameCacheDoubleToRawLongBits,
+      kNameCacheLongBitsToDouble,
+      kNameCacheFloatToRawIntBits,
+      kNameCacheIntBitsToFloat,
+      kNameCacheAbs,
+      kNameCacheMax,
+      kNameCacheMin,
+      kNameCacheSqrt,
+      kNameCacheCharAt,
+      kNameCacheCompareTo,
+      kNameCacheIsEmpty,
+      kNameCacheIndexOf,
+      kNameCacheLength,
+      kNameCacheCurrentThread,
+      kNameCachePeekByte,
+      kNameCachePeekIntNative,
+      kNameCachePeekLongNative,
+      kNameCachePeekShortNative,
+      kNameCachePokeByte,
+      kNameCachePokeIntNative,
+      kNameCachePokeLongNative,
+      kNameCachePokeShortNative,
+      kNameCacheCompareAndSwapInt,
+      kNameCacheCompareAndSwapLong,
+      kNameCacheCompareAndSwapObject,
+      kNameCacheGetInt,
+      kNameCacheGetIntVolatile,
+      kNameCachePutInt,
+      kNameCachePutIntVolatile,
+      kNameCachePutOrderedInt,
+      kNameCacheGetLong,
+      kNameCacheGetLongVolatile,
+      kNameCachePutLong,
+      kNameCachePutLongVolatile,
+      kNameCachePutOrderedLong,
+      kNameCacheGetObject,
+      kNameCacheGetObjectVolatile,
+      kNameCachePutObject,
+      kNameCachePutObjectVolatile,
+      kNameCachePutOrderedObject,
+      kNameCacheLast
+    };
+
+    /**
+     * To avoid multiple lookups of a method signature, we cache its proto
+     * index in the IndexCache. These are the indexes into the IndexCache
+     * proto_indexes array.
+     */
+    enum ProtoCacheIndex : uint8_t {  // uint8_t to save space, make larger if needed
+      kProtoCacheFirst = 0,
+      kProtoCacheI_I = kProtoCacheFirst,
+      kProtoCacheJ_J,
+      kProtoCacheS_S,
+      kProtoCacheD_D,
+      kProtoCacheD_J,
+      kProtoCacheJ_D,
+      kProtoCacheF_I,
+      kProtoCacheI_F,
+      kProtoCacheII_I,
+      kProtoCacheI_C,
+      kProtoCacheString_I,
+      kProtoCache_Z,
+      kProtoCache_I,
+      kProtoCache_Thread,
+      kProtoCacheJ_B,
+      kProtoCacheJ_I,
+      kProtoCacheJ_S,
+      kProtoCacheJB_V,
+      kProtoCacheJI_V,
+      kProtoCacheJJ_V,
+      kProtoCacheJS_V,
+      kProtoCacheObjectJII_Z,
+      kProtoCacheObjectJJJ_Z,
+      kProtoCacheObjectJObjectObject_Z,
+      kProtoCacheObjectJ_I,
+      kProtoCacheObjectJI_V,
+      kProtoCacheObjectJ_J,
+      kProtoCacheObjectJJ_V,
+      kProtoCacheObjectJ_Object,
+      kProtoCacheObjectJObject_V,
+      kProtoCacheLast
+    };
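+    // The shorthand reads "params_return" in dex descriptor letters, so,
+    // for example, kProtoCacheObjectJII_Z is (Ljava/lang/Object;JII)Z,
+    // i.e. boolean f(Object, long, int, int) as used by
+    // sun.misc.Unsafe.compareAndSwapInt().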
+
+    /**
+     * The maximum number of method parameters we support in the ProtoDef.
+     */
+    static constexpr uint32_t kProtoMaxParams = 6;
+
+    /**
+     * The method signature (proto) definition using cached class indexes.
+     * The return_type and params are used with the IndexCache to look up
+     * appropriate class indexes to be passed to DexFile::FindProtoId().
+     */
+    struct ProtoDef {
+      ClassCacheIndex return_type;
+      uint8_t param_count;
+      ClassCacheIndex params[kProtoMaxParams];
+    };
+
+    /**
+     * The method definition using cached class, name and proto indexes.
+     * The class index, method name index and proto index are used with
+     * IndexCache to look up appropriate parameters for DexFile::FindMethodId().
+     */
+    struct MethodDef {
+      ClassCacheIndex declaring_class;
+      NameCacheIndex name;
+      ProtoCacheIndex proto;
+    };
+
+    /**
+     * The definition of an intrinsic function binds the method definition
+     * to an Intrinsic.
+     */
+    struct IntrinsicDef {
+      MethodDef method_def;
+      Intrinsic intrinsic;
+    };
+
+    /**
+     * Cache for class, method name and method signature indexes used during
+     * intrinsic function lookup to avoid multiple lookups of the same items.
+     *
+     * Many classes have multiple intrinsics and/or they are used in multiple
+     * method signatures and we want to avoid repeated lookups since they are
+     * not exactly cheap. The method names and method signatures are sometimes
+     * reused and therefore cached as well.
+     */
+    struct IndexCache {
+      IndexCache();
+
+      uint32_t class_indexes[kClassCacheLast - kClassCacheFirst];
+      uint32_t name_indexes[kNameCacheLast - kNameCacheFirst];
+      uint32_t proto_indexes[kProtoCacheLast - kProtoCacheFirst];
+    };
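+    // Note: an IndexCache lives on the stack of a FindIntrinsics() call
+    // and is discarded when DoFindIntrinsics() returns; only the resulting
+    // method index -> Intrinsic map is retained.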
+
+    static const char* kClassCacheNames[];
+    static const char* kNameCacheNames[];
+    static const ProtoDef kProtoCacheDefs[];
+
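+    // Sentinels stored in IndexCache slots: kIndexUnresolved marks a slot
+    // that has never been looked up, kIndexNotFound latches a failed
+    // lookup so that it is not retried.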
+    static const uint32_t kIndexNotFound = static_cast<uint32_t>(-1);
+    static const uint32_t kIndexUnresolved = static_cast<uint32_t>(-2);
+
+    static uint32_t FindClassIndex(const DexFile* dex_file, IndexCache* cache,
+                                   ClassCacheIndex index);
+    static uint32_t FindNameIndex(const DexFile* dex_file, IndexCache* cache,
+                                  NameCacheIndex index);
+    static uint32_t FindProtoIndex(const DexFile* dex_file, IndexCache* cache,
+                                   ProtoCacheIndex index);
+    static uint32_t FindMethodIndex(const DexFile* dex_file, IndexCache* cache,
+                                    const MethodDef& method_def);
+
+    void DoFindIntrinsics(const DexFile* dex_file, IndexCache* cache,
+                          const IntrinsicDef* defs, uint32_t def_count);
+
+    /**
+     * Maps method indexes (for the particular DexFile) to Intrinsic definitions.
+     */
+    std::map<uint32_t, Intrinsic> intrinsics_;
+    const DexFile* dex_file_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_DEX_FILE_METHOD_INLINER_H_
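
A hedged sketch of how this public surface is meant to be driven; TryInlineCall() and its wiring are hypothetical, only IsIntrinsic() and GenIntrinsic() come from the header above:

    // Assumes dex/quick/dex_file_method_inliner.h is included.
    bool TryInlineCall(const art::DexFileMethodInliner& inliner,
                       art::Mir2Lir* backend, art::CallInfo* info,
                       uint32_t method_index) {
      if (!inliner.IsIntrinsic(method_index)) {
        return false;  // Unknown method: fall back to a regular invoke.
      }
      // GenIntrinsic() can still return false when the backend declines to
      // expand this particular intrinsic.
      return inliner.GenIntrinsic(backend, info);
    }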
diff --git a/compiler/dex/quick/dex_file_to_method_inliner_map.cc b/compiler/dex/quick/dex_file_to_method_inliner_map.cc
new file mode 100644
index 0000000..56a42bc
--- /dev/null
+++ b/compiler/dex/quick/dex_file_to_method_inliner_map.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <utility>
+#include "thread.h"
+#include "thread-inl.h"
+#include "base/mutex.h"
+#include "base/mutex-inl.h"
+#include "base/logging.h"
+#include "driver/compiler_driver.h"
+#include "dex/quick/arm/arm_dex_file_method_inliner.h"
+#include "dex/quick/mips/mips_dex_file_method_inliner.h"
+#include "dex/quick/x86/x86_dex_file_method_inliner.h"
+
+#include "dex_file_to_method_inliner_map.h"
+
+namespace art {
+
+DexFileToMethodInlinerMap::DexFileToMethodInlinerMap(const CompilerDriver* compiler)
+    : compiler_(compiler),
+      mutex_("inline_helper_mutex") {
+}
+
+DexFileToMethodInlinerMap::~DexFileToMethodInlinerMap() {
+  for (auto& entry : inliners_) {
+    delete entry.second;
+  }
+}
+
+const DexFileMethodInliner& DexFileToMethodInlinerMap::GetMethodInliner(const DexFile* dex_file) {
+  Thread* self = Thread::Current();
+  {
+    ReaderMutexLock lock(self, mutex_);
+    auto it = inliners_.find(dex_file);
+    if (it != inliners_.end()) {
+      return *it->second;
+    }
+  }
+
+  WriterMutexLock lock(self, mutex_);
+  DexFileMethodInliner** inliner = &inliners_[dex_file];  // inserts new entry if not found
+  if (*inliner) {
+    return **inliner;
+  }
+  switch (compiler_->GetInstructionSet()) {
+    case kThumb2:
+      *inliner = new ArmDexFileMethodInliner;
+      break;
+    case kX86:
+      *inliner = new X86DexFileMethodInliner;
+      break;
+    case kMips:
+      *inliner = new MipsDexFileMethodInliner;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected instruction set: " << compiler_->GetInstructionSet();
+  }
+  DCHECK(*inliner != nullptr);
+  // TODO: add per-dex-file locking for filling the intrinsics container.
+  (*inliner)->FindIntrinsics(dex_file);
+  return **inliner;
+}
+
+}  // namespace art
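
GetMethodInliner() above is a reader/writer flavor of double-checked locking: a shared-lock fast path, then an exclusive lock whose re-check (the "if (*inliner)" test) catches a writer that raced in between the two lock acquisitions. The same shape with standard-library types, as a self-contained sketch (ART uses its own ReaderWriterMutex and lock annotations instead):

    #include <map>
    #include <memory>
    #include <mutex>
    #include <shared_mutex>

    template <typename Key, typename Value>
    class LazyMap {
     public:
      Value& Get(const Key& key) {
        {
          std::shared_lock<std::shared_mutex> lock(mutex_);  // fast path
          auto it = map_.find(key);
          if (it != map_.end()) {
            return *it->second;
          }
        }
        std::unique_lock<std::shared_mutex> lock(mutex_);
        std::unique_ptr<Value>& slot = map_[key];  // inserts null if absent
        if (slot == nullptr) {  // re-check: another writer may have won
          slot = std::make_unique<Value>();
        }
        return *slot;
      }

     private:
      std::shared_mutex mutex_;
      std::map<Key, std::unique_ptr<Value>> map_;
    };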
diff --git a/compiler/dex/quick/dex_file_to_method_inliner_map.h b/compiler/dex/quick/dex_file_to_method_inliner_map.h
new file mode 100644
index 0000000..77f2648
--- /dev/null
+++ b/compiler/dex/quick/dex_file_to_method_inliner_map.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_DEX_FILE_TO_METHOD_INLINER_MAP_H_
+#define ART_COMPILER_DEX_QUICK_DEX_FILE_TO_METHOD_INLINER_MAP_H_
+
+#include <map>
+#include <vector>
+#include "base/macros.h"
+#include "base/mutex.h"
+
+#include "dex/quick/dex_file_method_inliner.h"
+
+namespace art {
+
+class CompilerDriver;
+class DexFile;
+
+/**
+ * Map each DexFile to its DexFileMethodInliner.
+ *
+ * The method inliner is created and initialized the first time it's requested
+ * for a particular DexFile.
+ */
+class DexFileToMethodInlinerMap {
+  public:
+    explicit DexFileToMethodInlinerMap(const CompilerDriver* compiler);
+    ~DexFileToMethodInlinerMap();
+
+    const DexFileMethodInliner& GetMethodInliner(const DexFile* dex_file) LOCKS_EXCLUDED(mutex_);
+
+  private:
+    const CompilerDriver* const compiler_;
+    ReaderWriterMutex mutex_;
+    std::map<const DexFile*, DexFileMethodInliner*> inliners_ GUARDED_BY(mutex_);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_DEX_FILE_TO_METHOD_INLINER_MAP_H_
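
Intended usage, sketched with hypothetical driver wiring (only the constructor and GetMethodInliner() are the class's actual API):

    // One map is shared by the compilation worker threads; the first thread
    // to ask for a given DexFile pays the one-time intrinsic lookup cost.
    art::DexFileToMethodInlinerMap inliner_map(compiler_driver);
    const art::DexFileMethodInliner& inliner =
        inliner_map.GetMethodInliner(dex_file);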
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 7225262..469c577 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -15,6 +15,9 @@
  */
 
 #include "dex/compiler_ir.h"
+#include "dex/frontend.h"
+#include "dex/quick/dex_file_method_inliner.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex_file-inl.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "invoke_type.h"
@@ -1227,198 +1230,16 @@
   return true;
 }
 
-bool Mir2Lir::GenIntrinsic(CallInfo* info) {
-  if (info->opt_flags & MIR_INLINED) {
-    return false;
-  }
-  /*
-   * TODO: move these to a target-specific structured constant array
-   * and use a generic match function.  The list of intrinsics may be
-   * slightly different depending on target.
-   * TODO: Fold this into a matching function that runs during
-   * basic block building.  This should be part of the action for
-   * small method inlining and recognition of the special object init
-   * method.  By doing this during basic block construction, we can also
-   * take advantage of/generate new useful dataflow info.
-   */
-  const DexFile::MethodId& target_mid = cu_->dex_file->GetMethodId(info->index);
-  const DexFile::TypeId& declaring_type = cu_->dex_file->GetTypeId(target_mid.class_idx_);
-  StringPiece tgt_methods_declaring_class(
-      cu_->dex_file->StringDataByIdx(declaring_type.descriptor_idx_));
-  if (tgt_methods_declaring_class.starts_with("Ljava/lang/Double;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "long java.lang.Double.doubleToRawLongBits(double)") {
-      return GenInlinedDoubleCvt(info);
-    }
-    if (tgt_method == "double java.lang.Double.longBitsToDouble(long)") {
-      return GenInlinedDoubleCvt(info);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Float;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "int java.lang.Float.floatToRawIntBits(float)") {
-      return GenInlinedFloatCvt(info);
-    }
-    if (tgt_method == "float java.lang.Float.intBitsToFloat(int)") {
-      return GenInlinedFloatCvt(info);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Integer;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "int java.lang.Integer.reverseBytes(int)") {
-      return GenInlinedReverseBytes(info, kWord);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Long;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "long java.lang.Long.reverseBytes(long)") {
-      return GenInlinedReverseBytes(info, kLong);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Math;") ||
-             tgt_methods_declaring_class.starts_with("Ljava/lang/StrictMath;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "int java.lang.Math.abs(int)" ||
-        tgt_method == "int java.lang.StrictMath.abs(int)") {
-      return GenInlinedAbsInt(info);
-    }
-    if (tgt_method == "long java.lang.Math.abs(long)" ||
-        tgt_method == "long java.lang.StrictMath.abs(long)") {
-      return GenInlinedAbsLong(info);
-    }
-    if (tgt_method == "int java.lang.Math.max(int, int)" ||
-        tgt_method == "int java.lang.StrictMath.max(int, int)") {
-      return GenInlinedMinMaxInt(info, false /* is_min */);
-    }
-    if (tgt_method == "int java.lang.Math.min(int, int)" ||
-        tgt_method == "int java.lang.StrictMath.min(int, int)") {
-      return GenInlinedMinMaxInt(info, true /* is_min */);
-    }
-    if (tgt_method == "double java.lang.Math.sqrt(double)" ||
-        tgt_method == "double java.lang.StrictMath.sqrt(double)") {
-      return GenInlinedSqrt(info);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Short;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "short java.lang.Short.reverseBytes(short)") {
-      return GenInlinedReverseBytes(info, kSignedHalf);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/String;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "char java.lang.String.charAt(int)") {
-      return GenInlinedCharAt(info);
-    }
-    if (tgt_method == "int java.lang.String.compareTo(java.lang.String)") {
-      return GenInlinedStringCompareTo(info);
-    }
-    if (tgt_method == "boolean java.lang.String.is_empty()") {
-      return GenInlinedStringIsEmptyOrLength(info, true /* is_empty */);
-    }
-    if (tgt_method == "int java.lang.String.index_of(int, int)") {
-      return GenInlinedIndexOf(info, false /* base 0 */);
-    }
-    if (tgt_method == "int java.lang.String.index_of(int)") {
-      return GenInlinedIndexOf(info, true /* base 0 */);
-    }
-    if (tgt_method == "int java.lang.String.length()") {
-      return GenInlinedStringIsEmptyOrLength(info, false /* is_empty */);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Thread;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "java.lang.Thread java.lang.Thread.currentThread()") {
-      return GenInlinedCurrentThread(info);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Llibcore/io/Memory;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "byte libcore.io.Memory.peekByte(long)") {
-      return GenInlinedPeek(info, kSignedByte);
-    }
-    if (tgt_method == "int libcore.io.Memory.peekIntNative(long)") {
-      return GenInlinedPeek(info, kWord);
-    }
-    if (tgt_method == "long libcore.io.Memory.peekLongNative(long)") {
-      return GenInlinedPeek(info, kLong);
-    }
-    if (tgt_method == "short libcore.io.Memory.peekShortNative(long)") {
-      return GenInlinedPeek(info, kSignedHalf);
-    }
-    if (tgt_method == "void libcore.io.Memory.pokeByte(long, byte)") {
-      return GenInlinedPoke(info, kSignedByte);
-    }
-    if (tgt_method == "void libcore.io.Memory.pokeIntNative(long, int)") {
-      return GenInlinedPoke(info, kWord);
-    }
-    if (tgt_method == "void libcore.io.Memory.pokeLongNative(long, long)") {
-      return GenInlinedPoke(info, kLong);
-    }
-    if (tgt_method == "void libcore.io.Memory.pokeShortNative(long, short)") {
-      return GenInlinedPoke(info, kSignedHalf);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Lsun/misc/Unsafe;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") {
-      return GenInlinedCas32(info, false);
-    }
-    if (tgt_method == "boolean sun.misc.Unsafe.compareAndSwapObject(java.lang.Object, long, java.lang.Object, java.lang.Object)") {
-      return GenInlinedCas32(info, true);
-    }
-    if (tgt_method == "int sun.misc.Unsafe.getInt(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, false /* is_long */, false /* is_volatile */);
-    }
-    if (tgt_method == "int sun.misc.Unsafe.getIntVolatile(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, false /* is_long */, true /* is_volatile */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putInt(java.lang.Object, long, int)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, false /* is_object */,
-                                 false /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putIntVolatile(java.lang.Object, long, int)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, false /* is_object */,
-                                 true /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putOrderedInt(java.lang.Object, long, int)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, false /* is_object */,
-                                 false /* is_volatile */, true /* is_ordered */);
-    }
-    if (tgt_method == "long sun.misc.Unsafe.getLong(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, true /* is_long */, false /* is_volatile */);
-    }
-    if (tgt_method == "long sun.misc.Unsafe.getLongVolatile(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, true /* is_long */, true /* is_volatile */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putLong(java.lang.Object, long, long)") {
-      return GenInlinedUnsafePut(info, true /* is_long */, false /* is_object */,
-                                 false /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putLongVolatile(java.lang.Object, long, long)") {
-      return GenInlinedUnsafePut(info, true /* is_long */, false /* is_object */,
-                                 true /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putOrderedLong(java.lang.Object, long, long)") {
-      return GenInlinedUnsafePut(info, true /* is_long */, false /* is_object */,
-                                 false /* is_volatile */, true /* is_ordered */);
-    }
-    if (tgt_method == "java.lang.Object sun.misc.Unsafe.getObject(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, false /* is_long */, false /* is_volatile */);
-    }
-    if (tgt_method == "java.lang.Object sun.misc.Unsafe.getObjectVolatile(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, false /* is_long */, true /* is_volatile */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putObject(java.lang.Object, long, java.lang.Object)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, true /* is_object */,
-                                 false /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putObjectVolatile(java.lang.Object, long, java.lang.Object)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, true /* is_object */,
-                                 true /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putOrderedObject(java.lang.Object, long, java.lang.Object)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, true /* is_object */,
-                                 false /* is_volatile */, true /* is_ordered */);
-    }
-  }
-  return false;
-}
-
 void Mir2Lir::GenInvoke(CallInfo* info) {
-  if (GenIntrinsic(info)) {
-    return;
+  if (!(info->opt_flags & MIR_INLINED)) {
+    if (inliner_ == nullptr) {
+      QuickCompilerContext* context = reinterpret_cast<QuickCompilerContext*>(
+          cu_->compiler_driver->GetCompilerContext());
+      inliner_ = &context->GetInlinerMap()->GetMethodInliner(cu_->dex_file);
+    }
+    if (inliner_->GenIntrinsic(this, info)) {
+      return;
+    }
   }
   InvokeType original_type = info->type;  // avoiding mutation by ComputeInvokeInfo
   int call_state = 0;
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 88b244b..5dda445 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -104,7 +104,7 @@
     void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                           RegLocation rl_src2);
     void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
-    bool GenInlinedCas32(CallInfo* info, bool need_write_barrier);
+    bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
     bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 5229429..dfff260 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -258,7 +258,7 @@
   LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm";
 }
 
-bool MipsMir2Lir::GenInlinedCas32(CallInfo* info, bool need_write_barrier) {
+bool MipsMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
   DCHECK_NE(cu_->instruction_set, kThumb2);
   return false;
 }
diff --git a/compiler/dex/quick/mips/mips_dex_file_method_inliner.cc b/compiler/dex/quick/mips/mips_dex_file_method_inliner.cc
new file mode 100644
index 0000000..05d8ac8
--- /dev/null
+++ b/compiler/dex/quick/mips/mips_dex_file_method_inliner.cc
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "dex/compiler_enums.h"
+
+#include "mips_dex_file_method_inliner.h"
+
+namespace art {
+
+const DexFileMethodInliner::IntrinsicDef MipsDexFileMethodInliner::kIntrinsicMethods[] = {
+#define INTRINSIC(c, n, p, o, d) \
+    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, d } }
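+    // For example, INTRINSIC(JavaLangThread, CurrentThread, _Thread,
+    //                        kIntrinsicCurrentThread, 0)
+    // expands to { { kClassCacheJavaLangThread, kNameCacheCurrentThread,
+    //                kProtoCache_Thread }, { kIntrinsicCurrentThread, 0 } }.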
+
+    // INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0),
+    // INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0),
+    // INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0),
+    // INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, 0),
+
+    // INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, kWord),
+    // INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, kLong),
+    // INTRINSIC(JavaLangShort, ReverseBytes, S_S, kIntrinsicReverseBytes, kSignedHalf),
+
+    // INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
+    // INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
+    // INTRINSIC(JavaLangMath,       Abs, J_J, kIntrinsicAbsLong, 0),
+    // INTRINSIC(JavaLangStrictMath, Abs, J_J, kIntrinsicAbsLong, 0),
+    // INTRINSIC(JavaLangMath,       Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    // INTRINSIC(JavaLangStrictMath, Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    // INTRINSIC(JavaLangMath,       Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    // INTRINSIC(JavaLangStrictMath, Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    // INTRINSIC(JavaLangMath,       Sqrt, D_D, kIntrinsicSqrt, 0),
+    // INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0),
+
+    // INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0),
+    // INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0),
+    // INTRINSIC(JavaLangString, IsEmpty, _Z, kIntrinsicIsEmptyOrLength, kIntrinsicFlagIsEmpty),
+    // INTRINSIC(JavaLangString, IndexOf, II_I, kIntrinsicIndexOf, kIntrinsicFlagNone),
+    // INTRINSIC(JavaLangString, IndexOf, I_I, kIntrinsicIndexOf, kIntrinsicFlagBase0),
+    // INTRINSIC(JavaLangString, Length, _I, kIntrinsicIsEmptyOrLength, kIntrinsicFlagLength),
+
+    INTRINSIC(JavaLangThread, CurrentThread, _Thread, kIntrinsicCurrentThread, 0),
+
+    INTRINSIC(LibcoreIoMemory, PeekByte, J_B, kIntrinsicPeek, kSignedByte),
+    // INTRINSIC(LibcoreIoMemory, PeekIntNative, J_I, kIntrinsicPeek, kWord),
+    // INTRINSIC(LibcoreIoMemory, PeekLongNative, J_J, kIntrinsicPeek, kLong),
+    // INTRINSIC(LibcoreIoMemory, PeekShortNative, J_S, kIntrinsicPeek, kSignedHalf),
+    INTRINSIC(LibcoreIoMemory, PokeByte, JB_V, kIntrinsicPoke, kSignedByte),
+    // INTRINSIC(LibcoreIoMemory, PokeIntNative, JI_V, kIntrinsicPoke, kWord),
+    // INTRINSIC(LibcoreIoMemory, PokeLongNative, JJ_V, kIntrinsicPoke, kLong),
+    // INTRINSIC(LibcoreIoMemory, PokeShortNative, JS_V, kIntrinsicPoke, kSignedHalf),
+
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapInt, ObjectJII_Z, kIntrinsicCas,
+    //           kIntrinsicFlagNone),
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapLong, ObjectJJJ_Z, kIntrinsicCas,
+    //           kIntrinsicFlagIsLong),
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapObject, ObjectJObjectObject_Z, kIntrinsicCas,
+    //           kIntrinsicFlagIsObject),
+
+#define UNSAFE_GET_PUT(type, code, type_flags) \
+    INTRINSIC(SunMiscUnsafe, Get ## type, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              type_flags & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Get ## type ## Volatile, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              (type_flags | kIntrinsicFlagIsVolatile) & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Put ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags), \
+    INTRINSIC(SunMiscUnsafe, Put ## type ## Volatile, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsVolatile), \
+    INTRINSIC(SunMiscUnsafe, PutOrdered ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsOrdered)
+
+    // UNSAFE_GET_PUT(Int, I, kIntrinsicFlagNone),
+    // UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong),
+    // UNSAFE_GET_PUT(Object, Object, kIntrinsicFlagIsObject),
+#undef UNSAFE_GET_PUT
+
+#undef INTRINSIC
+};
+
+MipsDexFileMethodInliner::MipsDexFileMethodInliner() {
+}
+
+MipsDexFileMethodInliner::~MipsDexFileMethodInliner() {
+}
+
+void MipsDexFileMethodInliner::FindIntrinsics(const DexFile* dex_file) {
+  IndexCache cache;
+  DoFindIntrinsics(dex_file, &cache, kIntrinsicMethods, arraysize(kIntrinsicMethods));
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/mips/mips_dex_file_method_inliner.h b/compiler/dex/quick/mips/mips_dex_file_method_inliner.h
new file mode 100644
index 0000000..8fe7ec7
--- /dev/null
+++ b/compiler/dex/quick/mips/mips_dex_file_method_inliner.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_MIPS_MIPS_DEX_FILE_METHOD_INLINER_H_
+#define ART_COMPILER_DEX_QUICK_MIPS_MIPS_DEX_FILE_METHOD_INLINER_H_
+
+#include "dex/quick/dex_file_method_inliner.h"
+
+namespace art {
+
+class MipsDexFileMethodInliner : public DexFileMethodInliner {
+  public:
+    MipsDexFileMethodInliner();
+    ~MipsDexFileMethodInliner();
+
+    void FindIntrinsics(const DexFile* dex_file);
+
+  private:
+    static const IntrinsicDef kIntrinsicMethods[];
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_MIPS_MIPS_DEX_FILE_METHOD_INLINER_H_
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 1a30b7a..f567b5c 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -198,6 +198,10 @@
     SetupRegMask(&lir->u.m.use_mask, lir->operands[3]);
   }
 
+  if (flags & REG_USE4) {
+    SetupRegMask(&lir->u.m.use_mask, lir->operands[4]);
+  }
+
   if (flags & SETS_CCODES) {
     lir->u.m.def_mask |= ENCODE_CCODE;
   }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 4c56b74..ad9b0de 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -106,6 +106,7 @@
 struct LIR;
 struct RegLocation;
 struct RegisterInfo;
+class DexFileMethodInliner;
 class MIRGraph;
 class Mir2Lir;
 
@@ -555,7 +556,6 @@
     bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile);
     bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object,
                              bool is_volatile, bool is_ordered);
-    bool GenIntrinsic(CallInfo* info);
     int LoadArgRegs(CallInfo* info, int call_state,
                     NextCallInsn next_call_insn,
                     const MethodReference& target_method,
@@ -661,7 +661,7 @@
                           RegLocation rl_src1, RegLocation rl_src2) = 0;
     virtual void GenConversion(Instruction::Code opcode, RegLocation rl_dest,
                                RegLocation rl_src) = 0;
-    virtual bool GenInlinedCas32(CallInfo* info, bool need_write_barrier) = 0;
+    virtual bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object) = 0;
     virtual bool GenInlinedMinMaxInt(CallInfo* info, bool is_min) = 0;
     virtual bool GenInlinedSqrt(CallInfo* info) = 0;
     virtual bool GenInlinedPeek(CallInfo* info, OpSize size) = 0;
@@ -837,6 +837,8 @@
     unsigned int fp_spill_mask_;
     LIR* first_lir_insn_;
     LIR* last_lir_insn_;
+    // Lazily retrieved method inliner for intrinsics.
+    const DexFileMethodInliner* inliner_;
 };  // Class Mir2Lir
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 1d6509e..ffe2d67 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -104,7 +104,7 @@
     void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                           RegLocation rl_src2);
     void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
-    bool GenInlinedCas32(CallInfo* info, bool need_write_barrier);
+    bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
     bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 499547b..01d5c17 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -281,7 +281,7 @@
   NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
 }
 
-bool X86Mir2Lir::GenInlinedCas32(CallInfo* info, bool need_write_barrier) {
+bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
   DCHECK_NE(cu_->instruction_set, kThumb2);
   return false;
 }
diff --git a/compiler/dex/quick/x86/x86_dex_file_method_inliner.cc b/compiler/dex/quick/x86/x86_dex_file_method_inliner.cc
new file mode 100644
index 0000000..b788c3c
--- /dev/null
+++ b/compiler/dex/quick/x86/x86_dex_file_method_inliner.cc
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "dex/compiler_enums.h"
+
+#include "x86_dex_file_method_inliner.h"
+
+namespace art {
+
+const DexFileMethodInliner::IntrinsicDef X86DexFileMethodInliner::kIntrinsicMethods[] = {
+#define INTRINSIC(c, n, p, o, d) \
+    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, d } }
+
+    INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0),
+    INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0),
+    INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0),
+    INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, 0),
+
+    INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, kWord),
+    INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, kLong),
+    INTRINSIC(JavaLangShort, ReverseBytes, S_S, kIntrinsicReverseBytes, kSignedHalf),
+
+    INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
+    INTRINSIC(JavaLangMath,       Abs, J_J, kIntrinsicAbsLong, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, J_J, kIntrinsicAbsLong, 0),
+    INTRINSIC(JavaLangMath,       Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangStrictMath, Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangMath,       Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangStrictMath, Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    // INTRINSIC(JavaLangMath,       Sqrt, D_D, kIntrinsicSqrt, 0),
+    // INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0),
+
+    INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0),
+    INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0),
+    INTRINSIC(JavaLangString, IsEmpty, _Z, kIntrinsicIsEmptyOrLength, kIntrinsicFlagIsEmpty),
+    INTRINSIC(JavaLangString, IndexOf, II_I, kIntrinsicIndexOf, kIntrinsicFlagNone),
+    INTRINSIC(JavaLangString, IndexOf, I_I, kIntrinsicIndexOf, kIntrinsicFlagBase0),
+    INTRINSIC(JavaLangString, Length, _I, kIntrinsicIsEmptyOrLength, kIntrinsicFlagLength),
+
+    INTRINSIC(JavaLangThread, CurrentThread, _Thread, kIntrinsicCurrentThread, 0),
+
+    INTRINSIC(LibcoreIoMemory, PeekByte, J_B, kIntrinsicPeek, kSignedByte),
+    INTRINSIC(LibcoreIoMemory, PeekIntNative, J_I, kIntrinsicPeek, kWord),
+    INTRINSIC(LibcoreIoMemory, PeekLongNative, J_J, kIntrinsicPeek, kLong),
+    INTRINSIC(LibcoreIoMemory, PeekShortNative, J_S, kIntrinsicPeek, kSignedHalf),
+    INTRINSIC(LibcoreIoMemory, PokeByte, JB_V, kIntrinsicPoke, kSignedByte),
+    INTRINSIC(LibcoreIoMemory, PokeIntNative, JI_V, kIntrinsicPoke, kWord),
+    INTRINSIC(LibcoreIoMemory, PokeLongNative, JJ_V, kIntrinsicPoke, kLong),
+    INTRINSIC(LibcoreIoMemory, PokeShortNative, JS_V, kIntrinsicPoke, kSignedHalf),
+
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapInt, ObjectJII_Z, kIntrinsicCas,
+    //           kIntrinsicFlagNone),
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapLong, ObjectJJJ_Z, kIntrinsicCas,
+    //           kIntrinsicFlagIsLong),
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapObject, ObjectJObjectObject_Z, kIntrinsicCas,
+    //           kIntrinsicFlagIsObject),
+
+#define UNSAFE_GET_PUT(type, code, type_flags) \
+    INTRINSIC(SunMiscUnsafe, Get ## type, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              type_flags & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Get ## type ## Volatile, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              (type_flags | kIntrinsicFlagIsVolatile) & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Put ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags), \
+    INTRINSIC(SunMiscUnsafe, Put ## type ## Volatile, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsVolatile), \
+    INTRINSIC(SunMiscUnsafe, PutOrdered ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsOrdered)
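+    // For example, UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong) expands
+    // to five INTRINSIC() entries: getLong, getLongVolatile, putLong,
+    // putLongVolatile and putOrderedLong. kIntrinsicFlagIsObject is masked
+    // out of the Get variants because GenInlinedUnsafeGet() takes no
+    // is_object argument.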
+
+    UNSAFE_GET_PUT(Int, I, kIntrinsicFlagNone),
+    UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong),
+
+    // UNSAFE_GET_PUT(Object, Object, kIntrinsicFlagIsObject),
+    // PutObject: "TODO: fix X86, it exhausts registers for card marking."
+    INTRINSIC(SunMiscUnsafe, GetObject, ObjectJ_Object, kIntrinsicUnsafeGet,
+              kIntrinsicFlagNone),
+    INTRINSIC(SunMiscUnsafe, GetObjectVolatile, ObjectJ_Object, kIntrinsicUnsafeGet,
+              kIntrinsicFlagIsVolatile),
+#undef UNSAFE_GET_PUT
+
+#undef INTRINSIC
+};
+
+X86DexFileMethodInliner::X86DexFileMethodInliner() {
+}
+
+X86DexFileMethodInliner::~X86DexFileMethodInliner() {
+}
+
+void X86DexFileMethodInliner::FindIntrinsics(const DexFile* dex_file) {
+  IndexCache cache;
+  DoFindIntrinsics(dex_file, &cache, kIntrinsicMethods, arraysize(kIntrinsicMethods));
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/x86/x86_dex_file_method_inliner.h b/compiler/dex/quick/x86/x86_dex_file_method_inliner.h
new file mode 100644
index 0000000..7813e44
--- /dev/null
+++ b/compiler/dex/quick/x86/x86_dex_file_method_inliner.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_X86_X86_DEX_FILE_METHOD_INLINER_H_
+#define ART_COMPILER_DEX_QUICK_X86_X86_DEX_FILE_METHOD_INLINER_H_
+
+#include "dex/quick/dex_file_method_inliner.h"
+
+namespace art {
+
+class X86DexFileMethodInliner : public DexFileMethodInliner {
+  public:
+    X86DexFileMethodInliner();
+    ~X86DexFileMethodInliner();
+
+    void FindIntrinsics(const DexFile* dex_file);
+
+  private:
+    static const IntrinsicDef kIntrinsicMethods[];
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_X86_X86_DEX_FILE_METHOD_INLINER_H_
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 9cc94e8..7b42879 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -503,9 +503,9 @@
 
 void CompilerDriver::CompileAll(jobject class_loader,
                                 const std::vector<const DexFile*>& dex_files,
-                                base::TimingLogger& timings) {
+                                TimingLogger& timings) {
   DCHECK(!Runtime::Current()->IsStarted());
-  UniquePtr<ThreadPool> thread_pool(new ThreadPool(thread_count_ - 1));
+  UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
   PreCompile(class_loader, dex_files, *thread_pool.get(), timings);
   Compile(class_loader, dex_files, *thread_pool.get(), timings);
   if (dump_stats_) {
@@ -513,10 +513,9 @@
   }
 }
 
-static DexToDexCompilationLevel GetDexToDexCompilationlevel(mirror::ClassLoader* class_loader,
-                                                            const DexFile& dex_file,
-                                                            const DexFile::ClassDef& class_def)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+static DexToDexCompilationLevel GetDexToDexCompilationlevel(
+    SirtRef<mirror::ClassLoader>& class_loader, const DexFile& dex_file,
+    const DexFile::ClassDef& class_def) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   const char* descriptor = dex_file.GetClassDescriptor(class_def);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   mirror::Class* klass = class_linker->FindClass(descriptor, class_loader);
@@ -531,7 +530,7 @@
   // function). Since image classes can be verified again while compiling an application,
   // we must prevent the DEX-to-DEX compiler from introducing them.
   // TODO: find a way to enable "quick" instructions for image classes and remove this check.
-  bool compiling_image_classes = (class_loader == NULL);
+  bool compiling_image_classes = class_loader.get() == nullptr;
   if (compiling_image_classes) {
     return kRequired;
   } else if (klass->IsVerified()) {
@@ -547,7 +546,7 @@
   }
 }
 
-void CompilerDriver::CompileOne(const mirror::ArtMethod* method, base::TimingLogger& timings) {
+void CompilerDriver::CompileOne(const mirror::ArtMethod* method, TimingLogger& timings) {
   DCHECK(!Runtime::Current()->IsStarted());
   Thread* self = Thread::Current();
   jobject jclass_loader;
@@ -569,7 +568,7 @@
   std::vector<const DexFile*> dex_files;
   dex_files.push_back(dex_file);
 
-  UniquePtr<ThreadPool> thread_pool(new ThreadPool(0U));
+  UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", 0U));
   PreCompile(jclass_loader, dex_files, *thread_pool.get(), timings);
 
   uint32_t method_idx = method->GetDexMethodIndex();
@@ -579,7 +578,8 @@
   {
     ScopedObjectAccess soa(Thread::Current());
     const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_idx);
-    mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(jclass_loader);
+    SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
+                                              soa.Decode<mirror::ClassLoader*>(jclass_loader));
     dex_to_dex_compilation_level = GetDexToDexCompilationlevel(class_loader, *dex_file, class_def);
   }
   CompileMethod(code_item, method->GetAccessFlags(), method->GetInvokeType(),
@@ -591,7 +591,7 @@
 }
 
 void CompilerDriver::Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                             ThreadPool& thread_pool, base::TimingLogger& timings) {
+                             ThreadPool& thread_pool, TimingLogger& timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -600,7 +600,7 @@
 }
 
 void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                                ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                ThreadPool& thread_pool, TimingLogger& timings) {
   LoadImageClasses(timings);
 
   Resolve(class_loader, dex_files, thread_pool, timings);
@@ -685,7 +685,7 @@
 }
 
 // Make a list of descriptors for classes to include in the image
-void CompilerDriver::LoadImageClasses(base::TimingLogger& timings)
+void CompilerDriver::LoadImageClasses(TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_) {
   if (!IsImage()) {
     return;
@@ -697,11 +697,11 @@
   ScopedObjectAccess soa(self);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   for (auto it = image_classes_->begin(), end = image_classes_->end(); it != end;) {
-    std::string descriptor(*it);
+    const std::string& descriptor(*it);
     SirtRef<mirror::Class> klass(self, class_linker->FindSystemClass(descriptor.c_str()));
     if (klass.get() == NULL) {
-      image_classes_->erase(it++);
       VLOG(compiler) << "Failed to find class " << descriptor;
+      image_classes_->erase(it++);
       self->ClearException();
     } else {
       ++it;
@@ -721,8 +721,8 @@
     for (const std::pair<uint16_t, const DexFile*>& exception_type : unresolved_exception_types) {
       uint16_t exception_type_idx = exception_type.first;
       const DexFile* dex_file = exception_type.second;
-      mirror::DexCache* dex_cache = class_linker->FindDexCache(*dex_file);
-      mirror:: ClassLoader* class_loader = NULL;
+      SirtRef<mirror::DexCache> dex_cache(self, class_linker->FindDexCache(*dex_file));
+      SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
       SirtRef<mirror::Class> klass(self, class_linker->ResolveType(*dex_file, exception_type_idx,
                                                                    dex_cache, class_loader));
       if (klass.get() == NULL) {
@@ -773,7 +773,7 @@
   MaybeAddToImageClasses(object->GetClass(), compiler_driver->image_classes_.get());
 }
 
-void CompilerDriver::UpdateImageClasses(base::TimingLogger& timings) {
+void CompilerDriver::UpdateImageClasses(TimingLogger& timings) {
   if (IsImage()) {
     timings.NewSplit("UpdateImageClasses");
 
@@ -782,15 +782,14 @@
     const char* old_cause = self->StartAssertNoThreadSuspension("ImageWriter");
     gc::Heap* heap = Runtime::Current()->GetHeap();
     // TODO: Image spaces only?
+    ScopedObjectAccess soa(Thread::Current());
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    heap->FlushAllocStack();
-    heap->GetLiveBitmap()->Walk(FindClinitImageClassesCallback, this);
+    heap->VisitObjects(FindClinitImageClassesCallback, this);
     self->EndAssertNoThreadSuspension(old_cause);
   }
 }
 
-bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file,
-                                                      uint32_t type_idx) {
+bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) {
   if (IsImage() &&
       IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) {
     if (kIsDebugBuild) {
@@ -815,7 +814,7 @@
   if (IsImage()) {
     // We resolve all const-string strings when building for the image.
     ScopedObjectAccess soa(Thread::Current());
-    mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
+    SirtRef<mirror::DexCache> dex_cache(soa.Self(), Runtime::Current()->GetClassLinker()->FindDexCache(dex_file));
     Runtime::Current()->GetClassLinker()->ResolveString(dex_file, string_idx, dex_cache);
     result = true;
   }
@@ -903,26 +902,27 @@
 }
 
 static mirror::Class* ComputeCompilingMethodsClass(ScopedObjectAccess& soa,
-                                                   mirror::DexCache* dex_cache,
+                                                   SirtRef<mirror::DexCache>& dex_cache,
                                                    const DexCompilationUnit* mUnit)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // The passed dex_cache is a hint, sanity check before asking the class linker that will take a
   // lock.
   if (dex_cache->GetDexFile() != mUnit->GetDexFile()) {
-    dex_cache = mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile());
+    dex_cache.reset(mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()));
   }
-  mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader());
-  const DexFile::MethodId& referrer_method_id = mUnit->GetDexFile()->GetMethodId(mUnit->GetDexMethodIndex());
+  SirtRef<mirror::ClassLoader>
+      class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
+  const DexFile::MethodId& referrer_method_id =
+      mUnit->GetDexFile()->GetMethodId(mUnit->GetDexMethodIndex());
   return mUnit->GetClassLinker()->ResolveType(*mUnit->GetDexFile(), referrer_method_id.class_idx_,
                                               dex_cache, class_loader);
 }
 
-static mirror::ArtField* ComputeFieldReferencedFromCompilingMethod(ScopedObjectAccess& soa,
-                                                                const DexCompilationUnit* mUnit,
-                                                                uint32_t field_idx)
+static mirror::ArtField* ComputeFieldReferencedFromCompilingMethod(
+    ScopedObjectAccess& soa, const DexCompilationUnit* mUnit, uint32_t field_idx)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::DexCache* dex_cache = mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile());
-  mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader());
+  SirtRef<mirror::DexCache> dex_cache(soa.Self(), mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()));
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
   return mUnit->GetClassLinker()->ResolveField(*mUnit->GetDexFile(), field_idx, dex_cache,
                                                class_loader, false);
 }
@@ -932,8 +932,8 @@
                                                                      uint32_t method_idx,
                                                                      InvokeType type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::DexCache* dex_cache = mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile());
-  mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader());
+  SirtRef<mirror::DexCache> dex_cache(soa.Self(), mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()));
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
   return mUnit->GetClassLinker()->ResolveMethod(*mUnit->GetDexFile(), method_idx, dex_cache,
                                                 class_loader, NULL, type);
 }
@@ -947,9 +947,10 @@
   // Try to resolve field and ignore if an Incompatible Class Change Error (ie is static).
   mirror::ArtField* resolved_field = ComputeFieldReferencedFromCompilingMethod(soa, mUnit, field_idx);
   if (resolved_field != NULL && !resolved_field->IsStatic()) {
+    SirtRef<mirror::DexCache> dex_cache(soa.Self(),
+                                        resolved_field->GetDeclaringClass()->GetDexCache());
     mirror::Class* referrer_class =
-        ComputeCompilingMethodsClass(soa, resolved_field->GetDeclaringClass()->GetDexCache(),
-                                     mUnit);
+        ComputeCompilingMethodsClass(soa, dex_cache, mUnit);
     if (referrer_class != NULL) {
       mirror::Class* fields_class = resolved_field->GetDeclaringClass();
       bool access_ok = referrer_class->CanAccess(fields_class) &&
@@ -997,9 +998,9 @@
   // Try to resolve field and ignore if an Incompatible Class Change Error (ie isn't static).
   mirror::ArtField* resolved_field = ComputeFieldReferencedFromCompilingMethod(soa, mUnit, field_idx);
   if (resolved_field != NULL && resolved_field->IsStatic()) {
+    SirtRef<mirror::DexCache> dex_cache(soa.Self(), resolved_field->GetDeclaringClass()->GetDexCache());
     mirror::Class* referrer_class =
-        ComputeCompilingMethodsClass(soa, resolved_field->GetDeclaringClass()->GetDexCache(),
-                                     mUnit);
+        ComputeCompilingMethodsClass(soa, dex_cache, mUnit);
     if (referrer_class != NULL) {
       mirror::Class* fields_class = resolved_field->GetDeclaringClass();
       if (fields_class == referrer_class) {
@@ -1085,7 +1086,7 @@
   *direct_code = 0;
   *direct_method = 0;
   bool use_dex_cache = false;
-  bool compiling_boot = Runtime::Current()->GetHeap()->GetContinuousSpaces().size() == 1;
+  const bool compiling_boot = Runtime::Current()->GetHeap()->IsCompilingBoot();
   if (compiler_backend_ == kPortable) {
     if (sharp_type != kStatic && sharp_type != kDirect) {
       return;
@@ -1198,9 +1199,9 @@
     }
     // Don't try to fast-path if we don't understand the caller's class or this appears to be an
     // Incompatible Class Change Error.
+    SirtRef<mirror::DexCache> dex_cache(soa.Self(), resolved_method->GetDeclaringClass()->GetDexCache());
     mirror::Class* referrer_class =
-        ComputeCompilingMethodsClass(soa, resolved_method->GetDeclaringClass()->GetDexCache(),
-                                     mUnit);
+        ComputeCompilingMethodsClass(soa, dex_cache, mUnit);
     bool icce = resolved_method->CheckIncompatibleClassChange(*invoke_type);
     if (referrer_class != NULL && !icce) {
       mirror::Class* methods_class = resolved_method->GetDeclaringClass();
@@ -1254,10 +1255,8 @@
           const MethodReference* devirt_map_target =
               verifier::MethodVerifier::GetDevirtMap(caller_method, dex_pc);
           if (devirt_map_target != NULL) {
-            mirror::DexCache* target_dex_cache =
-                mUnit->GetClassLinker()->FindDexCache(*devirt_map_target->dex_file);
-            mirror::ClassLoader* class_loader =
-                soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader());
+            SirtRef<mirror::DexCache> target_dex_cache(soa.Self(), mUnit->GetClassLinker()->FindDexCache(*devirt_map_target->dex_file));
+            SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
             mirror::ArtMethod* called_method =
                 mUnit->GetClassLinker()->ResolveMethod(*devirt_map_target->dex_file,
                                                        devirt_map_target->dex_method_index,
@@ -1509,13 +1508,11 @@
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
   if (!SkipClass(class_linker, jclass_loader, dex_file, class_def)) {
     ScopedObjectAccess soa(self);
-    mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(jclass_loader);
-    mirror::DexCache* dex_cache = class_linker->FindDexCache(dex_file);
-
+    SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(jclass_loader));
+    SirtRef<mirror::DexCache> dex_cache(soa.Self(), class_linker->FindDexCache(dex_file));
     // Resolve the class.
     mirror::Class* klass = class_linker->ResolveType(dex_file, class_def.class_idx_, dex_cache,
                                                      class_loader);
-
     bool resolve_fields_and_methods;
     if (klass == NULL) {
       // Class couldn't be resolved, for example, super-class is in a different dex file. Don't
@@ -1598,8 +1595,8 @@
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* class_linker = manager->GetClassLinker();
   const DexFile& dex_file = *manager->GetDexFile();
-  mirror::DexCache* dex_cache = class_linker->FindDexCache(dex_file);
-  mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(manager->GetClassLoader());
+  SirtRef<mirror::DexCache> dex_cache(soa.Self(), class_linker->FindDexCache(dex_file));
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(manager->GetClassLoader()));
   mirror::Class* klass = class_linker->ResolveType(dex_file, type_idx, dex_cache, class_loader);
 
   if (klass == NULL) {
@@ -1616,7 +1613,7 @@
 }
 
 void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_file,
-                                    ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                    ThreadPool& thread_pool, TimingLogger& timings) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
   // TODO: we could resolve strings here, although the string table is largely filled with class
@@ -1635,7 +1632,7 @@
 }
 
 void CompilerDriver::Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                            ThreadPool& thread_pool, base::TimingLogger& timings) {
+                            ThreadPool& thread_pool, TimingLogger& timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -1652,8 +1649,9 @@
   const char* descriptor = dex_file.GetClassDescriptor(class_def);
   ClassLinker* class_linker = manager->GetClassLinker();
   jobject jclass_loader = manager->GetClassLoader();
-  mirror::Class* klass = class_linker->FindClass(descriptor,
-                                                 soa.Decode<mirror::ClassLoader*>(jclass_loader));
+  SirtRef<mirror::ClassLoader> class_loader(
+      soa.Self(), soa.Decode<mirror::ClassLoader*>(jclass_loader));
+  mirror::Class* klass = class_linker->FindClass(descriptor, class_loader);
   if (klass == NULL) {
     CHECK(soa.Self()->IsExceptionPending());
     soa.Self()->ClearException();
@@ -1663,11 +1661,10 @@
      * This is to ensure the class is structurally sound for compilation. An unsound class
      * will be rejected by the verifier and later skipped during compilation in the compiler.
      */
-    mirror::DexCache* dex_cache = class_linker->FindDexCache(dex_file);
+    SirtRef<mirror::DexCache> dex_cache(soa.Self(), class_linker->FindDexCache(dex_file));
     std::string error_msg;
-    if (verifier::MethodVerifier::VerifyClass(&dex_file, dex_cache,
-                                              soa.Decode<mirror::ClassLoader*>(jclass_loader),
-                                              &class_def, true, &error_msg) ==
+    if (verifier::MethodVerifier::VerifyClass(&dex_file, dex_cache, class_loader, &class_def, true,
+                                              &error_msg) ==
                                                   verifier::MethodVerifier::kHardFailure) {
       LOG(ERROR) << "Verification failed on class " << PrettyDescriptor(descriptor)
                  << " because: " << error_msg;
@@ -1689,7 +1686,7 @@
 }
 
 void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file,
-                                   ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                   ThreadPool& thread_pool, TimingLogger& timings) {
   timings.NewSplit("Verify Dex File");
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, thread_pool);
@@ -2124,7 +2121,8 @@
   const char* descriptor = dex_file.StringDataByIdx(class_type_id.descriptor_idx_);
 
   ScopedObjectAccess soa(Thread::Current());
-  mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(jclass_loader);
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
+                                            soa.Decode<mirror::ClassLoader*>(jclass_loader));
   mirror::Class* klass = manager->GetClassLinker()->FindClass(descriptor, class_loader);
 
   if (klass != NULL && !SkipClass(jclass_loader, dex_file, klass)) {
@@ -2194,7 +2192,7 @@
 }
 
 void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file,
-                                       ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                       ThreadPool& thread_pool, TimingLogger& timings) {
   timings.NewSplit("InitializeNoClinit");
 #ifndef NDEBUG
   // Sanity check blacklist descriptors.
@@ -2212,7 +2210,7 @@
 
 void CompilerDriver::InitializeClasses(jobject class_loader,
                                        const std::vector<const DexFile*>& dex_files,
-                                       ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                       ThreadPool& thread_pool, TimingLogger& timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -2221,7 +2219,7 @@
 }
 
 void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                       ThreadPool& thread_pool, base::TimingLogger& timings) {
+                       ThreadPool& thread_pool, TimingLogger& timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -2253,7 +2251,8 @@
   DexToDexCompilationLevel dex_to_dex_compilation_level = kDontDexToDexCompile;
   {
     ScopedObjectAccess soa(Thread::Current());
-    mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(jclass_loader);
+    SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
+                                              soa.Decode<mirror::ClassLoader*>(jclass_loader));
     dex_to_dex_compilation_level = GetDexToDexCompilationlevel(class_loader, dex_file, class_def);
   }
   ClassDataItemIterator it(dex_file, class_data);
@@ -2301,7 +2300,7 @@
 }
 
 void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file,
-                                    ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                    ThreadPool& thread_pool, TimingLogger& timings) {
   timings.NewSplit("Compile Dex File");
   ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this,
                                      &dex_file, thread_pool);
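
The bulk of the compiler_driver.cc changes are one pattern: raw mirror::DexCache* and mirror::ClassLoader* locals become SirtRef<> handles, and ResolveType/ResolveMethod/FindClass now accept those handles. With a moving collector, a raw pointer held across a suspend point is stale once the GC relocates the object; a SirtRef registers the slot in the thread's stack indirect reference table so the collector can find and rewrite it. A minimal sketch of the idea, with hypothetical Thread/Object stand-ins rather than ART's real types:

#include <cstdint>
#include <vector>

struct Object { uint32_t data; };

// Hypothetical thread: just a list of root slots the collector scans and
// rewrites when it moves objects.
struct Thread {
  std::vector<Object**> roots;
};

// RAII handle in the spirit of SirtRef: registers its slot on construction
// and unregisters on destruction, so the pointer stays valid across GC.
class ScopedRoot {
 public:
  ScopedRoot(Thread* self, Object* obj) : self_(self), obj_(obj) {
    self_->roots.push_back(&obj_);
  }
  ~ScopedRoot() { self_->roots.pop_back(); }
  Object* get() const { return obj_; }

 private:
  Thread* const self_;
  Object* obj_;  // The collector may rewrite this slot during a move.
};

After any operation that can suspend, code reads the handle with get() instead of the original raw pointer, which is why the call sites above now pass SirtRef<>& down into the class linker.
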
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 9321f06..7e81849 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -98,11 +98,11 @@
   ~CompilerDriver();
 
   void CompileAll(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                  base::TimingLogger& timings)
+                  TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   // Compile a single Method
-  void CompileOne(const mirror::ArtMethod* method, base::TimingLogger& timings)
+  void CompileOne(const mirror::ArtMethod* method, TimingLogger& timings)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const InstructionSet& GetInstructionSet() const {
@@ -340,42 +340,43 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                  ThreadPool& thread_pool, base::TimingLogger& timings)
+                  ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
-  void LoadImageClasses(base::TimingLogger& timings);
+  void LoadImageClasses(TimingLogger& timings);
 
   // Attempt to resolve all type, methods, fields, and strings
   // referenced from code in the dex file following PathClassLoader
   // ordering semantics.
   void Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool& thread_pool, base::TimingLogger& timings)
+               ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void ResolveDexFile(jobject class_loader, const DexFile& dex_file,
-                      ThreadPool& thread_pool, base::TimingLogger& timings)
+                      ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   void Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-              ThreadPool& thread_pool, base::TimingLogger& timings);
+              ThreadPool& thread_pool, TimingLogger& timings);
   void VerifyDexFile(jobject class_loader, const DexFile& dex_file,
-                     ThreadPool& thread_pool, base::TimingLogger& timings)
+                     ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   void InitializeClasses(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                         ThreadPool& thread_pool, base::TimingLogger& timings)
+                         ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void InitializeClasses(jobject class_loader, const DexFile& dex_file,
-                         ThreadPool& thread_pool, base::TimingLogger& timings)
+                         ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_, compiled_classes_lock_);
 
-  void UpdateImageClasses(base::TimingLogger& timings);
+  void UpdateImageClasses(TimingLogger& timings)
+      LOCKS_EXCLUDED(Locks::mutator_lock_);
   static void FindClinitImageClassesCallback(mirror::Object* object, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool& thread_pool, base::TimingLogger& timings);
+               ThreadPool& thread_pool, TimingLogger& timings);
   void CompileDexFile(jobject class_loader, const DexFile& dex_file,
-                      ThreadPool& thread_pool, base::TimingLogger& timings)
+                      ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void CompileMethod(const DexFile::CodeItem* code_item, uint32_t access_flags,
                      InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx,
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index c6687bb..a5eb94f 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -36,12 +36,13 @@
 class CompilerDriverTest : public CommonTest {
  protected:
   void CompileAll(jobject class_loader) LOCKS_EXCLUDED(Locks::mutator_lock_) {
-    base::TimingLogger timings("CompilerDriverTest::CompileAll", false, false);
+    TimingLogger timings("CompilerDriverTest::CompileAll", false, false);
     timings.StartSplit("CompileAll");
     compiler_driver_->CompileAll(class_loader,
                                  Runtime::Current()->GetCompileTimeClassPath(class_loader),
                                  timings);
     MakeAllExecutable(class_loader);
+    timings.EndSplit();
   }
 
   void EnsureCompiled(jobject class_loader, const char* class_name, const char* method,
@@ -78,7 +79,9 @@
       const DexFile::ClassDef& class_def = dex_file.GetClassDef(i);
       const char* descriptor = dex_file.GetClassDescriptor(class_def);
       ScopedObjectAccess soa(Thread::Current());
-      mirror::Class* c = class_linker->FindClass(descriptor, soa.Decode<mirror::ClassLoader*>(class_loader));
+      Thread* self = Thread::Current();
+      SirtRef<mirror::ClassLoader> loader(self, soa.Decode<mirror::ClassLoader*>(class_loader));
+      mirror::Class* c = class_linker->FindClass(descriptor, loader);
       CHECK(c != NULL);
       for (size_t i = 0; i < c->NumDirectMethods(); i++) {
         MakeExecutable(c->GetDirectMethod(i));
@@ -142,8 +145,9 @@
   jobject class_loader;
   {
     ScopedObjectAccess soa(Thread::Current());
-    CompileVirtualMethod(NULL, "java.lang.Class", "isFinalizable", "()Z");
-    CompileDirectMethod(NULL, "java.lang.Object", "<init>", "()V");
+    SirtRef<mirror::ClassLoader> null_loader(soa.Self(), nullptr);
+    CompileVirtualMethod(null_loader, "java.lang.Class", "isFinalizable", "()Z");
+    CompileDirectMethod(null_loader, "java.lang.Object", "<init>", "()V");
     class_loader = LoadDex("AbstractMethod");
   }
   ASSERT_TRUE(class_loader != NULL);
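
Also worth noting in this test diff: the added timings.EndSplit() pairs with the earlier StartSplit, since with the move from base::TimingLogger to TimingLogger each open split is expected to be closed explicitly. A toy split timer showing the pairing (a hypothetical minimal class, not ART's logger):

#include <chrono>
#include <iostream>
#include <string>

class SplitTimer {
 public:
  // Open a named interval.
  void StartSplit(const std::string& label) {
    label_ = label;
    start_ = std::chrono::steady_clock::now();
  }
  // Close the current interval and open the next in one call.
  void NewSplit(const std::string& label) {
    EndSplit();
    StartSplit(label);
  }
  // Close the current interval and report its duration.
  void EndSplit() {
    auto end = std::chrono::steady_clock::now();
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start_).count();
    std::cout << label_ << ": " << us << "us\n";
  }

 private:
  std::string label_;
  std::chrono::steady_clock::time_point start_;
};

Because NewSplit both closes and reopens, internal phases need only NewSplit calls; the outermost caller, as in CompileAll here, supplies the single closing EndSplit.
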
diff --git a/compiler/elf_writer_mclinker.cc b/compiler/elf_writer_mclinker.cc
index 8e19ef6..f3fef23 100644
--- a/compiler/elf_writer_mclinker.cc
+++ b/compiler/elf_writer_mclinker.cc
@@ -358,10 +358,11 @@
     mirror::ArtMethod* method = NULL;
     if (compiler_driver_->IsImage()) {
       ClassLinker* linker = Runtime::Current()->GetClassLinker();
-      mirror::DexCache* dex_cache = linker->FindDexCache(dex_file);
       // Unchecked as we hold mutator_lock_ on entry.
       ScopedObjectAccessUnchecked soa(Thread::Current());
-      method = linker->ResolveMethod(dex_file, method_idx, dex_cache, NULL, NULL, invoke_type);
+      SirtRef<mirror::DexCache> dex_cache(soa.Self(), linker->FindDexCache(dex_file));
+      SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
+      method = linker->ResolveMethod(dex_file, method_idx, dex_cache, class_loader, NULL, invoke_type);
       CHECK(method != NULL);
     }
     const CompiledMethod* compiled_method =
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index a8b7c88..c71cc97 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -46,7 +46,7 @@
     {
       jobject class_loader = NULL;
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-      base::TimingLogger timings("ImageTest::WriteRead", false, false);
+      TimingLogger timings("ImageTest::WriteRead", false, false);
       timings.StartSplit("CompileAll");
 #if defined(ART_USE_PORTABLE_COMPILER)
       // TODO: we disable this for portable so the test executes in a reasonable amount of time.
@@ -60,13 +60,14 @@
 
       ScopedObjectAccess soa(Thread::Current());
       OatWriter oat_writer(class_linker->GetBootClassPath(),
-                           0, 0, "", compiler_driver_.get());
+                           0, 0, "", compiler_driver_.get(), &timings);
       bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(),
                                                 !kIsTargetBuild,
                                                 class_linker->GetBootClassPath(),
                                                 oat_writer,
                                                 tmp_elf.GetFile());
       ASSERT_TRUE(success);
+      timings.EndSplit();
     }
   }
  // Workaround a bug where mcld::Linker::emit closes tmp_elf, by reopening it as tmp_oat.
@@ -94,11 +95,11 @@
     ASSERT_NE(0U, image_header.GetImageBitmapSize());
 
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    ASSERT_EQ(1U, heap->GetContinuousSpaces().size());
-    gc::space::ContinuousSpace* space = heap->GetContinuousSpaces().front();
+    ASSERT_TRUE(!heap->GetContinuousSpaces().empty());
+    gc::space::ContinuousSpace* space = heap->GetNonMovingSpace();
     ASSERT_FALSE(space->IsImageSpace());
     ASSERT_TRUE(space != NULL);
-    ASSERT_TRUE(space->IsDlMallocSpace());
+    ASSERT_TRUE(space->IsMallocSpace());
     ASSERT_GE(sizeof(image_header) + space->Size(), static_cast<size_t>(file->GetLength()));
   }
 
@@ -139,11 +140,8 @@
   class_linker_ = runtime_->GetClassLinker();
 
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  ASSERT_EQ(2U, heap->GetContinuousSpaces().size());
-  ASSERT_TRUE(heap->GetContinuousSpaces()[0]->IsImageSpace());
-  ASSERT_FALSE(heap->GetContinuousSpaces()[0]->IsDlMallocSpace());
-  ASSERT_FALSE(heap->GetContinuousSpaces()[1]->IsImageSpace());
-  ASSERT_TRUE(heap->GetContinuousSpaces()[1]->IsDlMallocSpace());
+  ASSERT_TRUE(heap->HasImageSpace());
+  ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
 
   gc::space::ImageSpace* image_space = heap->GetImageSpace();
   image_space->VerifyImageAllocations();
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 75be2c9..90e2c65 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -75,8 +75,6 @@
   image_begin_ = reinterpret_cast<byte*>(image_begin);
 
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  const std::vector<DexCache*>& all_dex_caches = class_linker->GetDexCaches();
-  dex_caches_.insert(all_dex_caches.begin(), all_dex_caches.end());
 
   UniquePtr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
   if (oat_file.get() == NULL) {
@@ -121,22 +119,16 @@
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
   heap->CollectGarbage(false);  // Remove garbage.
-  // Trim size of alloc spaces.
-  for (const auto& space : heap->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace()) {
-      space->AsDlMallocSpace()->Trim();
-    }
-  }
 
   if (!AllocMemory()) {
     return false;
   }
-#ifndef NDEBUG
-  {  // NOLINT(whitespace/braces)
+
+  if (kIsDebugBuild) {
     ScopedObjectAccess soa(Thread::Current());
     CheckNonImageClassesRemoved();
   }
-#endif
+
   Thread::Current()->TransitionFromSuspendedToRunnable();
   size_t oat_loaded_size = 0;
   size_t oat_data_offset = 0;
@@ -144,8 +136,6 @@
   CalculateNewObjectOffsets(oat_loaded_size, oat_data_offset);
   CopyAndFixupObjects();
   PatchOatCodeAndMethods();
-  // Record allocations into the image bitmap.
-  RecordImageAllocations();
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
 
   UniquePtr<File> image_file(OS::CreateEmptyFile(image_filename.c_str()));
@@ -178,39 +168,82 @@
   return true;
 }
 
-void ImageWriter::RecordImageAllocations() {
-  uint64_t start_time = NanoTime();
-  CHECK(image_bitmap_.get() != nullptr);
-  for (const auto& it : offsets_) {
-    mirror::Object* obj = reinterpret_cast<mirror::Object*>(image_->Begin() + it.second);
-    DCHECK_ALIGNED(obj, kObjectAlignment);
-    image_bitmap_->Set(obj);
+void ImageWriter::SetImageOffset(mirror::Object* object, size_t offset) {
+  DCHECK(object != nullptr);
+  DCHECK_NE(offset, 0U);
+  DCHECK(!IsImageOffsetAssigned(object));
+  mirror::Object* obj = reinterpret_cast<mirror::Object*>(image_->Begin() + offset);
+  DCHECK_ALIGNED(obj, kObjectAlignment);
+  image_bitmap_->Set(obj);
+  // Before we stomp over the lock word, save the hash code for later.
+  Monitor::Deflate(Thread::Current(), object);
+  LockWord lw(object->GetLockWord());
+  switch (lw.GetState()) {
+    case LockWord::kFatLocked: {
+      LOG(FATAL) << "Fat locked object " << obj << " found during object copy";
+      break;
+    }
+    case LockWord::kThinLocked: {
+      LOG(FATAL) << "Thin locked object " << obj << " found during object copy";
+      break;
+    }
+    case LockWord::kUnlocked:
+      // No hash, don't need to save it.
+      break;
+    case LockWord::kHashCode:
+      saved_hashes_.push_back(std::make_pair(obj, lw.GetHashCode()));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable.";
+      break;
   }
-  LOG(INFO) << "RecordImageAllocations took " << PrettyDuration(NanoTime() - start_time);
+  object->SetLockWord(LockWord::FromForwardingAddress(offset));
+  DCHECK(IsImageOffsetAssigned(object));
+}
+
+void ImageWriter::AssignImageOffset(mirror::Object* object) {
+  DCHECK(object != nullptr);
+  SetImageOffset(object, image_end_);
+  image_end_ += RoundUp(object->SizeOf(), 8);  // 64-bit alignment
+  DCHECK_LT(image_end_, image_->Size());
+}
+
+bool ImageWriter::IsImageOffsetAssigned(const mirror::Object* object) const {
+  DCHECK(object != nullptr);
+  return object->GetLockWord().GetState() == LockWord::kForwardingAddress;
+}
+
+size_t ImageWriter::GetImageOffset(const mirror::Object* object) const {
+  DCHECK(object != nullptr);
+  DCHECK(IsImageOffsetAssigned(object));
+  LockWord lock_word = object->GetLockWord();
+  size_t offset = lock_word.ForwardingAddress();
+  DCHECK_LT(offset, image_end_);
+  return offset;
 }
 
 bool ImageWriter::AllocMemory() {
-  size_t size = 0;
-  for (const auto& space : Runtime::Current()->GetHeap()->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace()) {
-      size += space->Size();
-    }
-  }
-
-  int prot = PROT_READ | PROT_WRITE;
-  size_t length = RoundUp(size, kPageSize);
+  size_t length = RoundUp(Runtime::Current()->GetHeap()->GetTotalMemory(), kPageSize);
   std::string error_msg;
-  image_.reset(MemMap::MapAnonymous("image writer image", NULL, length, prot, &error_msg));
+  image_.reset(MemMap::MapAnonymous("image writer image", NULL, length, PROT_READ | PROT_WRITE,
+                                    &error_msg));
   if (UNLIKELY(image_.get() == nullptr)) {
     LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg;
     return false;
   }
+
+  // Create the image bitmap.
+  image_bitmap_.reset(gc::accounting::SpaceBitmap::Create("image bitmap", image_->Begin(),
+                                                          length));
+  if (image_bitmap_.get() == nullptr) {
+    LOG(ERROR) << "Failed to allocate memory for image bitmap";
+    return false;
+  }
   return true;
 }
 
 void ImageWriter::ComputeLazyFieldsForImageClasses() {
-  Runtime* runtime = Runtime::Current();
-  ClassLinker* class_linker = runtime->GetClassLinker();
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   class_linker->VisitClassesWithoutClassesLock(ComputeLazyFieldsForClassesVisitor, NULL);
 }
 
@@ -223,13 +256,17 @@
   if (!obj->GetClass()->IsStringClass()) {
     return;
   }
-  String* string = obj->AsString();
+  mirror::String* string = obj->AsString();
   const uint16_t* utf16_string = string->GetCharArray()->GetData() + string->GetOffset();
-  ImageWriter* writer = reinterpret_cast<ImageWriter*>(arg);
-  for (DexCache* dex_cache : writer->dex_caches_) {
+  for (DexCache* dex_cache : Runtime::Current()->GetClassLinker()->GetDexCaches()) {
     const DexFile& dex_file = *dex_cache->GetDexFile();
-    const DexFile::StringId* string_id = dex_file.FindStringId(utf16_string);
-    if (string_id != NULL) {
+    const DexFile::StringId* string_id;
+    if (UNLIKELY(string->GetLength() == 0)) {
+      string_id = dex_file.FindStringId("");
+    } else {
+      string_id = dex_file.FindStringId(utf16_string);
+    }
+    if (string_id != nullptr) {
       // This string occurs in this dex file, assign the dex cache entry.
       uint32_t string_idx = dex_file.GetIndexForStringId(*string_id);
       if (dex_cache->GetResolvedString(string_idx) == NULL) {
@@ -239,13 +276,9 @@
   }
 }
 
-void ImageWriter::ComputeEagerResolvedStrings()
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  // TODO: Check image spaces only?
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
-  heap->FlushAllocStack();
-  heap->GetLiveBitmap()->Walk(ComputeEagerResolvedStringsCallback, this);
+void ImageWriter::ComputeEagerResolvedStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  Runtime::Current()->GetHeap()->VisitObjects(ComputeEagerResolvedStringsCallback, this);
 }
 
 bool ImageWriter::IsImageClass(const Class* klass) {
@@ -278,7 +311,7 @@
 
   // Clear references to removed classes from the DexCaches.
   ArtMethod* resolution_method = runtime->GetResolutionMethod();
-  for (DexCache* dex_cache : dex_caches_) {
+  for (DexCache* dex_cache : class_linker->GetDexCaches()) {
     for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
       Class* klass = dex_cache->GetResolvedType(i);
       if (klass != NULL && !IsImageClass(klass)) {
@@ -311,31 +344,22 @@
 
 void ImageWriter::CheckNonImageClassesRemoved()
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (compiler_driver_.GetImageClasses() == NULL) {
-    return;
+  if (compiler_driver_.GetImageClasses() != nullptr) {
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    heap->VisitObjects(CheckNonImageClassesRemovedCallback, this);
   }
-
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  Thread* self = Thread::Current();
-  {
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    heap->FlushAllocStack();
-  }
-
-  ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  heap->GetLiveBitmap()->Walk(CheckNonImageClassesRemovedCallback, this);
 }
 
 void ImageWriter::CheckNonImageClassesRemovedCallback(Object* obj, void* arg) {
   ImageWriter* image_writer = reinterpret_cast<ImageWriter*>(arg);
-  if (!obj->IsClass()) {
-    return;
-  }
-  Class* klass = obj->AsClass();
-  if (!image_writer->IsImageClass(klass)) {
-    image_writer->DumpImageClasses();
-    CHECK(image_writer->IsImageClass(klass)) << ClassHelper(klass).GetDescriptor()
-                                             << " " << PrettyDescriptor(klass);
+  if (obj->IsClass()) {
+    Class* klass = obj->AsClass();
+    if (!image_writer->IsImageClass(klass)) {
+      image_writer->DumpImageClasses();
+      CHECK(image_writer->IsImageClass(klass)) << ClassHelper(klass).GetDescriptor()
+                                               << " " << PrettyDescriptor(klass);
+    }
   }
 }
 
@@ -347,53 +371,50 @@
   }
 }
 
-void ImageWriter::CalculateNewObjectOffsetsCallback(Object* obj, void* arg) {
+void ImageWriter::CalculateObjectOffsets(Object* obj) {
   DCHECK(obj != NULL);
-  DCHECK(arg != NULL);
-  ImageWriter* image_writer = reinterpret_cast<ImageWriter*>(arg);
-
   // if it is a string, we want to intern it if it's not interned.
   if (obj->GetClass()->IsStringClass()) {
     // we must be an interned string that was forward referenced and already assigned
-    if (image_writer->IsImageOffsetAssigned(obj)) {
+    if (IsImageOffsetAssigned(obj)) {
       DCHECK_EQ(obj, obj->AsString()->Intern());
       return;
     }
-    SirtRef<String> interned(Thread::Current(), obj->AsString()->Intern());
-    if (obj != interned.get()) {
-      if (!image_writer->IsImageOffsetAssigned(interned.get())) {
+    Thread* self = Thread::Current();
+    SirtRef<Object> sirt_obj(self, obj);
+    mirror::String* interned = obj->AsString()->Intern();
+    if (sirt_obj.get() != interned) {
+      if (!IsImageOffsetAssigned(interned)) {
         // interned obj is after us, allocate its location early
-        image_writer->AssignImageOffset(interned.get());
+        AssignImageOffset(interned);
       }
       // point those looking for this object to the interned version.
-      image_writer->SetImageOffset(obj, image_writer->GetImageOffset(interned.get()));
+      SetImageOffset(sirt_obj.get(), GetImageOffset(interned));
       return;
     }
     // else (obj == interned), nothing to do but fall through to the normal case
   }
 
-  image_writer->AssignImageOffset(obj);
+  AssignImageOffset(obj);
 }
 
 ObjectArray<Object>* ImageWriter::CreateImageRoots() const {
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
-  Class* object_array_class = class_linker->FindSystemClass("[Ljava/lang/Object;");
   Thread* self = Thread::Current();
+  SirtRef<Class> object_array_class(self, class_linker->FindSystemClass("[Ljava/lang/Object;"));
 
   // build an Object[] of all the DexCaches used in the source_space_
-  ObjectArray<Object>* dex_caches = ObjectArray<Object>::Alloc(self, object_array_class,
-                                                               dex_caches_.size());
+  ObjectArray<Object>* dex_caches = ObjectArray<Object>::Alloc(self, object_array_class.get(),
+                                                               class_linker->GetDexCaches().size());
   int i = 0;
-  for (DexCache* dex_cache : dex_caches_) {
+  for (DexCache* dex_cache : class_linker->GetDexCaches()) {
     dex_caches->Set(i++, dex_cache);
   }
 
   // build an Object[] of the roots needed to restore the runtime
-  SirtRef<ObjectArray<Object> >
-      image_roots(self,
-                  ObjectArray<Object>::Alloc(self, object_array_class,
-                                             ImageHeader::kImageRootsMax));
+  SirtRef<ObjectArray<Object> > image_roots(
+      self, ObjectArray<Object>::Alloc(self, object_array_class.get(), ImageHeader::kImageRootsMax));
   image_roots->Set(ImageHeader::kResolutionMethod, runtime->GetResolutionMethod());
   image_roots->Set(ImageHeader::kImtConflictMethod, runtime->GetImtConflictMethod());
   image_roots->Set(ImageHeader::kDefaultImt, runtime->GetDefaultImt());
@@ -405,24 +426,82 @@
                    runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs));
   image_roots->Set(ImageHeader::kOatLocation,
                    String::AllocFromModifiedUtf8(self, oat_file_->GetLocation().c_str()));
-  image_roots->Set(ImageHeader::kDexCaches,
-                   dex_caches);
-  image_roots->Set(ImageHeader::kClassRoots,
-                   class_linker->GetClassRoots());
+  image_roots->Set(ImageHeader::kDexCaches, dex_caches);
+  image_roots->Set(ImageHeader::kClassRoots, class_linker->GetClassRoots());
   for (int i = 0; i < ImageHeader::kImageRootsMax; i++) {
     CHECK(image_roots->Get(i) != NULL);
   }
   return image_roots.get();
 }
 
+// Walk instance fields of the given Class. Separate function to allow recursion on the super
+// class.
+void ImageWriter::WalkInstanceFields(mirror::Object* obj, mirror::Class* klass) {
+  // Visit fields of parent classes first.
+  SirtRef<mirror::Class> sirt_class(Thread::Current(), klass);
+  mirror::Class* super = sirt_class->GetSuperClass();
+  if (super != nullptr) {
+    WalkInstanceFields(obj, super);
+  }
+  // Then walk the reference instance fields declared by this class.
+  size_t num_reference_fields = sirt_class->NumReferenceInstanceFields();
+  for (size_t i = 0; i < num_reference_fields; ++i) {
+    mirror::ArtField* field = sirt_class->GetInstanceField(i);
+    MemberOffset field_offset = field->GetOffset();
+    mirror::Object* value = obj->GetFieldObject<mirror::Object*>(field_offset, false);
+    if (value != nullptr) {
+      WalkFieldsInOrder(value);
+    }
+  }
+}
+
+// For an unvisited object, visit it then all its children found via fields.
+void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) {
+  if (!IsImageOffsetAssigned(obj)) {
+    // Walk instance fields of all objects
+    Thread* self = Thread::Current();
+    SirtRef<mirror::Object> sirt_obj(self, obj);
+    SirtRef<mirror::Class> klass(self, obj->GetClass());
+    // visit the object itself.
+    CalculateObjectOffsets(sirt_obj.get());
+    WalkInstanceFields(sirt_obj.get(), klass.get());
+    // Walk static fields of a Class.
+    if (sirt_obj->IsClass()) {
+      size_t num_static_fields = klass->NumReferenceStaticFields();
+      for (size_t i = 0; i < num_static_fields; ++i) {
+        mirror::ArtField* field = klass->GetStaticField(i);
+        MemberOffset field_offset = field->GetOffset();
+        mirror::Object* value = sirt_obj->GetFieldObject<mirror::Object*>(field_offset, false);
+        if (value != nullptr) {
+          WalkFieldsInOrder(value);
+        }
+      }
+    } else if (sirt_obj->IsObjectArray()) {
+      // Walk elements of an object array.
+      int32_t length = sirt_obj->AsObjectArray<mirror::Object>()->GetLength();
+      for (int32_t i = 0; i < length; i++) {
+        mirror::ObjectArray<mirror::Object>* obj_array = sirt_obj->AsObjectArray<mirror::Object>();
+        mirror::Object* value = obj_array->Get(i);
+        if (value != nullptr) {
+          WalkFieldsInOrder(value);
+        }
+      }
+    }
+  }
+}
+
+void ImageWriter::WalkFieldsCallback(mirror::Object* obj, void* arg) {
+  ImageWriter* writer = reinterpret_cast<ImageWriter*>(arg);
+  DCHECK(writer != nullptr);
+  writer->WalkFieldsInOrder(obj);
+}
+
 void ImageWriter::CalculateNewObjectOffsets(size_t oat_loaded_size, size_t oat_data_offset) {
   CHECK_NE(0U, oat_loaded_size);
   Thread* self = Thread::Current();
   SirtRef<ObjectArray<Object> > image_roots(self, CreateImageRoots());
 
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  const auto& spaces = heap->GetContinuousSpaces();
-  DCHECK(!spaces.empty());
   DCHECK_EQ(0U, image_end_);
 
   // Leave space for the header, but do not write it yet, we need to
@@ -431,21 +510,14 @@
 
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    heap->FlushAllocStack();
     // TODO: Image spaces only?
-    // TODO: Add InOrderWalk to heap bitmap.
     const char* old = self->StartAssertNoThreadSuspension("ImageWriter");
-    DCHECK(heap->GetLargeObjectsSpace()->GetLiveObjects()->IsEmpty());
-    for (const auto& space : spaces) {
-      space->GetLiveBitmap()->InOrderWalk(CalculateNewObjectOffsetsCallback, this);
-      DCHECK_LT(image_end_, image_->Size());
-    }
+    DCHECK_LT(image_end_, image_->Size());
+    // Walk all objects to assign image offsets; pre-existing monitors are deflated along the way.
+    heap->VisitObjects(WalkFieldsCallback, this);
     self->EndAssertNoThreadSuspension(old);
   }
 
-  // Create the image bitmap.
-  image_bitmap_.reset(gc::accounting::SpaceBitmap::Create("image bitmap", image_->Begin(),
-                                                          image_end_));
   const byte* oat_file_begin = image_begin_ + RoundUp(image_end_, kPageSize);
   const byte* oat_file_end = oat_file_begin + oat_loaded_size;
   oat_data_begin_ = oat_file_begin + oat_data_offset;
@@ -453,10 +525,13 @@
 
   // Return to write header at start of image with future location of image_roots. At this point,
   // image_end_ is the size of the image (excluding bitmaps).
+  const size_t heap_bytes_per_bitmap_byte = kBitsPerByte * gc::accounting::SpaceBitmap::kAlignment;
+  const size_t bitmap_bytes = RoundUp(image_end_, heap_bytes_per_bitmap_byte) /
+      heap_bytes_per_bitmap_byte;
   ImageHeader image_header(reinterpret_cast<uint32_t>(image_begin_),
                            static_cast<uint32_t>(image_end_),
                            RoundUp(image_end_, kPageSize),
-                           image_bitmap_->Size(),
+                           RoundUp(bitmap_bytes, kPageSize),
                            reinterpret_cast<uint32_t>(GetImageAddress(image_roots.get())),
                            oat_file_->GetOatHeader().GetChecksum(),
                            reinterpret_cast<uint32_t>(oat_file_begin),
@@ -477,17 +552,19 @@
   heap->DisableObjectValidation();
   // TODO: Image spaces only?
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  heap->FlushAllocStack();
-  heap->GetLiveBitmap()->Walk(CopyAndFixupObjectsCallback, this);
+  heap->VisitObjects(CopyAndFixupObjectsCallback, this);
+  // Fix up the objects that previously had hash codes.
+  for (const std::pair<mirror::Object*, uint32_t>& hash_pair : saved_hashes_) {
+    hash_pair.first->SetLockWord(LockWord::FromHashCode(hash_pair.second));
+  }
+  saved_hashes_.clear();
   self->EndAssertNoThreadSuspension(old_cause);
 }
 
-void ImageWriter::CopyAndFixupObjectsCallback(Object* object, void* arg) {
-  DCHECK(object != NULL);
+void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) {
+  DCHECK(obj != NULL);
   DCHECK(arg != NULL);
-  const Object* obj = object;
   ImageWriter* image_writer = reinterpret_cast<ImageWriter*>(arg);
-
   // see GetLocalAddress for similar computation
   size_t offset = image_writer->GetImageOffset(obj);
   byte* dst = image_writer->image_->Begin() + offset;
@@ -498,33 +575,7 @@
   Object* copy = reinterpret_cast<Object*>(dst);
   // The lock word of the copy is simply cleared here: forwarding state lives in the source
   // object, and any saved hash codes are restored after the copy pass.
-  LockWord lw(copy->GetLockWord());
-  switch (lw.GetState()) {
-    case LockWord::kFatLocked: {
-      Monitor* monitor = lw.FatLockMonitor();
-      CHECK(monitor != nullptr);
-      CHECK(!monitor->IsLocked());
-      if (monitor->HasHashCode()) {
-        copy->SetLockWord(LockWord::FromHashCode(monitor->GetHashCode()));
-      } else {
-        copy->SetLockWord(LockWord());
-      }
-      break;
-    }
-    case LockWord::kThinLocked: {
-      LOG(FATAL) << "Thin locked object " << obj << " found during object copy";
-      break;
-    }
-    case LockWord::kUnlocked:
-      break;
-    case LockWord::kHashCode:
-      // Do nothing since we can just keep the same hash code.
-      CHECK_NE(lw.GetHashCode(), 0);
-      break;
-    default:
-      LOG(FATAL) << "Unreachable.";
-      break;
-  }
+  copy->SetLockWord(LockWord());
   image_writer->FixupObject(obj, copy);
 }
 
@@ -629,19 +680,13 @@
   DCHECK(copy != NULL);
   Class* klass = orig->GetClass();
   DCHECK(klass != NULL);
-  FixupFields(orig,
-              copy,
-              klass->GetReferenceInstanceOffsets(),
-              false);
+  FixupFields(orig, copy, klass->GetReferenceInstanceOffsets(), false);
 }
 
 void ImageWriter::FixupStaticFields(const Class* orig, Class* copy) {
   DCHECK(orig != NULL);
   DCHECK(copy != NULL);
-  FixupFields(orig,
-              copy,
-              orig->GetReferenceStaticOffsets(),
-              true);
+  FixupFields(orig, copy, orig->GetReferenceStaticOffsets(), true);
 }
 
 void ImageWriter::FixupFields(const Object* orig,
@@ -693,11 +738,13 @@
 static ArtMethod* GetTargetMethod(const CompilerDriver::PatchInformation* patch)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  DexCache* dex_cache = class_linker->FindDexCache(patch->GetDexFile());
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, class_linker->FindDexCache(patch->GetDexFile()));
+  SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
   ArtMethod* method = class_linker->ResolveMethod(patch->GetDexFile(),
                                                   patch->GetTargetMethodIdx(),
                                                   dex_cache,
-                                                  NULL,
+                                                  class_loader,
                                                   NULL,
                                                   patch->GetTargetInvokeType());
   CHECK(method != NULL)
@@ -749,15 +796,15 @@
   // TODO: make this Thumb2 specific
   uint8_t* base = reinterpret_cast<uint8_t*>(reinterpret_cast<uint32_t>(oat_code) & ~0x1);
   uint32_t* patch_location = reinterpret_cast<uint32_t*>(base + patch->GetLiteralOffset());
-#ifndef NDEBUG
-  const DexFile::MethodId& id = patch->GetDexFile().GetMethodId(patch->GetTargetMethodIdx());
-  uint32_t expected = reinterpret_cast<uint32_t>(&id);
-  uint32_t actual = *patch_location;
-  CHECK(actual == expected || actual == value) << std::hex
-    << "actual=" << actual
-    << "expected=" << expected
-    << "value=" << value;
-#endif
+  if (kIsDebugBuild) {
+    const DexFile::MethodId& id = patch->GetDexFile().GetMethodId(patch->GetTargetMethodIdx());
+    uint32_t expected = reinterpret_cast<uint32_t>(&id);
+    uint32_t actual = *patch_location;
+    CHECK(actual == expected || actual == value) << std::hex
+      << "actual=" << actual
+      << "expected=" << expected
+      << "value=" << value;
+  }
   *patch_location = value;
   oat_header.UpdateChecksum(patch_location, sizeof(value));
 }
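
The image_writer.cc rewrite drops the offsets_ side map and instead stores each object's image offset in its lock word as a forwarding address: SetImageOffset deflates the monitor, parks any hash code in saved_hashes_, then stomps the word, and CopyAndFixupObjects puts the hashes back once copying is done. A schematic of that encoding with made-up bit assignments (ART's real LockWord layout differs):

#include <cstdint>
#include <utility>
#include <vector>

// Hypothetical 2-bit state in the top bits, 30-bit payload below.
enum class State : uint32_t { kUnlocked = 0, kHashCode = 1, kForwarding = 2 };

struct LockWord {
  uint32_t value;
  State GetState() const { return static_cast<State>(value >> 30); }
  uint32_t Payload() const { return value & 0x3FFFFFFFu; }
  static LockWord FromHashCode(uint32_t hash) {
    return {(static_cast<uint32_t>(State::kHashCode) << 30) | (hash & 0x3FFFFFFFu)};
  }
  static LockWord FromForwardingAddress(uint32_t offset) {
    return {(static_cast<uint32_t>(State::kForwarding) << 30) | (offset & 0x3FFFFFFFu)};
  }
};

struct Object { LockWord lw; };

// Claim the lock word for the image offset, saving a displaced hash code.
void AssignOffset(Object* obj, uint32_t offset,
                  std::vector<std::pair<Object*, uint32_t>>* saved_hashes) {
  if (obj->lw.GetState() == State::kHashCode) {
    saved_hashes->push_back({obj, obj->lw.Payload()});
  }
  obj->lw = LockWord::FromForwardingAddress(offset);
}

// After the copy pass, reinstate the hash codes (mirrors saved_hashes_).
void RestoreHashes(std::vector<std::pair<Object*, uint32_t>>* saved_hashes) {
  for (const auto& p : *saved_hashes) {
    p.first->lw = LockWord::FromHashCode(p.second);
  }
  saved_hashes->clear();
}

The bitmap sizing in CalculateNewObjectOffsets follows from the same alignment: with 8-byte object alignment and 8 bits per bitmap byte, one bitmap byte covers 64 heap bytes, so for example a 64 MiB image needs 1 MiB of bitmap before rounding up to a page.
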
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 0b408e8..695f59b 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -63,31 +63,11 @@
   void RecordImageAllocations() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // We use the lock word to store the offset of the object in the image.
-  void AssignImageOffset(mirror::Object* object)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(object != NULL);
-    SetImageOffset(object, image_end_);
-    image_end_ += RoundUp(object->SizeOf(), 8);  // 64-bit alignment
-    DCHECK_LT(image_end_, image_->Size());
-  }
-
-  void SetImageOffset(mirror::Object* object, size_t offset) {
-    DCHECK(object != NULL);
-    DCHECK_NE(offset, 0U);
-    DCHECK(!IsImageOffsetAssigned(object));
-    offsets_.Put(object, offset);
-  }
-
-  size_t IsImageOffsetAssigned(const mirror::Object* object) const {
-    DCHECK(object != NULL);
-    return offsets_.find(object) != offsets_.end();
-  }
-
-  size_t GetImageOffset(const mirror::Object* object) const {
-    DCHECK(object != NULL);
-    DCHECK(IsImageOffsetAssigned(object));
-    return offsets_.find(object)->second;
-  }
+  void AssignImageOffset(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetImageOffset(mirror::Object* object, size_t offset)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsImageOffsetAssigned(const mirror::Object* object) const;
+  size_t GetImageOffset(const mirror::Object* object) const;
 
   mirror::Object* GetImageAddress(const mirror::Object* object) const {
     if (object == NULL) {
@@ -147,7 +127,14 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::ObjectArray<mirror::Object>* CreateImageRoots() const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void CalculateNewObjectOffsetsCallback(mirror::Object* obj, void* arg)
+  void CalculateObjectOffsets(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void WalkInstanceFields(mirror::Object* obj, mirror::Class* klass)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void WalkFieldsInOrder(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void WalkFieldsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Creates the contiguous image in memory and adjusts pointers.
@@ -180,9 +167,6 @@
 
   const CompilerDriver& compiler_driver_;
 
-  // Map of Object to where it will be at runtime.
-  SafeMap<const mirror::Object*, size_t> offsets_;
-
   // oat file with code for this image
   OatFile* oat_file_;
 
@@ -195,6 +179,9 @@
   // Beginning target image address for the output image.
   byte* image_begin_;
 
+  // Saved hashes (the recorded objects live inside the image buffer, so they do not move).
+  std::vector<std::pair<mirror::Object*, uint32_t> > saved_hashes_;
+
   // Beginning target oat address for the pointers from the output image to its oat file.
   const byte* oat_data_begin_;
 
@@ -211,9 +198,6 @@
   uint32_t quick_imt_conflict_trampoline_offset_;
   uint32_t quick_resolution_trampoline_offset_;
   uint32_t quick_to_interpreter_bridge_offset_;
-
-  // DexCaches seen while scanning for fixing up CodeAndDirectMethods
-  std::set<mirror::DexCache*> dex_caches_;
 };
 
 }  // namespace art
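
With the inlined offset accessors gone, image_writer.h now declares the WalkInstanceFields/WalkFieldsInOrder pair: layout is computed by a depth-first walk of the object graph, assigning each object an offset the first time it is reached. A toy version of the traversal over a generic node graph (the real walk separately handles superclass fields, static fields, and array elements):

#include <cstddef>
#include <unordered_map>
#include <vector>

struct Node {
  std::vector<Node*> refs;  // Stand-in for fields and array slots.
};

// Assign offsets in first-visit order, then recurse into references, so a
// parent is laid out before the objects it points to.
void WalkInOrder(Node* node, std::size_t* end,
                 std::unordered_map<Node*, std::size_t>* offsets) {
  if (node == nullptr || offsets->count(node) != 0) {
    return;  // Already visited, like IsImageOffsetAssigned() above.
  }
  (*offsets)[node] = *end;
  *end += sizeof(Node);  // The real code uses RoundUp(obj->SizeOf(), 8).
  for (Node* ref : node->refs) {
    WalkInOrder(ref, end, offsets);
  }
}
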
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 667b913..21dd11e 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -48,9 +48,9 @@
   void CompileForTest(jobject class_loader, bool direct,
                       const char* method_name, const char* method_sig) {
     ScopedObjectAccess soa(Thread::Current());
+    SirtRef<mirror::ClassLoader> loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(class_loader));
     // Compile the native method before starting the runtime
-    mirror::Class* c = class_linker_->FindClass("LMyClassNatives;",
-                                                soa.Decode<mirror::ClassLoader*>(class_loader));
+    mirror::Class* c = class_linker_->FindClass("LMyClassNatives;", loader);
     mirror::ArtMethod* method;
     if (direct) {
       method = c->FindDirectMethod(method_name, method_sig);
diff --git a/compiler/leb128_encoder_test.cc b/compiler/leb128_encoder_test.cc
new file mode 100644
index 0000000..4fa8075
--- /dev/null
+++ b/compiler/leb128_encoder_test.cc
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/histogram-inl.h"
+#include "common_test.h"
+#include "leb128.h"
+#include "leb128_encoder.h"
+
+namespace art {
+
+class Leb128Test : public CommonTest {};
+
+struct DecodeUnsignedLeb128TestCase {
+  uint32_t decoded;
+  uint8_t leb128_data[5];
+};
+
+static DecodeUnsignedLeb128TestCase uleb128_tests[] = {
+    {0,          {0, 0, 0, 0, 0}},
+    {1,          {1, 0, 0, 0, 0}},
+    {0x7F,       {0x7F, 0, 0, 0, 0}},
+    {0x80,       {0x80, 1, 0, 0, 0}},
+    {0x81,       {0x81, 1, 0, 0, 0}},
+    {0xFF,       {0xFF, 1, 0, 0, 0}},
+    {0x4000,     {0x80, 0x80, 1, 0, 0}},
+    {0x4001,     {0x81, 0x80, 1, 0, 0}},
+    {0x4081,     {0x81, 0x81, 1, 0, 0}},
+    {0x0FFFFFFF, {0xFF, 0xFF, 0xFF, 0x7F, 0}},
+    {0xFFFFFFFF, {0xFF, 0xFF, 0xFF, 0xFF, 0xF}},
+};
+
+TEST_F(Leb128Test, Singles) {
+  // Test individual encodings.
+  for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
+    UnsignedLeb128EncodingVector builder;
+    builder.PushBack(uleb128_tests[i].decoded);
+    const uint8_t* data_ptr = &uleb128_tests[i].leb128_data[0];
+    const uint8_t* encoded_data_ptr = &builder.GetData()[0];
+    for (size_t j = 0; j < 5; ++j) {
+      if (j < builder.GetData().size()) {
+        EXPECT_EQ(data_ptr[j], encoded_data_ptr[j]) << " i = " << i << " j = " << j;
+      } else {
+        EXPECT_EQ(data_ptr[j], 0U) << " i = " << i << " j = " << j;
+      }
+    }
+    EXPECT_EQ(DecodeUnsignedLeb128(&data_ptr), uleb128_tests[i].decoded) << " i = " << i;
+  }
+}
+
+TEST_F(Leb128Test, Stream) {
+  // Encode a number of entries.
+  UnsignedLeb128EncodingVector builder;
+  for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
+    builder.PushBack(uleb128_tests[i].decoded);
+  }
+  const uint8_t* encoded_data_ptr = &builder.GetData()[0];
+  for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
+    const uint8_t* data_ptr = &uleb128_tests[i].leb128_data[0];
+    for (size_t j = 0; j < 5; ++j) {
+      if (data_ptr[j] != 0) {
+        EXPECT_EQ(data_ptr[j], encoded_data_ptr[j]) << " i = " << i << " j = " << j;
+      }
+    }
+    EXPECT_EQ(DecodeUnsignedLeb128(&encoded_data_ptr), uleb128_tests[i].decoded) << " i = " << i;
+  }
+}
+
+TEST_F(Leb128Test, Speed) {
+  UniquePtr<Histogram<uint64_t> > enc_hist(new Histogram<uint64_t>("Leb128EncodeSpeedTest", 5));
+  UniquePtr<Histogram<uint64_t> > dec_hist(new Histogram<uint64_t>("Leb128DecodeSpeedTest", 5));
+  UnsignedLeb128EncodingVector builder;
+  // Push back 1024 chunks of 1024 values measuring encoding speed.
+  uint64_t last_time = NanoTime();
+  for (size_t i = 0; i < 1024; i++) {
+    for (size_t j = 0; j < 1024; j++) {
+      builder.PushBack((i * 1024) + j);
+    }
+    uint64_t cur_time = NanoTime();
+    enc_hist->AddValue(cur_time - last_time);
+    last_time = cur_time;
+  }
+  // Verify encoding and measure decode speed.
+  const uint8_t* encoded_data_ptr = &builder.GetData()[0];
+  last_time = NanoTime();
+  for (size_t i = 0; i < 1024; i++) {
+    for (size_t j = 0; j < 1024; j++) {
+      EXPECT_EQ(DecodeUnsignedLeb128(&encoded_data_ptr), (i * 1024) + j);
+    }
+    uint64_t cur_time = NanoTime();
+    dec_hist->AddValue(cur_time - last_time);
+    last_time = cur_time;
+  }
+
+  Histogram<uint64_t>::CumulativeData enc_data;
+  enc_hist->CreateHistogram(&enc_data);
+  enc_hist->PrintConfidenceIntervals(std::cout, 0.99, enc_data);
+
+  Histogram<uint64_t>::CumulativeData dec_data;
+  dec_hist->CreateHistogram(&dec_data);
+  dec_hist->PrintConfidenceIntervals(std::cout, 0.99, dec_data);
+}
+
+}  // namespace art
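
For reference, the ULEB128 format exercised by these vectors stores seven payload bits per byte, low bits first, with the high bit flagging continuation. A standalone sketch of encode/decode consistent with the table above (ART's own versions live in leb128.h and leb128_encoder.h):

#include <cstdint>
#include <vector>

// Emit seven bits at a time, setting the high bit while more bits remain.
static void EncodeUleb128(std::vector<uint8_t>* out, uint32_t value) {
  while (value > 0x7F) {
    out->push_back(static_cast<uint8_t>(value) | 0x80);  // low 7 bits + "more" flag
    value >>= 7;
  }
  out->push_back(static_cast<uint8_t>(value));  // final byte, high bit clear
}

// Accumulate 7-bit groups until a byte with the high bit clear is seen;
// advances *data past the encoded value, like DecodeUnsignedLeb128 above.
static uint32_t DecodeUleb128(const uint8_t** data) {
  uint32_t result = 0;
  int shift = 0;
  uint8_t byte;
  do {
    byte = *(*data)++;
    result |= static_cast<uint32_t>(byte & 0x7F) << shift;
    shift += 7;
  } while ((byte & 0x80) != 0);
  return result;
}

For example, 0x80 becomes {0x80, 0x01} and 0xFFFFFFFF becomes {0xFF, 0xFF, 0xFF, 0xFF, 0x0F}, matching the rows in uleb128_tests.
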
diff --git a/compiler/llvm/llvm_compilation_unit.cc b/compiler/llvm/llvm_compilation_unit.cc
index 038f5dc..feb495e 100644
--- a/compiler/llvm/llvm_compilation_unit.cc
+++ b/compiler/llvm/llvm_compilation_unit.cc
@@ -211,6 +211,7 @@
   ::llvm::TargetOptions target_options;
   target_options.FloatABIType = ::llvm::FloatABI::Soft;
   target_options.NoFramePointerElim = true;
+  target_options.NoFramePointerElimNonLeaf = true;
   target_options.UseSoftFloat = false;
   target_options.EnableFastISel = false;
 
@@ -254,7 +255,7 @@
 
     ::llvm::OwningPtr< ::llvm::tool_output_file> out_file(
       new ::llvm::tool_output_file(bitcode_filename_.c_str(), errmsg,
-                                 ::llvm::sys::fs::F_Binary));
+                                 ::llvm::raw_fd_ostream::F_Binary));
 
 
     if (!errmsg.empty()) {
@@ -274,6 +275,7 @@
   // pm_builder.Inliner = ::llvm::createAlwaysInlinerPass();
   // pm_builder.Inliner = ::llvm::createPartialInliningPass();
   pm_builder.OptLevel = 3;
+  pm_builder.DisableSimplifyLibCalls = 1;
   pm_builder.DisableUnitAtATime = 1;
   pm_builder.populateFunctionPassManager(fpm);
   pm_builder.populateModulePassManager(pm);
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 6213b45..fd0a69d 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -67,6 +67,7 @@
 };
 
 TEST_F(OatTest, WriteRead) {
+  TimingLogger timings("CommonTest::WriteRead", false, false);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
   // TODO: make selectable
@@ -82,7 +83,7 @@
                                             insn_features, false, NULL, 2, true));
   jobject class_loader = NULL;
   if (kCompile) {
-    base::TimingLogger timings("OatTest::WriteRead", false, false);
+    TimingLogger timings("OatTest::WriteRead", false, false);
     compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), timings);
   }
 
@@ -92,7 +93,8 @@
                        42U,
                        4096U,
                        "lue.art",
-                       compiler_driver_.get());
+                       compiler_driver_.get(),
+                       &timings);
   bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(),
                                             !kIsTargetBuild,
                                             class_linker->GetBootClassPath(),
@@ -101,7 +103,6 @@
   ASSERT_TRUE(success);
 
   if (kCompile) {  // OatWriter strips the code, regenerate to compare
-    base::TimingLogger timings("CommonTest::WriteRead", false, false);
     compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), timings);
   }
   std::string error_msg;
@@ -130,7 +131,8 @@
       num_virtual_methods = it.NumVirtualMethods();
     }
     const char* descriptor = dex_file->GetClassDescriptor(class_def);
-    mirror::Class* klass = class_linker->FindClass(descriptor, NULL);
+    SirtRef<mirror::ClassLoader> loader(Thread::Current(), nullptr);
+    mirror::Class* klass = class_linker->FindClass(descriptor, loader);
 
     UniquePtr<const OatFile::OatClass> oat_class(oat_dex_file->GetOatClass(i));
     CHECK_EQ(mirror::Class::Status::kStatusNotReady, oat_class->GetStatus()) << descriptor;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index f3bb112..8382469 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -40,7 +40,8 @@
                      uint32_t image_file_location_oat_checksum,
                      uint32_t image_file_location_oat_begin,
                      const std::string& image_file_location,
-                     const CompilerDriver* compiler)
+                     const CompilerDriver* compiler,
+                     TimingLogger* timings)
   : compiler_driver_(compiler),
     dex_files_(&dex_files),
     image_file_location_oat_checksum_(image_file_location_oat_checksum),
@@ -77,12 +78,31 @@
     size_oat_class_status_(0),
     size_oat_class_method_bitmaps_(0),
     size_oat_class_method_offsets_(0) {
-  size_t offset = InitOatHeader();
-  offset = InitOatDexFiles(offset);
-  offset = InitDexFiles(offset);
-  offset = InitOatClasses(offset);
-  offset = InitOatCode(offset);
-  offset = InitOatCodeDexFiles(offset);
+  size_t offset;
+  {
+    TimingLogger::ScopedSplit split("InitOatHeader", timings);
+    offset = InitOatHeader();
+  }
+  {
+    TimingLogger::ScopedSplit split("InitOatDexFiles", timings);
+    offset = InitOatDexFiles(offset);
+  }
+  {
+    TimingLogger::ScopedSplit split("InitDexFiles", timings);
+    offset = InitDexFiles(offset);
+  }
+  {
+    TimingLogger::ScopedSplit split("InitOatClasses", timings);
+    offset = InitOatClasses(offset);
+  }
+  {
+    TimingLogger::ScopedSplit split("InitOatCode", timings);
+    offset = InitOatCode(offset);
+  }
+  {
+    TimingLogger::ScopedSplit split("InitOatCodeDexFiles", timings);
+    offset = InitOatCodeDexFiles(offset);
+  }
   size_ = offset;
 
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
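
Each initialization phase above now runs under a TimingLogger::ScopedSplit, an RAII guard that opens a named split on construction and closes it on destruction, so a timing record is produced even on early returns. A stand-in sketch of the pattern (not ART's TimingLogger):

#include <chrono>
#include <cstdio>
#include <string>

// Stand-in demonstrating the ScopedSplit pattern used above:
// construction starts a named interval, destruction records it.
class ScopedSplitSketch {
 public:
  explicit ScopedSplitSketch(const char* label)
      : label_(label), start_(std::chrono::steady_clock::now()) {}
  ~ScopedSplitSketch() {
    auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::steady_clock::now() - start_).count();
    std::printf("%s: %lld ns\n", label_.c_str(), static_cast<long long>(ns));
  }
 private:
  std::string label_;
  std::chrono::steady_clock::time_point start_;
};

// Usage mirrors the constructor above:
//   { ScopedSplitSketch split("InitOatHeader"); offset = InitOatHeader(); }
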
@@ -405,23 +425,23 @@
     size_t gc_map_size = gc_map.size() * sizeof(gc_map[0]);
     gc_map_offset = (gc_map_size == 0) ? 0 : offset;
 
-#if !defined(NDEBUG)
-    // We expect GC maps except when the class hasn't been verified or the method is native
-    ClassReference class_ref(&dex_file, class_def_index);
-    CompiledClass* compiled_class = compiler_driver_->GetCompiledClass(class_ref);
-    mirror::Class::Status status;
-    if (compiled_class != NULL) {
-      status = compiled_class->GetStatus();
-    } else if (verifier::MethodVerifier::IsClassRejected(class_ref)) {
-      status = mirror::Class::kStatusError;
-    } else {
-      status = mirror::Class::kStatusNotReady;
+    if (kIsDebugBuild) {
+      // We expect GC maps except when the class hasn't been verified or the method is native
+      ClassReference class_ref(&dex_file, class_def_index);
+      CompiledClass* compiled_class = compiler_driver_->GetCompiledClass(class_ref);
+      mirror::Class::Status status;
+      if (compiled_class != NULL) {
+        status = compiled_class->GetStatus();
+      } else if (verifier::MethodVerifier::IsClassRejected(class_ref)) {
+        status = mirror::Class::kStatusError;
+      } else {
+        status = mirror::Class::kStatusNotReady;
+      }
+      CHECK(gc_map_size != 0 || is_native || status < mirror::Class::kStatusVerified)
+          << &gc_map << " " << gc_map_size << " " << (is_native ? "true" : "false") << " "
+          << (status < mirror::Class::kStatusVerified) << " " << status << " "
+          << PrettyMethod(method_idx, dex_file);
     }
-    CHECK(gc_map_size != 0 || is_native || status < mirror::Class::kStatusVerified)
-        << &gc_map << " " << gc_map_size << " " << (is_native ? "true" : "false") << " "
-        << (status < mirror::Class::kStatusVerified) << " " << status << " "
-        << PrettyMethod(method_idx, dex_file);
-#endif
 
     // Deduplicate GC maps
     SafeMap<const std::vector<uint8_t>*, uint32_t>::iterator gc_map_iter =
@@ -448,11 +468,12 @@
 
   if (compiler_driver_->IsImage()) {
     ClassLinker* linker = Runtime::Current()->GetClassLinker();
-    mirror::DexCache* dex_cache = linker->FindDexCache(dex_file);
     // Unchecked as we hold mutator_lock_ on entry.
     ScopedObjectAccessUnchecked soa(Thread::Current());
+    SirtRef<mirror::DexCache> dex_cache(soa.Self(), linker->FindDexCache(dex_file));
+    SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
     mirror::ArtMethod* method = linker->ResolveMethod(dex_file, method_idx, dex_cache,
-                                                      NULL, NULL, invoke_type);
+                                                      class_loader, nullptr, invoke_type);
     CHECK(method != NULL);
     method->SetFrameSizeInBytes(frame_size_in_bytes);
     method->SetCoreSpillMask(core_spill_mask);
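
The gc-map hunk above replaces #if !defined(NDEBUG) with if (kIsDebugBuild): the checks are now parsed and type-checked in every build, and the optimizer removes the branch in release builds, so the debug-only code cannot silently rot. A minimal illustration; the flag definition below is an assumed shape, not copied from ART:

#include <cassert>
#include <cstddef>

// Assumed shape of the flag: a compile-time constant instead of an #ifdef.
#ifdef NDEBUG
static const bool kIsDebugBuild = false;
#else
static const bool kIsDebugBuild = true;
#endif

void CheckGcMap(size_t gc_map_size, bool is_native, bool verified) {
  if (kIsDebugBuild) {  // always compiled, dead-code-eliminated in release
    assert(gc_map_size != 0 || is_native || !verified);
  }
}
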
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 5d947cf..64275e6 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -67,7 +67,8 @@
             uint32_t image_file_location_oat_checksum,
             uint32_t image_file_location_oat_begin,
             const std::string& image_file_location,
-            const CompilerDriver* compiler);
+            const CompilerDriver* compiler,
+            TimingLogger* timings);
 
   const OatHeader& GetOatHeader() const {
     return *oat_header_;
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 3781921..8b232700 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -242,7 +242,7 @@
                                       bool image,
                                       UniquePtr<CompilerDriver::DescriptorSet>& image_classes,
                                       bool dump_stats,
-                                      base::TimingLogger& timings) {
+                                      TimingLogger& timings) {
     // SirtRef and ClassLoader creation needs to come after Runtime::Create
     jobject class_loader = NULL;
     Thread* self = Thread::Current();
@@ -280,6 +280,7 @@
     uint32_t image_file_location_oat_checksum = 0;
     uint32_t image_file_location_oat_data_begin = 0;
     if (!driver->IsImage()) {
+      TimingLogger::ScopedSplit split("Loading image checksum", &timings);
       gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
       image_file_location_oat_checksum = image_space->GetImageHeader().GetOatChecksum();
       image_file_location_oat_data_begin =
@@ -294,8 +295,10 @@
                          image_file_location_oat_checksum,
                          image_file_location_oat_data_begin,
                          image_file_location,
-                         driver.get());
+                         driver.get(),
+                         &timings);
 
+    TimingLogger::ScopedSplit split("Writing ELF", &timings);
     if (!driver->WriteElf(android_root, is_host, dex_files, oat_writer, oat_file)) {
       LOG(ERROR) << "Failed to write ELF file " << oat_file->GetPath();
       return NULL;
@@ -600,7 +603,7 @@
 }
 
 static int dex2oat(int argc, char** argv) {
-  base::TimingLogger timings("compiler", false, false);
+  TimingLogger timings("compiler", false, false);
 
   InitLogging(argv);
 
@@ -1091,7 +1094,7 @@
 
   if (is_host) {
     if (dump_timing || (dump_slow_timing && timings.GetTotalNs() > MsToNs(1000))) {
-      LOG(INFO) << Dumpable<base::TimingLogger>(timings);
+      LOG(INFO) << Dumpable<TimingLogger>(timings);
     }
     return EXIT_SUCCESS;
   }
@@ -1133,7 +1136,7 @@
   timings.EndSplit();
 
   if (dump_timing || (dump_slow_timing && timings.GetTotalNs() > MsToNs(1000))) {
-    LOG(INFO) << Dumpable<base::TimingLogger>(timings);
+    LOG(INFO) << Dumpable<TimingLogger>(timings);
   }
 
   // Everything was successfully written, do an explicit exit here to avoid running Runtime
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 8d4f3ce..936fb07 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -180,6 +180,44 @@
   return os;
 }
 
+struct FpRegister {
+  explicit FpRegister(uint32_t instr, uint16_t at_bit, uint16_t extra_at_bit) {
+    size = (instr >> 8) & 1;
+    uint32_t Vn = (instr >> at_bit) & 0xF;
+    uint32_t N = (instr >> extra_at_bit) & 1;
+    r = (size != 0 ? ((N << 4) | Vn) : ((Vn << 1) | N));
+  }
+  FpRegister(const FpRegister& other, uint32_t offset)
+      : size(other.size), r(other.r + offset) {}
+
+  uint32_t size;  // 0 = f32, 1 = f64
+  uint32_t r;
+};
+std::ostream& operator<<(std::ostream& os, const FpRegister& rhs) {
+  return os << ((rhs.size != 0) ? "d" : "s") << rhs.r;
+}
+
+struct FpRegisterRange {
+  explicit FpRegisterRange(uint32_t instr)
+      : first(instr, 12, 22), imm8(instr & 0xFF) {}
+  FpRegister first;
+  uint32_t imm8;
+};
+std::ostream& operator<<(std::ostream& os, const FpRegisterRange& rhs) {
+  os << "{" << rhs.first;
+  int count = (rhs.first.size != 0 ? ((rhs.imm8 + 1u) >> 1) : rhs.imm8);
+  if (count > 1) {
+    os << "-" << FpRegister(rhs.first, count - 1);
+  }
+  if (rhs.imm8 == 0) {
+    os << " (EMPTY)";
+  } else if (rhs.first.size != 0 && (rhs.imm8 & 1) != 0) {
+    os << rhs.first << " (HALF)";
+  }
+  os << "}";
+  return os;
+}
+
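
FpRegister above encodes the VFP numbering quirk: the extra bit is the high bit for double-precision registers ((N << 4) | Vn) but the low bit for single-precision ones ((Vn << 1) | N). A worked example of the two schemes; the field values are fabricated for illustration:

#include <cassert>
#include <cstdint>

int main() {
  // Single precision (size bit 8 clear): Vd = 0xD at bit 12 and D = 1 at
  // bit 22 name s27, since r = (Vd << 1) | D = 0b11011 = 27.
  uint32_t vd = 0xD, d_bit = 1;
  uint32_t single = (vd << 12) | (d_bit << 22);
  assert(((((single >> 12) & 0xF) << 1) | ((single >> 22) & 1)) == 27);

  // Double precision (size bit 8 set): the same fields name d29,
  // since r = (D << 4) | Vd = 0b11101 = 29.
  uint32_t dbl = single | (1u << 8);
  assert(((((dbl >> 22) & 1) << 4) | ((dbl >> 12) & 0xF)) == 29);
  return 0;
}
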
 void DisassemblerArm::DumpArm(std::ostream& os, const uint8_t* instr_ptr) {
   uint32_t instruction = ReadU32(instr_ptr);
   uint32_t cond = (instruction >> 28) & 0xf;
@@ -389,9 +427,9 @@
           args << Rt << "," << Rd << ", [" << Rn;
           const char *sign = U ? "+" : "-";
           if (P == 0 && W == 1) {
-            args << "], #" << sign << imm8;
+            args << "], #" << sign << (imm8 << 2);
           } else {
-            args << ", #" << sign << imm8 << "]";
+            args << ", #" << sign << (imm8 << 2) << "]";
             if (W == 1) {
               args << "!";
             }
@@ -616,57 +654,115 @@
         uint32_t op4 = (instr >> 4) & 0x1;
 
         if (coproc == 10 || coproc == 11) {   // 101x
-          if (op3 < 0x20 && (op3 >> 1) != 2) {     // 0xxxxx and not 00010x
-            // extension load/store instructions
-            int op = op3 & 0x1f;
+          if (op3 < 0x20 && (op3 & ~5) != 0) {     // 0xxxxx and not 000x0x
+            // Extension register load/store instructions
+            // |1111|110|00000|0000|1111|110|0|00000000|
+            // |5  2|1 9|87654|3  0|5  2|1 9|8|7      0|
+            // |----|---|-----|----|----|---|-|--------|
+            // |3322|222|22222|1111|1111|110|0|00000000|
+            // |1  8|7 5|4   0|9  6|5  2|1 9|8|7      0|
+            // |----|---|-----|----|----|---|-|--------|
+            // |1110|110|PUDWL| Rn | Vd |101|S|  imm8  |
             uint32_t P = (instr >> 24) & 1;
             uint32_t U = (instr >> 23) & 1;
-            uint32_t D = (instr >> 22) & 1;
             uint32_t W = (instr >> 21) & 1;
-            uint32_t S = (instr >> 8) & 1;
-            ArmRegister Rn(instr, 16);
-            uint32_t Vd = (instr >> 12) & 0xF;
-            uint32_t imm8 = instr & 0xFF;
-            uint32_t d = (S == 0 ? ((Vd << 1) | D) : (Vd | (D << 4)));
-            ArmRegister Rd(d, 0);
-
-            if (op == 8 || op == 12 || op == 10 || op == 14 ||
-                op == 18 || op == 22) {   // 01x00 or 01x10
-              // vector store multiple or vpush
-              if (P == 1 && U == 0 && W == 1 && Rn.r == 13) {
-                opcode << "vpush" << (S == 0 ? ".f64" : ".f32");
-                args << Rd << " .. " << (Rd.r + imm8);
-              } else {
-                opcode << "vstm" << (S == 0 ? ".f64" : ".f32");
-                args << Rn << ", " << Rd << " .. " << (Rd.r + imm8);
+            if (P == U && W == 1) {
+              opcode << "UNDEFINED";
+            } else {
+              uint32_t L = (instr >> 20) & 1;
+              uint32_t S = (instr >> 8) & 1;
+              ArmRegister Rn(instr, 16);
+              if (P == 1 && W == 0) {  // VLDR
+                FpRegister d(instr, 12, 22);
+                uint32_t imm8 = instr & 0xFF;
+                opcode << (L == 1 ? "vldr" : "vstr");
+                args << d << ", [" << Rn << ", #" << ((U == 1) ? "" : "-")
+                     << (imm8 << 2) << "]";
+              } else if (Rn.r == 13 && W == 1 && U == L) {  // VPUSH/VPOP
+                opcode << (L == 1 ? "vpop" : "vpush");
+                args << FpRegisterRange(instr);
+              } else {  // VLDM
+                opcode << (L == 1 ? "vldm" : "vstm");
+                args << Rn << ((W == 1) ? "!" : "") << ", "
+                     << FpRegisterRange(instr);
               }
-            } else if (op == 16 || op == 20 || op == 24 || op == 28) {
-              // 1xx00
-              // vector store register
-              opcode << "vstr" << (S == 0 ? ".f64" : ".f32");
-              args << Rd << ", [" << Rn << ", #" << imm8 << "]";
-            } else if (op == 17 || op == 21 || op == 25 || op == 29) {
-              // 1xx01
-              // vector load register
-               opcode << "vldr" << (S == 0 ? ".f64" : ".f32");
-               args << Rd << ", [" << Rn << ", #" << imm8 << "]";
-            } else if (op == 9 || op == 13 || op == 11 || op == 15 ||
-                op == 19 || op == 23 ) {    // 01x11 10x11
-              // vldm or vpop
-              if (P == 1 && U == 0 && W == 1 && Rn.r == 13) {
-                opcode << "vpop" << (S == 0 ? ".f64" : ".f32");
-                args <<  Rd << " .. " << (Rd.r + imm8);
-              } else {
-                opcode << "vldm" << (S == 0 ? ".f64" : ".f32");
-                args << Rn << ", " << Rd << " .. " << (Rd.r + imm8);
-              }
+              opcode << (S == 1 ? ".f64" : ".f32");
             }
           } else if ((op3 >> 1) == 2) {      // 00010x
-            // 64 bit transfers
+            if ((instr & 0xD0) == 0x10) {
+              // 64-bit transfers between ARM core and extension registers.
+              uint32_t L = (instr >> 20) & 1;
+              uint32_t S = (instr >> 8) & 1;
+              ArmRegister Rt2(instr, 16);
+              ArmRegister Rt(instr, 12);
+              FpRegister m(instr, 0, 5);
+              opcode << "vmov" << (S ? ".f64" : ".f32");
+              if (L == 1) {
+                args << Rt << ", " << Rt2 << ", ";
+              }
+              if (S) {
+                args << m;
+              } else {
+                args << m << ", " << FpRegister(m, 1);
+              }
+              if (L == 0) {
+                args << ", " << Rt << ", " << Rt2;
+              }
+              if (Rt.r == 15 || Rt.r == 13 || Rt2.r == 15 || Rt2.r == 13 ||
+                  (S == 0 && m.r == 31) || (L == 1 && Rt.r == Rt2.r)) {
+                args << " (UNPREDICTABLE)";
+              }
+            }
           } else if ((op3 >> 4) == 2 && op4 == 0) {     // 10xxxx, op = 0
             // fp data processing
           } else if ((op3 >> 4) == 2 && op4 == 1) {     // 10xxxx, op = 1
-            // 8,16,32 bit transfers
+            if (coproc == 10 && (op3 & 0xE) == 0) {
+              // VMOV (between ARM core register and single-precision register)
+              // |1111|1100|000|0 |0000|1111|1100|0|00|0|0000|
+              // |5   |1  8|7 5|4 |3  0|5  2|1  8|7|65|4|3  0|
+              // |----|----|---|- |----|----|----|-|--|-|----|
+              // |3322|2222|222|2 |1111|1111|1100|0|00|0|0000|
+              // |1  8|7  4|3 1|0 |9  6|5  2|1  8|7|65|4|3  0|
+              // |----|----|---|- |----|----|----|-|--|-|----|
+              // |1110|1110|000|op| Vn | Rt |1010|N|00|1|0000|
+              uint32_t op = op3 & 1;
+              ArmRegister Rt(instr, 12);
+              FpRegister n(instr, 16, 7);
+              opcode << "vmov.f32";
+              if (op) {
+                args << Rt << ", " << n;
+              } else {
+                args << n << ", " << Rt;
+              }
+              if (Rt.r == 13 || Rt.r == 15 || (instr & 0x6F) != 0) {
+                args << " (UNPREDICTABLE)";
+              }
+            } else if (coproc == 10 && op3 == 0x2F) {
+              // VMRS
+              // |1111|11000000|0000|1111|1100|000|0|0000|
+              // |5   |1      4|3  0|5  2|1  8|7 5|4|3  0|
+              // |----|--------|----|----|----|---|-|----|
+              // |3322|22222222|1111|1111|1100|000|0|0000|
+              // |1  8|7      0|9  6|5  2|1  8|7 5|4|3  0|
+              // |----|--------|----|----|----|---|-|----|
+              // |1110|11101111|reg | Rt |1010|000|1|0000| - last 7 0s are (0)
+              uint32_t spec_reg = (instr >> 16) & 0xF;
+              ArmRegister Rt(instr, 12);
+              opcode << "vmrs";
+              if (spec_reg == 1) {
+                if (Rt.r == 15) {
+                  args << "APSR_nzcv, FPSCR";
+                } else if (Rt.r == 13) {
+                  args << Rt << ", FPSCR (UNPREDICTABLE)";
+                } else {
+                  args << Rt << ", FPSCR";
+                }
+              } else {
+                args << "(PRIVILEGED)";
+              }
+            } else if (coproc == 11 && (op3 & 0x9) != 8) {
+              // VMOV (ARM core register to scalar or vice versa; 8/16/32-bit)
+            }
           }
         }
 
@@ -686,30 +782,19 @@
             uint32_t opc3 = (instr >> 6) & 0x3;
             if ((opc1 & 0xB) == 0xB) {  // 1x11
               // Other VFP data-processing instructions.
-              uint32_t D  = (instr >> 22) & 0x1;
-              uint32_t Vd = (instr >> 12) & 0xF;
               uint32_t sz = (instr >> 8) & 1;
-              uint32_t M  = (instr >> 5) & 1;
-              uint32_t Vm = instr & 0xF;
-              bool dp_operation = sz == 1;
+              FpRegister d(instr, 12, 22);
+              FpRegister m(instr, 0, 5);
               switch (opc2) {
                 case 0x1:  // Vneg/Vsqrt
                   //  1110 11101 D 11 0001 dddd 101s o1M0 mmmm
-                  opcode << (opc3 == 1 ? "vneg" : "vsqrt") << (dp_operation ? ".f64" : ".f32");
-                  if (dp_operation) {
-                    args << "f" << ((D << 4) | Vd) << ", " << "f" << ((M << 4) | Vm);
-                  } else {
-                    args << "f" << ((Vd << 1) | D) << ", " << "f" << ((Vm << 1) | M);
-                  }
+                  opcode << (opc3 == 1 ? "vneg" : "vsqrt") << (sz == 1 ? ".f64" : ".f32");
+                  args << d << ", " << m;
                   break;
                 case 0x4: case 0x5:  {  // Vector compare
                   // 1110 11101 D 11 0100 dddd 101 sE1M0 mmmm
-                  opcode << (opc3 == 1 ? "vcmp" : "vcmpe") << (dp_operation ? ".f64" : ".f32");
-                  if (dp_operation) {
-                    args << "f" << ((D << 4) | Vd) << ", " << "f" << ((M << 4) | Vm);
-                  } else {
-                    args << "f" << ((Vd << 1) | D) << ", " << "f" << ((Vm << 1) | M);
-                  }
+                  opcode << (opc3 == 1 ? "vcmp" : "vcmpe") << (sz == 1 ? ".f64" : ".f32");
+                  args << d << ", " << m;
                   break;
                 }
               }
@@ -720,18 +805,11 @@
           if ((instr & 0xFFBF0ED0) == 0xeeb10ac0) {  // Vsqrt
             //  1110 11101 D 11 0001 dddd 101S 11M0 mmmm
             //  1110 11101 0 11 0001 1101 1011 1100 1000 - eeb1dbc8
-            uint32_t D = (instr >> 22) & 1;
-            uint32_t Vd = (instr >> 12) & 0xF;
             uint32_t sz = (instr >> 8) & 1;
-            uint32_t M = (instr >> 5) & 1;
-            uint32_t Vm = instr & 0xF;
-            bool dp_operation = sz == 1;
-            opcode << "vsqrt" << (dp_operation ? ".f64" : ".f32");
-            if (dp_operation) {
-              args << "f" << ((D << 4) | Vd) << ", " << "f" << ((M << 4) | Vm);
-            } else {
-              args << "f" << ((Vd << 1) | D) << ", " << "f" << ((Vm << 1) | M);
-            }
+            FpRegister d(instr, 12, 22);
+            FpRegister m(instr, 0, 5);
+            opcode << "vsqrt" << (sz == 1 ? ".f64" : ".f32");
+            args << d << ", " << m;
           }
         }
       }
@@ -776,7 +854,7 @@
           } else if (op3 == 0x4) {
             opcode << "teq";
           } else if (op3 == 0x8) {
-            opcode << "cmw";
+            opcode << "cmn.w";
           } else {
             opcode << "cmp.w";
           }
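
The imm8 << 2 changes above reflect that the VLDR/VSTR 8-bit offset immediate counts 32-bit words, so the disassembler scales it by four before printing a byte offset. A tiny standalone check of that arithmetic:

#include <cstdint>
#include <cstdio>

int main() {
  // The encoded immediate is a word count; scale by 4 for the byte offset.
  uint32_t imm8 = 3;
  bool U = true;  // U = 1 adds the offset, U = 0 subtracts it
  std::printf("[r0, #%s%u]\n", U ? "" : "-",
              static_cast<unsigned>(imm8 << 2));  // prints [r0, #12]
  return 0;
}
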
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index b9716d5..e219dd3 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -257,6 +257,9 @@
     os << "OAT DEX FILE:\n";
     os << StringPrintf("location: %s\n", oat_dex_file.GetDexFileLocation().c_str());
     os << StringPrintf("checksum: 0x%08x\n", oat_dex_file.GetDexFileLocationChecksum());
+
+    // Create the verifier early.
+
     std::string error_msg;
     UniquePtr<const DexFile> dex_file(oat_dex_file.OpenDexFile(&error_msg));
     if (dex_file.get() == NULL) {
@@ -377,8 +380,20 @@
                                  oat_method.GetCode() != NULL ? "..." : "");
       Indenter indent2_filter(indent1_os.rdbuf(), kIndentChar, kIndentBy1Count);
       std::ostream indent2_os(&indent2_filter);
-      DumpCode(indent2_os, oat_method, dex_method_idx, &dex_file, class_def, code_item,
-               method_access_flags);
+
+      Runtime* runtime = Runtime::Current();
+      if (runtime != nullptr) {
+        ScopedObjectAccess soa(Thread::Current());
+        SirtRef<mirror::DexCache> dex_cache(
+            soa.Self(), runtime->GetClassLinker()->FindDexCache(dex_file));
+        SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
+        verifier::MethodVerifier verifier(&dex_file, &dex_cache, &class_loader, &class_def, code_item,
+                                          dex_method_idx, nullptr, method_access_flags, true, true);
+        verifier.Verify();
+        DumpCode(indent2_os, &verifier, oat_method, code_item);
+      } else {
+        DumpCode(indent2_os, nullptr, oat_method, code_item);
+      }
     }
   }
 
@@ -566,24 +581,10 @@
     }
   }
 
-  void DumpVRegsAtDexPc(std::ostream& os,  const OatFile::OatMethod& oat_method,
-                        uint32_t dex_method_idx, const DexFile* dex_file,
-                        const DexFile::ClassDef& class_def, const DexFile::CodeItem* code_item,
-                        uint32_t method_access_flags, uint32_t dex_pc) {
-    static UniquePtr<verifier::MethodVerifier> verifier;
-    static const DexFile* verified_dex_file = NULL;
-    static uint32_t verified_dex_method_idx = DexFile::kDexNoIndex;
-    if (dex_file != verified_dex_file || verified_dex_method_idx != dex_method_idx) {
-      ScopedObjectAccess soa(Thread::Current());
-      mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file);
-      mirror::ClassLoader* class_loader = NULL;
-      verifier.reset(new verifier::MethodVerifier(dex_file, dex_cache, class_loader, &class_def,
-                                                  code_item, dex_method_idx, NULL,
-                                                  method_access_flags, true, true));
-      verifier->Verify();
-      verified_dex_file = dex_file;
-      verified_dex_method_idx = dex_method_idx;
-    }
+  void DumpVRegsAtDexPc(std::ostream& os, verifier::MethodVerifier* verifier,
+                        const OatFile::OatMethod& oat_method,
+                        const DexFile::CodeItem* code_item, uint32_t dex_pc) {
+    DCHECK(verifier != nullptr);
     std::vector<int32_t> kinds = verifier->DescribeVRegs(dex_pc);
     bool first = true;
     for (size_t reg = 0; reg < code_item->registers_size_; reg++) {
@@ -633,18 +634,16 @@
                     uint32_t method_access_flags) {
     if ((method_access_flags & kAccNative) == 0) {
       ScopedObjectAccess soa(Thread::Current());
-      mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file);
-      mirror::ClassLoader* class_loader = NULL;
+      SirtRef<mirror::DexCache> dex_cache(soa.Self(), Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file));
+      SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
       verifier::MethodVerifier::VerifyMethodAndDump(os, dex_method_idx, dex_file, dex_cache,
                                                     class_loader, &class_def, code_item, NULL,
                                                     method_access_flags);
     }
   }
 
-  void DumpCode(std::ostream& os,  const OatFile::OatMethod& oat_method,
-                uint32_t dex_method_idx, const DexFile* dex_file,
-                const DexFile::ClassDef& class_def, const DexFile::CodeItem* code_item,
-                uint32_t method_access_flags) {
+  void DumpCode(std::ostream& os, verifier::MethodVerifier* verifier,
+                const OatFile::OatMethod& oat_method, const DexFile::CodeItem* code_item) {
     const void* code = oat_method.GetCode();
     size_t code_size = oat_method.GetCodeSize();
     if (code == NULL || code_size == 0) {
@@ -653,16 +652,14 @@
     }
     const uint8_t* native_pc = reinterpret_cast<const uint8_t*>(code);
     size_t offset = 0;
-    const bool kDumpVRegs = (Runtime::Current() != NULL);
     while (offset < code_size) {
       DumpMappingAtOffset(os, oat_method, offset, false);
       offset += disassembler_->Dump(os, native_pc + offset);
       uint32_t dex_pc = DumpMappingAtOffset(os, oat_method, offset, true);
       if (dex_pc != DexFile::kDexNoIndex) {
         DumpGcMapAtNativePcOffset(os, oat_method, code_item, offset);
-        if (kDumpVRegs) {
-          DumpVRegsAtDexPc(os, oat_method, dex_method_idx, dex_file, class_def, code_item,
-                           method_access_flags, dex_pc);
+        if (verifier != nullptr) {
+          DumpVRegsAtDexPc(os, verifier, oat_method, code_item, dex_pc);
         }
       }
     }
@@ -715,14 +712,25 @@
         if (image_root_object->IsObjectArray()) {
           Indenter indent2_filter(indent1_os.rdbuf(), kIndentChar, kIndentBy1Count);
           std::ostream indent2_os(&indent2_filter);
-          // TODO: replace down_cast with AsObjectArray (g++ currently has a problem with this)
           mirror::ObjectArray<mirror::Object>* image_root_object_array
-              = down_cast<mirror::ObjectArray<mirror::Object>*>(image_root_object);
-          //  = image_root_object->AsObjectArray<Object>();
+              = image_root_object->AsObjectArray<mirror::Object>();
           for (int i = 0; i < image_root_object_array->GetLength(); i++) {
             mirror::Object* value = image_root_object_array->Get(i);
+            size_t run = 0;
+            for (int32_t j = i + 1; j < image_root_object_array->GetLength(); j++) {
+              if (value == image_root_object_array->Get(j)) {
+                run++;
+              } else {
+                break;
+              }
+            }
+            if (run == 0) {
+              indent2_os << StringPrintf("%d: ", i);
+            } else {
+              indent2_os << StringPrintf("%d to %zd: ", i, i + run);
+              i = i + run;
+            }
             if (value != NULL) {
-              indent2_os << i << ": ";
               PrettyObjectValue(indent2_os, value->GetClass(), value);
             } else {
              indent2_os << "null\n";
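
The image-roots dump now collapses runs of identical array entries into a single "i to j:" line. A self-contained version of the run-detection loop added above:

#include <cstdio>
#include <vector>

// Adjacent equal values print once as "first to last:" instead of one
// line each, mirroring the loop over image_root_object_array.
static void DumpWithRuns(const std::vector<int>& values) {
  for (size_t i = 0; i < values.size(); i++) {
    size_t run = 0;
    for (size_t j = i + 1; j < values.size() && values[j] == values[i]; j++) {
      run++;
    }
    if (run == 0) {
      std::printf("%zu: %d\n", i, values[i]);
    } else {
      std::printf("%zu to %zu: %d\n", i, i + run, values[i]);
      i += run;  // skip the entries already covered by the run
    }
  }
}
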
diff --git a/runtime/Android.mk b/runtime/Android.mk
index bef4381..16f11c6 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -42,6 +42,7 @@
 	dex_instruction.cc \
 	elf_file.cc \
 	gc/allocator/dlmalloc.cc \
+	gc/allocator/rosalloc.cc \
 	gc/accounting/card_table.cc \
 	gc/accounting/gc_allocator.cc \
 	gc/accounting/heap_bitmap.cc \
@@ -50,11 +51,16 @@
 	gc/collector/garbage_collector.cc \
 	gc/collector/mark_sweep.cc \
 	gc/collector/partial_mark_sweep.cc \
+	gc/collector/semi_space.cc \
 	gc/collector/sticky_mark_sweep.cc \
 	gc/heap.cc \
+	gc/reference_queue.cc \
+	gc/space/bump_pointer_space.cc \
 	gc/space/dlmalloc_space.cc \
 	gc/space/image_space.cc \
 	gc/space/large_object_space.cc \
+	gc/space/malloc_space.cc \
+	gc/space/rosalloc_space.cc \
 	gc/space/space.cc \
 	hprof/hprof.cc \
 	image.cc \
@@ -141,6 +147,7 @@
 	arch/arm/registers_arm.cc \
 	arch/x86/registers_x86.cc \
 	arch/mips/registers_mips.cc \
+	arch/quick_alloc_entrypoints.cc \
 	entrypoints/entrypoint_utils.cc \
 	entrypoints/interpreter/interpreter_entrypoints.cc \
 	entrypoints/jni/jni_entrypoints.cc \
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 3dac636..5166d29 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -34,21 +34,6 @@
 extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*);
 extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
 
-// Alloc entrypoints.
-extern "C" void* art_quick_alloc_array(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_array_with_access_check(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_object(uint32_t type_idx, void* method);
-extern "C" void* art_quick_alloc_object_with_access_check(uint32_t type_idx, void* method);
-extern "C" void* art_quick_check_and_alloc_array(uint32_t, void*, int32_t);
-extern "C" void* art_quick_check_and_alloc_array_with_access_check(uint32_t, void*, int32_t);
-
-extern "C" void* art_quick_alloc_array_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_object_instrumented(uint32_t type_idx, void* method);
-extern "C" void* art_quick_alloc_object_with_access_check_instrumented(uint32_t type_idx, void* method);
-extern "C" void* art_quick_check_and_alloc_array_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_check_and_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
-
 // Cast entrypoints.
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
@@ -142,29 +127,7 @@
 extern "C" void art_quick_throw_null_pointer_exception();
 extern "C" void art_quick_throw_stack_overflow(void*);
 
-static bool quick_alloc_entry_points_instrumented = false;
-
-void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
-  quick_alloc_entry_points_instrumented = instrumented;
-}
-
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
-  if (quick_alloc_entry_points_instrumented) {
-    qpoints->pAllocArray = art_quick_alloc_array_instrumented;
-    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check_instrumented;
-    qpoints->pAllocObject = art_quick_alloc_object_instrumented;
-    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check_instrumented;
-    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array_instrumented;
-    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check_instrumented;
-  } else {
-    qpoints->pAllocArray = art_quick_alloc_array;
-    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
-    qpoints->pAllocObject = art_quick_alloc_object;
-    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
-    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
-    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
-  }
-}
+extern void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
 
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
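
Both the ARM and MIPS files now only declare ResetQuickAllocEntryPoints and delete their identical local copies; given the new arch/quick_alloc_entrypoints.cc in the build, the shared definition presumably keeps the same instrumented/uninstrumented switch. A condensed reconstruction from the removed bodies (QuickEntryPoints is the ART struct; the elided assignments follow the same pattern):

#include <cstdint>

extern "C" void* art_quick_alloc_object(uint32_t type_idx, void* method);
extern "C" void* art_quick_alloc_object_instrumented(uint32_t type_idx, void* method);

static bool quick_alloc_entry_points_instrumented = false;

void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
  quick_alloc_entry_points_instrumented = instrumented;
}

void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
  if (quick_alloc_entry_points_instrumented) {
    qpoints->pAllocObject = art_quick_alloc_object_instrumented;
    // ... remaining instrumented entrypoints, as in the removed blocks ...
  } else {
    qpoints->pAllocObject = art_quick_alloc_object;
    // ... remaining uninstrumented entrypoints ...
  }
}
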
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 9a853d0..61be14b 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -16,6 +16,8 @@
 
 #include "asm_support_arm.S"
 
+#include "arch/quick_alloc_entrypoints.S"
+
     /* Deliver the given exception */
     .extern artDeliverExceptionFromCode
     /* Deliver an exception pending on a thread */
@@ -69,12 +71,24 @@
 .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
     add sp, #4               @ bottom word holds Method*
     pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
+    .cfi_restore r8
+    .cfi_restore r10
+    .cfi_restore r11
     .cfi_adjust_cfa_offset -32
 .endm
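
The .cfi_restore directives added after each pop tell the unwinder that, from this point on, the register again holds the caller's value in the register itself rather than in a now-popped stack slot; without them, a backtrace taken after the pop would read stale memory. An illustrative pattern in the same dialect, not taken from the patch:

    pop {r5, lr}               @ restore callee saves
    .cfi_restore r5            @ r5 is live in the register again
    .cfi_restore lr            @ unwind info no longer points at the stack slot
    .cfi_adjust_cfa_offset -8  @ sp moved up by 8 bytes
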
 
 .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
     add sp, #4               @ bottom word holds Method*
     pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
+    .cfi_restore r8
+    .cfi_restore r10
+    .cfi_restore r11
     .cfi_adjust_cfa_offset -32
     bx  lr                   @ return
 .endm
@@ -86,7 +100,6 @@
 .macro SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
     push {r1-r3, r5-r8, r10-r11, lr}  @ 10 words of callee saves
     .save {r1-r3, r5-r8, r10-r11, lr}
-    .cfi_adjust_cfa_offset 40
     .cfi_rel_offset r1, 0
     .cfi_rel_offset r2, 4
     .cfi_rel_offset r3, 8
@@ -97,6 +110,7 @@
     .cfi_rel_offset r10, 28
     .cfi_rel_offset r11, 32
     .cfi_rel_offset lr, 36
+    .cfi_adjust_cfa_offset 40
     sub sp, #8                        @ 2 words of space, bottom word will hold Method*
     .pad #8
     .cfi_adjust_cfa_offset 8
@@ -105,6 +119,15 @@
 .macro RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
     add  sp, #8                      @ rewind sp
     pop {r1-r3, r5-r8, r10-r11, lr}  @ 10 words of callee saves
+    .cfi_restore r1
+    .cfi_restore r2
+    .cfi_restore r3
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
+    .cfi_restore r8
+    .cfi_restore r10
+    .cfi_restore r11
     .cfi_adjust_cfa_offset -48
 .endm
 
@@ -285,6 +308,11 @@
     ldr    ip, [sp, #24]                   @ load the result pointer
     strd   r0, [ip]                        @ store r0/r1 into result pointer
     pop    {r0, r4, r5, r9, r11, lr}       @ restore spill regs
+    .cfi_restore r0
+    .cfi_restore r4
+    .cfi_restore r5
+    .cfi_restore r9
+    .cfi_restore r11
+    .cfi_restore lr
     .cfi_adjust_cfa_offset -24
     bx     lr
 END art_quick_invoke_stub
@@ -413,6 +441,8 @@
     add sp, #4
     .cfi_adjust_cfa_offset -4
     pop {r0-r1, lr}
+    .cfi_restore r0
+    .cfi_restore r1
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
     mov r3, sp                      @ pass SP
@@ -689,6 +719,7 @@
     .cfi_rel_offset r9, 0
     bl     artSet64StaticFromCode        @ (field_idx, referrer, new_val, Thread*, SP)
     add    sp, #16                       @ release out args
+    .cfi_adjust_cfa_offset -16
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME   @ TODO: we can clearly save an add here
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
@@ -801,205 +832,42 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolve_string
 
-    /*
-     * Called by managed code to allocate an object
-     */
-    .extern artAllocObjectFromCode
-ENTRY art_quick_alloc_object
+// Macro to facilitate adding new allocation entrypoints.
+.macro TWO_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
     mov    r2, r9                     @ pass Thread::Current
     mov    r3, sp                     @ pass SP
-    bl     artAllocObjectFromCode     @ (uint32_t type_idx, Method* method, Thread*, SP)
+    bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*, SP)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
+    \return
     DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_object
+END \name
+.endm
 
-    .extern artAllocObjectFromCodeInstrumented
-ENTRY art_quick_alloc_object_instrumented
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r2, r9                     @ pass Thread::Current
-    mov    r3, sp                     @ pass SP
-    bl     artAllocObjectFromCodeInstrumented     @ (uint32_t type_idx, Method* method, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_object_instrumented
-
-    /*
-     * Called by managed code to allocate an object when the caller doesn't know whether it has
-     * access to the created type.
-     */
-    .extern artAllocObjectFromCodeWithAccessCheck
-ENTRY art_quick_alloc_object_with_access_check
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r2, r9                     @ pass Thread::Current
-    mov    r3, sp                     @ pass SP
-    bl     artAllocObjectFromCodeWithAccessCheck  @ (uint32_t type_idx, Method* method, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_object_with_access_check
-
-    .extern artAllocObjectFromCodeWithAccessCheckInstrumented
-ENTRY art_quick_alloc_object_with_access_check_instrumented
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r2, r9                     @ pass Thread::Current
-    mov    r3, sp                     @ pass SP
-    bl     artAllocObjectFromCodeWithAccessCheckInstrumented  @ (uint32_t type_idx, Method* method, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_object_with_access_check_instrumented
-
-    /*
-     * Called by managed code to allocate an array.
-     */
-    .extern artAllocArrayFromCode
-ENTRY art_quick_alloc_array
+// Macro to facilitate adding new array allocation entrypoints.
+.macro THREE_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
     mov    r3, r9                     @ pass Thread::Current
     mov    r12, sp
     str    r12, [sp, #-16]!           @ expand the frame and pass SP
     .pad #16
     .cfi_adjust_cfa_offset 16
-    @ artAllocArrayFromCode(uint32_t type_idx, Method* method, int32_t component_count, Thread*, SP)
-    bl     artAllocArrayFromCode
+    @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*, SP)
+    bl     \entrypoint
     add    sp, #16                    @ strip the extra frame
     .cfi_adjust_cfa_offset -16
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
+    \return
     DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_array
+END \name
+.endm
 
-    .extern artAllocArrayFromCodeInstrumented
-ENTRY art_quick_alloc_array_instrumented
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ artAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t component_count, Thread*, SP)
-    bl     artAllocArrayFromCodeInstrumented
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_array_instrumented
-
-    /*
-     * Called by managed code to allocate an array when the caller doesn't know whether it has
-     * access to the created type.
-     */
-    .extern artAllocArrayFromCodeWithAccessCheck
-ENTRY art_quick_alloc_array_with_access_check
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ artAllocArrayFromCodeWithAccessCheck(type_idx, method, component_count, Thread*, SP)
-    bl     artAllocArrayFromCodeWithAccessCheck
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_array_with_access_check
-
-    .extern artAllocArrayFromCodeWithAccessCheckInstrumented
-ENTRY art_quick_alloc_array_with_access_check_instrumented
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ artAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, component_count, Thread*, SP)
-    bl     artAllocArrayFromCodeWithAccessCheckInstrumented
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_array_with_access_check_instrumented
-
-    /*
-     * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
-     */
-    .extern artCheckAndAllocArrayFromCode
-ENTRY art_quick_check_and_alloc_array
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ artCheckAndAllocArrayFromCode(uint32_t type_idx, Method* method, int32_t count, Thread* , SP)
-    bl     artCheckAndAllocArrayFromCode
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_check_and_alloc_array
-
-    .extern artCheckAndAllocArrayFromCodeInstrumented
-ENTRY art_quick_check_and_alloc_array_instrumented
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ artCheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t count, Thread* , SP)
-    bl     artCheckAndAllocArrayFromCodeInstrumented
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_check_and_alloc_array_instrumented
-
-    /*
-     * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
-     */
-    .extern artCheckAndAllocArrayFromCodeWithAccessCheck
-ENTRY art_quick_check_and_alloc_array_with_access_check
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ artCheckAndAllocArrayFromCodeWithAccessCheck(type_idx, method, count, Thread* , SP)
-    bl     artCheckAndAllocArrayFromCodeWithAccessCheck
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_check_and_alloc_array_with_access_check
-
-    .extern artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
-ENTRY art_quick_check_and_alloc_array_with_access_check_instrumented
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, count, Thread* , SP)
-    bl     artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_check_and_alloc_array_with_access_check_instrumented
+// Generate the allocation entrypoints for each allocator.
+GENERATE_ALL_ALLOC_ENTRYPOINTS
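
GENERATE_ALL_ALLOC_ENTRYPOINTS comes from the newly included arch/quick_alloc_entrypoints.S and stamps out the stubs that the deleted hand-written blocks used to provide. Judging from the removed art_quick_alloc_object stub, one generated instance corresponds to an invocation along these lines (illustrative; the exact generated names are not shown in this patch):

    @ Hypothetical expansion for one stub, matching the deleted code above:
    TWO_ARG_DOWNCALL art_quick_alloc_object, artAllocObjectFromCode, RETURN_IF_RESULT_IS_NON_ZERO
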
 
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
@@ -1078,11 +946,10 @@
     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
     add     sp, #16                @ skip r1-r3, 4 bytes padding.
     .cfi_adjust_cfa_offset -16
-    cbnz    r2, 1f                 @ success if no exception is pending
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    cbnz    r2, 1f                 @ success if no exception is pending
     bx    lr                       @ return on success
 1:
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_to_interpreter_bridge
 
@@ -1137,6 +1004,8 @@
     mov   r2, r0         @ link register saved by instrumentation
     mov   lr, r1         @ r1 is holding link register if we're to bounce to deoptimize
     pop   {r0, r1}       @ restore return value
+    .cfi_restore r0
+    .cfi_restore r1
     add sp, #32          @ remove callee save frame
     .cfi_adjust_cfa_offset -32
     bx    r2             @ return
@@ -1187,6 +1056,8 @@
     mov     r1,r10
     pop     {r9 - r10}
     .cfi_adjust_cfa_offset -8
+    .cfi_restore r9
+    .cfi_restore r10
     bx      lr
 END art_quick_mul_long
 
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 331a461..e1b441a 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -33,21 +33,6 @@
 extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*);
 extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
 
-// Alloc entrypoints.
-extern "C" void* art_quick_alloc_array(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_array_with_access_check(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_object(uint32_t type_idx, void* method);
-extern "C" void* art_quick_alloc_object_with_access_check(uint32_t type_idx, void* method);
-extern "C" void* art_quick_check_and_alloc_array(uint32_t, void*, int32_t);
-extern "C" void* art_quick_check_and_alloc_array_with_access_check(uint32_t, void*, int32_t);
-
-extern "C" void* art_quick_alloc_array_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_object_instrumented(uint32_t type_idx, void* method);
-extern "C" void* art_quick_alloc_object_with_access_check_instrumented(uint32_t type_idx, void* method);
-extern "C" void* art_quick_check_and_alloc_array_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_check_and_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
-
 // Cast entrypoints.
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
@@ -143,29 +128,7 @@
 extern "C" void art_quick_throw_null_pointer_exception();
 extern "C" void art_quick_throw_stack_overflow(void*);
 
-static bool quick_alloc_entry_points_instrumented = false;
-
-void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
-  quick_alloc_entry_points_instrumented = instrumented;
-}
-
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
-  if (quick_alloc_entry_points_instrumented) {
-    qpoints->pAllocArray = art_quick_alloc_array_instrumented;
-    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check_instrumented;
-    qpoints->pAllocObject = art_quick_alloc_object_instrumented;
-    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check_instrumented;
-    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array_instrumented;
-    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check_instrumented;
-  } else {
-    qpoints->pAllocArray = art_quick_alloc_array;
-    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
-    qpoints->pAllocObject = art_quick_alloc_object;
-    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
-    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
-    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
-  }
-}
+extern void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
 
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 451b1bb..2d1e87a 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -16,6 +16,8 @@
 
 #include "asm_support_mips.S"
 
+#include "arch/quick_alloc_entrypoints.S"
+
     .set noreorder
     .balign 4
 
@@ -89,28 +91,46 @@
 
 .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
     lw     $ra, 60($sp)
+    .cfi_restore 31
     lw     $s8, 56($sp)
+    .cfi_restore 30
     lw     $gp, 52($sp)
+    .cfi_restore 28
     lw     $s7, 48($sp)
+    .cfi_restore 23
     lw     $s6, 44($sp)
+    .cfi_restore 22
     lw     $s5, 40($sp)
+    .cfi_restore 21
     lw     $s4, 36($sp)
+    .cfi_restore 20
     lw     $s3, 32($sp)
+    .cfi_restore 19
     lw     $s2, 28($sp)
+    .cfi_restore 18
     addiu  $sp, $sp, 64
     .cfi_adjust_cfa_offset -64
 .endm
 
 .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
     lw     $ra, 60($sp)
+    .cfi_restore 31
     lw     $s8, 56($sp)
+    .cfi_restore 30
     lw     $gp, 52($sp)
+    .cfi_restore 28
     lw     $s7, 48($sp)
+    .cfi_restore 23
     lw     $s6, 44($sp)
+    .cfi_restore 22
     lw     $s5, 40($sp)
+    .cfi_restore 21
     lw     $s4, 36($sp)
+    .cfi_restore 20
     lw     $s3, 32($sp)
+    .cfi_restore 19
     lw     $s2, 28($sp)
+    .cfi_restore 18
     jr     $ra
     addiu  $sp, $sp, 64
     .cfi_adjust_cfa_offset -64
@@ -153,17 +173,29 @@
 
 .macro RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
     lw     $ra, 60($sp)
+    .cfi_restore 31
     lw     $s8, 56($sp)
+    .cfi_restore 30
     lw     $gp, 52($sp)
+    .cfi_restore 28
     lw     $s7, 48($sp)
+    .cfi_restore 23
     lw     $s6, 44($sp)
+    .cfi_restore 22
     lw     $s5, 40($sp)
+    .cfi_restore 21
     lw     $s4, 36($sp)
+    .cfi_restore 20
     lw     $s3, 32($sp)
+    .cfi_restore 19
     lw     $s2, 28($sp)
+    .cfi_restore 18
     lw     $a3, 12($sp)
+    .cfi_restore 7
     lw     $a2, 8($sp)
+    .cfi_restore 6
     lw     $a1, 4($sp)
+    .cfi_restore 5
     addiu  $sp, $sp, 64           # pop frame
     .cfi_adjust_cfa_offset -64
 .endm
@@ -201,7 +233,7 @@
     DELIVER_PENDING_EXCEPTION
 .endm
 
-.macro RETURN_IF_NONZERO
+.macro RETURN_IF_RESULT_IS_NON_ZERO
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
     beqz   $v0, 1f                       # success?
     nop
@@ -463,9 +495,13 @@
     sw    $zero, 0($sp)         # store NULL for method* at bottom of frame
     move  $sp, $fp              # restore the stack
     lw    $s0, 0($sp)
+    .cfi_restore 16
     lw    $s1, 4($sp)
+    .cfi_restore 17
     lw    $fp, 8($sp)
+    .cfi_restore 30
     lw    $ra, 12($sp)
+    .cfi_restore 31
     addiu $sp, $sp, 16
     .cfi_adjust_cfa_offset -16
     lw    $t0, 16($sp)          # get result pointer
@@ -655,7 +691,7 @@
     # artInitializeStaticStorageFromCode(uint32_t type_idx, Method* referrer, Thread*, $sp)
     jal     artInitializeStaticStorageFromCode
     move    $a3, $sp                            # pass $sp
-    RETURN_IF_NONZERO
+    RETURN_IF_RESULT_IS_NON_ZERO
 END art_quick_initialize_static_storage
 
     /*
@@ -669,7 +705,7 @@
     # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*, $sp)
     jal     artInitializeTypeFromCode
     move    $a3, $sp                           # pass $sp
-    RETURN_IF_NONZERO
+    RETURN_IF_RESULT_IS_NON_ZERO
 END art_quick_initialize_type
 
     /*
@@ -684,7 +720,7 @@
     # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*, $sp)
     jal     artInitializeTypeAndVerifyAccessFromCode
     move    $a3, $sp                           # pass $sp
-    RETURN_IF_NONZERO
+    RETURN_IF_RESULT_IS_NON_ZERO
 END art_quick_initialize_type_and_verify_access
 
     /*
@@ -868,156 +904,37 @@
     # artResolveStringFromCode(Method* referrer, uint32_t string_idx, Thread*, $sp)
     jal     artResolveStringFromCode
     move    $a3, $sp                  # pass $sp
-    RETURN_IF_NONZERO
+    RETURN_IF_RESULT_IS_NON_ZERO
 END art_quick_resolve_string
 
-    /*
-     * Called by managed code to allocate an object.
-     */
-    .extern artAllocObjectFromCode
-ENTRY art_quick_alloc_object
+
+// Macro to facilitate adding new allocation entrypoints.
+.macro TWO_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
     GENERATE_GLOBAL_POINTER
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
     move    $a2, rSELF                # pass Thread::Current
-    jal     artAllocObjectFromCode    # (uint32_t type_idx, Method* method, Thread*, $sp)
+    jal     \entrypoint
     move    $a3, $sp                  # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_object
+    \return
+END \name
+.endm
 
-    .extern artAllocObjectFromCodeInstrumented
-ENTRY art_quick_alloc_object_instrumented
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a2, rSELF                # pass Thread::Current
-    jal     artAllocObjectFromCodeInstrumented    # (uint32_t type_idx, Method* method, Thread*, $sp)
-    move    $a3, $sp                  # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_object_instrumented
-
-    /*
-     * Called by managed code to allocate an object when the caller doesn't know whether it has
-     * access to the created type.
-     */
-    .extern artAllocObjectFromCodeWithAccessCheck
-ENTRY art_quick_alloc_object_with_access_check
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a2, rSELF                # pass Thread::Current
-    jal     artAllocObjectFromCodeWithAccessCheck  # (uint32_t type_idx, Method* method, Thread*, $sp)
-    move    $a3, $sp                  # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_object_with_access_check
-
-    .extern artAllocObjectFromCodeWithAccessCheckInstrumented
-ENTRY art_quick_alloc_object_with_access_check_instrumented
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a2, rSELF                # pass Thread::Current
-    jal     artAllocObjectFromCodeWithAccessCheckInstrumented  # (uint32_t type_idx, Method* method, Thread*, $sp)
-    move    $a3, $sp                  # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_object_with_access_check_instrumented
-
-    /*
-     * Called by managed code to allocate an array.
-     */
-    .extern artAllocArrayFromCode
-ENTRY art_quick_alloc_array
+.macro THREE_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
     GENERATE_GLOBAL_POINTER
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
     move    $a3, rSELF                # pass Thread::Current
-    # artAllocArrayFromCode(uint32_t type_idx, Method* method, int32_t component_count, Thread*, $sp)
-    jal     artAllocArrayFromCode
+    jal     \entrypoint
     sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_array
+    \return
+END \name
+.endm
 
-    .extern artAllocArrayFromCodeInstrumented
-ENTRY art_quick_alloc_array_instrumented
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
-    # artAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t component_count, Thread*, $sp)
-    jal     artAllocArrayFromCodeInstrumented
-    sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_array_instrumented
-
-    /*
-     * Called by managed code to allocate an array when the caller doesn't know whether it has
-     * access to the created type.
-     */
-    .extern artAllocArrayFromCodeWithAccessCheck
-ENTRY art_quick_alloc_array_with_access_check
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
-    # artAllocArrayFromCodeWithAccessCheck(type_idx, method, component_count, Thread*, $sp)
-    jal     artAllocArrayFromCodeWithAccessCheck
-    sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_array_with_access_check
-
-    .extern artAllocArrayFromCodeWithAccessCheckInstrumented
-ENTRY art_quick_alloc_array_with_access_check_instrumented
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
-    # artAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, component_count, Thread*, $sp)
-    jal     artAllocArrayFromCodeWithAccessCheckInstrumented
-    sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_array_with_access_check_instrumented
-
-    /*
-     * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
-     */
-    .extern artCheckAndAllocArrayFromCode
-ENTRY art_quick_check_and_alloc_array
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
-    # artCheckAndAllocArrayFromCode(uint32_t type_idx, Method* method, int32_t count, Thread* , $sp)
-    jal     artCheckAndAllocArrayFromCode
-    sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_check_and_alloc_array
-
-    .extern artCheckAndAllocArrayFromCodeInstrumented
-ENTRY art_quick_check_and_alloc_array_instrumented
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
-    # artCheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t count, Thread* , $sp)
-    jal     artCheckAndAllocArrayFromCodeInstrumented
-    sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_check_and_alloc_array_instrumented
-
-    /*
-     * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
-     */
-    .extern artCheckAndAllocArrayFromCodeWithAccessCheck
-ENTRY art_quick_check_and_alloc_array_with_access_check
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
-    # artCheckAndAllocArrayFromCodeWithAccessCheck(type_idx, method, count, Thread* , $sp)
-    jal     artCheckAndAllocArrayFromCodeWithAccessCheck
-    sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_check_and_alloc_array_with_access_check
-
-    .extern artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
-ENTRY art_quick_check_and_alloc_array_with_access_check_instrumented
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
-    # artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, count, Thread* , $sp)
-    jal     artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
-    sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_check_and_alloc_array_with_access_check_instrumented
+// Generate the allocation entrypoints for each allocator.
+GENERATE_ALL_ALLOC_ENTRYPOINTS
 
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
new file mode 100644
index 0000000..bdadc51
--- /dev/null
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+.macro GENERATE_ALLOC_ENTRYPOINTS c_suffix, cxx_suffix
+// Called by managed code to allocate an object.
+TWO_ARG_DOWNCALL art_quick_alloc_object\c_suffix, artAllocObjectFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an object when the caller doesn't know whether it has access
+// to the created type.
+TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check\c_suffix, artAllocObjectFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an array.
+THREE_ARG_DOWNCALL art_quick_alloc_array\c_suffix, artAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an array when the caller doesn't know whether it has access
+// to the created type.
+THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check\c_suffix, artAllocArrayFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
+THREE_ARG_DOWNCALL art_quick_check_and_alloc_array\c_suffix, artCheckAndAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
+THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check\c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+.endm
+
+.macro GENERATE_ALL_ALLOC_ENTRYPOINTS
+GENERATE_ALLOC_ENTRYPOINTS
+GENERATE_ALLOC_ENTRYPOINTS _instrumented, Instrumented
+GENERATE_ALLOC_ENTRYPOINTS _bump_pointer, BumpPointer
+GENERATE_ALLOC_ENTRYPOINTS _bump_pointer_instrumented, BumpPointerInstrumented
+.endm
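+
+// As a sketch of what the generators above produce: the expansion
+// GENERATE_ALLOC_ENTRYPOINTS _bump_pointer, BumpPointer emits, for the plain
+// object-allocation slot,
+//   TWO_ARG_DOWNCALL art_quick_alloc_object_bump_pointer, \
+//       artAllocObjectFromCodeBumpPointer, RETURN_IF_RESULT_IS_NON_ZERO
+// so every allocator variant shares one stub body and differs only in the
+// C++ entrypoint it calls.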
diff --git a/runtime/arch/quick_alloc_entrypoints.cc b/runtime/arch/quick_alloc_entrypoints.cc
new file mode 100644
index 0000000..192b124
--- /dev/null
+++ b/runtime/arch/quick_alloc_entrypoints.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "gc/heap.h"
+
+#define GENERATE_ENTRYPOINTS(suffix) \
+extern "C" void* art_quick_alloc_array##suffix(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_alloc_array_with_access_check##suffix(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_alloc_object##suffix(uint32_t type_idx, void* method); \
+extern "C" void* art_quick_alloc_object_with_access_check##suffix(uint32_t type_idx, void* method); \
+extern "C" void* art_quick_check_and_alloc_array##suffix(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_alloc_array##suffix##_instrumented(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_alloc_array_with_access_check##suffix##_instrumented(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_alloc_object##suffix##_instrumented(uint32_t type_idx, void* method); \
+extern "C" void* art_quick_alloc_object_with_access_check##suffix##_instrumented(uint32_t type_idx, void* method); \
+extern "C" void* art_quick_check_and_alloc_array##suffix##_instrumented(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented(uint32_t, void*, int32_t); \
+void SetQuickAllocEntryPoints##suffix(QuickEntryPoints* qpoints, bool instrumented) { \
+  if (instrumented) { \
+    qpoints->pAllocArray = art_quick_alloc_array##suffix##_instrumented; \
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check##suffix##_instrumented; \
+    qpoints->pAllocObject = art_quick_alloc_object##suffix##_instrumented; \
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix##_instrumented; \
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix##_instrumented; \
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented; \
+  } else { \
+    qpoints->pAllocArray = art_quick_alloc_array##suffix; \
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check##suffix; \
+    qpoints->pAllocObject = art_quick_alloc_object##suffix; \
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix; \
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix; \
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix; \
+  } \
+}
+
+namespace art {
+
+// Generate the entrypoint functions.
+GENERATE_ENTRYPOINTS();
+GENERATE_ENTRYPOINTS(_bump_pointer);
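+// The two expansions above declare the extern "C" stubs and define
+// SetQuickAllocEntryPoints() and SetQuickAllocEntryPoints_bump_pointer(),
+// which ResetQuickAllocEntryPoints() below dispatches between by allocator.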
+
+static bool entry_points_instrumented = false;
+static gc::AllocatorType entry_points_allocator = kMovingCollector ?
+    gc::kAllocatorTypeBumpPointer : gc::kAllocatorTypeFreeList;
+
+void SetQuickAllocEntryPointsAllocator(gc::AllocatorType allocator) {
+  entry_points_allocator = allocator;
+}
+
+void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
+  entry_points_instrumented = instrumented;
+}
+
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
+  switch (entry_points_allocator) {
+    case gc::kAllocatorTypeFreeList: {
+      SetQuickAllocEntryPoints(qpoints, entry_points_instrumented);
+      break;
+    }
+    case gc::kAllocatorTypeBumpPointer: {
+      SetQuickAllocEntryPoints_bump_pointer(qpoints, entry_points_instrumented);
+      break;
+    }
+    default: {
+      LOG(FATAL) << "Unimplemented";
+    }
+  }
+}
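+
+// Illustrative only: a caller that has just enabled allocation instrumentation
+// could refresh a thread's entry point table with
+//   SetQuickAllocEntryPointsInstrumented(true);
+//   ResetQuickAllocEntryPoints(&qpoints);  // qpoints: that thread's table
+// leaving the configured allocator (free-list or bump-pointer) unchanged.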
+
+}  // namespace art
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 99b0dd5..6a67079 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -32,21 +32,6 @@
 extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*);
 extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
 
-// Alloc entrypoints.
-extern "C" void* art_quick_alloc_array(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_array_with_access_check(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_object(uint32_t type_idx, void* method);
-extern "C" void* art_quick_alloc_object_with_access_check(uint32_t type_idx, void* method);
-extern "C" void* art_quick_check_and_alloc_array(uint32_t, void*, int32_t);
-extern "C" void* art_quick_check_and_alloc_array_with_access_check(uint32_t, void*, int32_t);
-
-extern "C" void* art_quick_alloc_array_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_object_instrumented(uint32_t type_idx, void* method);
-extern "C" void* art_quick_alloc_object_with_access_check_instrumented(uint32_t type_idx, void* method);
-extern "C" void* art_quick_check_and_alloc_array_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_check_and_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
-
 // Cast entrypoints.
 extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
                                                 const mirror::Class* ref_class);
@@ -125,29 +110,7 @@
 extern "C" void art_quick_throw_null_pointer_exception();
 extern "C" void art_quick_throw_stack_overflow(void*);
 
-static bool quick_alloc_entry_points_instrumented = false;
-
-void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
-  quick_alloc_entry_points_instrumented = instrumented;
-}
-
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
-  if (quick_alloc_entry_points_instrumented) {
-    qpoints->pAllocArray = art_quick_alloc_array_instrumented;
-    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check_instrumented;
-    qpoints->pAllocObject = art_quick_alloc_object_instrumented;
-    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check_instrumented;
-    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array_instrumented;
-    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check_instrumented;
-  } else {
-    qpoints->pAllocArray = art_quick_alloc_array;
-    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
-    qpoints->pAllocObject = art_quick_alloc_object;
-    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
-    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
-    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
-  }
-}
+extern void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
 
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 6fe4993..9679471 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -16,14 +16,21 @@
 
 #include "asm_support_x86.S"
 
+#include "arch/quick_alloc_entrypoints.S"
+
+// For x86, the CFA is esp+4, the address above the pushed return address on the stack.
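+// So immediately after "PUSH edi" the saved edi sits at CFA-8 (the return
+// address itself is at CFA-4), which is why the .cfi_rel_offset annotations
+// below count down from -8 in 4-byte steps.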
+
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveAll)
      */
 MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
+    .cfi_rel_offset edi, -8
     PUSH esi
+    .cfi_rel_offset esi, -12
     PUSH ebp
+    .cfi_rel_offset ebp, -16
     subl  MACRO_LITERAL(16), %esp  // Grow stack by 4 words, bottom word will hold Method*
     .cfi_adjust_cfa_offset 16
 END_MACRO
@@ -34,8 +41,11 @@
      */
 MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME)
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
+    .cfi_rel_offset edi, -8
     PUSH esi
+    .cfi_rel_offset esi, -12
     PUSH ebp
+    .cfi_rel_offset ebp, -16
     subl  MACRO_LITERAL(16), %esp  // Grow stack by 4 words, bottom word will hold Method*
     .cfi_adjust_cfa_offset 16
 END_MACRO
@@ -43,8 +53,11 @@
 MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
     addl MACRO_LITERAL(16), %esp  // Unwind stack up to return address
     POP ebp  // Restore callee saves (ebx is saved/restored by the upcall)
+    .cfi_restore ebp
     POP esi
+    .cfi_restore esi
     POP edi
+    .cfi_restore edi
     .cfi_adjust_cfa_offset -28
 END_MACRO
 
@@ -54,23 +67,36 @@
      */
 MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
     PUSH edi  // Save callee saves
+    .cfi_rel_offset edi, -8
     PUSH esi
+    .cfi_rel_offset esi, -12
     PUSH ebp
+    .cfi_rel_offset ebp, -16
     PUSH ebx  // Save args
+    .cfi_rel_offset ebx, -20
     PUSH edx
+    .cfi_rel_offset edx, -24
     PUSH ecx
+    .cfi_rel_offset ecx, -28
     PUSH eax   // Align stack, eax will be clobbered by Method*
+    .cfi_rel_offset eax, -32
 END_MACRO
 
 MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME)
     addl MACRO_LITERAL(4), %esp  // Remove padding
     .cfi_adjust_cfa_offset -4
     POP ecx  // Restore args except eax
+    .cfi_restore ecx
     POP edx
+    .cfi_restore edx
     POP ebx
+    .cfi_restore ebx
     POP ebp  // Restore callee saves
+    .cfi_restore ebp
     POP esi
+    .cfi_restore esi
     POP edi
+    .cfi_restore edi
 END_MACRO
 
     /*
@@ -188,12 +214,19 @@
     // Set up the callee save frame to conform with Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
     // return address
     PUSH edi
+    .cfi_rel_offset edi, -8
     PUSH esi
+    .cfi_rel_offset esi, -12
     PUSH ebp
-    PUSH ebx
+    .cfi_rel_offset ebp, -16
+    PUSH ebx  // Save args
+    .cfi_rel_offset ebx, -20
     PUSH edx
+    .cfi_rel_offset edx, -24
     PUSH ecx
-    PUSH eax   // <-- callee save Method* to go here
+    .cfi_rel_offset ecx, -28
+    PUSH eax    // <-- callee save Method* to go here
+    .cfi_rel_offset eax, -32
     movl %esp, %edx  // remember SP
     // Outgoing argument set up
     subl MACRO_LITERAL(12), %esp  // alignment padding
@@ -209,11 +242,16 @@
     movl %edx, %edi               // save code pointer in EDI
     addl MACRO_LITERAL(36), %esp  // Pop arguments skip eax
     .cfi_adjust_cfa_offset -36
-    POP ecx                       // Restore args
+    POP ecx  // Restore args except eax
+    .cfi_restore ecx
     POP edx
+    .cfi_restore edx
     POP ebx
-    POP ebp  // Restore callee saves.
+    .cfi_restore ebx
+    POP ebp  // Restore callee saves
+    .cfi_restore ebp
     POP esi
+    .cfi_restore esi
     // Swap EDI callee save with code pointer.
     xchgl %edi, (%esp)
     testl %eax, %eax              // Branch forward if exception pending.
@@ -248,7 +286,9 @@
      */
 DEFINE_FUNCTION art_quick_invoke_stub
     PUSH ebp                      // save ebp
+    .cfi_rel_offset ebp, -8
     PUSH ebx                      // save ebx
+    .cfi_rel_offset ebx, -12
     mov %esp, %ebp                // copy value of stack pointer into base pointer
     .cfi_def_cfa_register ebp
     mov 20(%ebp), %ebx            // get arg array size
@@ -269,8 +309,11 @@
     mov 12(%esp), %ebx            // copy arg3 into ebx
     call *METHOD_CODE_OFFSET(%eax) // call the method
     mov %ebp, %esp                // restore stack pointer
+    .cfi_def_cfa_register esp
     POP ebx                       // pop ebx
+    .cfi_restore ebx
     POP ebp                       // pop ebp
+    .cfi_restore ebp
     mov 20(%esp), %ecx            // get result pointer
     cmpl LITERAL(68), 24(%esp)    // test if result type char == 'D'
     je return_double_quick
@@ -360,7 +403,7 @@
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
-MACRO0(RETURN_IF_EAX_NOT_ZERO)
+MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
     testl %eax, %eax               // eax == 0 ?
     jz  1f                         // if eax == 0 goto 1
     ret                            // return
@@ -385,24 +428,13 @@
     DELIVER_PENDING_EXCEPTION
 END_MACRO
 
-TWO_ARG_DOWNCALL art_quick_alloc_object, artAllocObjectFromCode, RETURN_IF_EAX_NOT_ZERO
-TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check, artAllocObjectFromCodeWithAccessCheck, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_alloc_array, artAllocArrayFromCode, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check, artAllocArrayFromCodeWithAccessCheck, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_check_and_alloc_array, artCheckAndAllocArrayFromCode, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check, artCheckAndAllocArrayFromCodeWithAccessCheck, RETURN_IF_EAX_NOT_ZERO
+// Generate the allocation entrypoints for each allocator.
+GENERATE_ALL_ALLOC_ENTRYPOINTS
 
-TWO_ARG_DOWNCALL art_quick_alloc_object_instrumented, artAllocObjectFromCodeInstrumented, RETURN_IF_EAX_NOT_ZERO
-TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check_instrumented, artAllocObjectFromCodeWithAccessCheckInstrumented, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_alloc_array_instrumented, artAllocArrayFromCodeInstrumented, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check_instrumented, artAllocArrayFromCodeWithAccessCheckInstrumented, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_instrumented, artCheckAndAllocArrayFromCodeInstrumented, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check_instrumented, artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented, RETURN_IF_EAX_NOT_ZERO
-
-TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_EAX_NOT_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_EAX_NOT_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_EAX_NOT_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_EAX_NOT_ZERO
+TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 
 TWO_ARG_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
 
@@ -495,7 +527,9 @@
 DEFINE_FUNCTION art_quick_check_cast
     PUSH eax                     // alignment padding
     PUSH ecx                     // pass arg2 - obj->klass
+    .cfi_rel_offset ecx, -12
     PUSH eax                     // pass arg1 - checked class
+    .cfi_rel_offset eax, -16
     call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
     testl %eax, %eax
     jz 1f                         // jump forward if not assignable
@@ -504,7 +538,9 @@
     ret
 1:
     POP eax                       // pop arguments
+    .cfi_restore eax
     POP ecx
+    .cfi_restore ecx
     addl LITERAL(4), %esp
     .cfi_adjust_cfa_offset -12
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
diff --git a/runtime/barrier_test.cc b/runtime/barrier_test.cc
index 298ae56..91fc143 100644
--- a/runtime/barrier_test.cc
+++ b/runtime/barrier_test.cc
@@ -66,7 +66,7 @@
 // Check that barrier wait and barrier increment work.
 TEST_F(BarrierTest, CheckWait) {
   Thread* self = Thread::Current();
-  ThreadPool thread_pool(num_threads);
+  ThreadPool thread_pool("Barrier test thread pool", num_threads);
   Barrier barrier(0);
   AtomicInteger count1(0);
   AtomicInteger count2(0);
@@ -121,7 +121,7 @@
 // Check that barrier pass through works.
 TEST_F(BarrierTest, CheckPass) {
   Thread* self = Thread::Current();
-  ThreadPool thread_pool(num_threads);
+  ThreadPool thread_pool("Barrier test thread pool", num_threads);
   Barrier barrier(0);
   AtomicInteger count(0);
   const int32_t num_tasks = num_threads * 4;
diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h
index 0345266..7c09999 100644
--- a/runtime/base/histogram-inl.h
+++ b/runtime/base/histogram-inl.h
@@ -39,6 +39,13 @@
   BucketiseValue(value);
 }
 
+template <class Value> inline Histogram<Value>::Histogram(const char* name)
+    : kAdjust(0),
+      kInitialBucketCount(0),
+      name_(name),
+      max_buckets_(0) {
+}
+
 template <class Value>
 inline Histogram<Value>::Histogram(const char* name, Value initial_bucket_width,
                                    size_t max_buckets)
@@ -162,28 +169,30 @@
 
   double per_0 = (1.0 - interval) / 2.0;
   double per_1 = per_0 + interval;
-  os << Name() << ":\t";
   TimeUnit unit = GetAppropriateTimeUnit(Mean() * kAdjust);
+  os << Name() << ":\tSum: ";
+  os << PrettyDuration(Sum() * kAdjust) << " ";
   os << (interval * 100) << "% C.I. " << FormatDuration(Percentile(per_0, data) * kAdjust, unit);
   os << "-" << FormatDuration(Percentile(per_1, data) * kAdjust, unit) << " ";
   os << "Avg: " << FormatDuration(Mean() * kAdjust, unit) << " Max: ";
   os << FormatDuration(Max() * kAdjust, unit) << "\n";
 }
 
-template <class Value> inline void Histogram<Value>::CreateHistogram(CumulativeData& out_data) {
+template <class Value>
+inline void Histogram<Value>::CreateHistogram(CumulativeData* out_data) const {
   DCHECK_GT(sample_size_, 0ull);
-  out_data.freq_.clear();
-  out_data.perc_.clear();
+  out_data->freq_.clear();
+  out_data->perc_.clear();
   uint64_t accumulated = 0;
-  out_data.freq_.push_back(accumulated);
-  out_data.perc_.push_back(0.0);
+  out_data->freq_.push_back(accumulated);
+  out_data->perc_.push_back(0.0);
   for (size_t idx = 0; idx < frequency_.size(); idx++) {
     accumulated += frequency_[idx];
-    out_data.freq_.push_back(accumulated);
-    out_data.perc_.push_back(static_cast<double>(accumulated) / static_cast<double>(sample_size_));
+    out_data->freq_.push_back(accumulated);
+    out_data->perc_.push_back(static_cast<double>(accumulated) / static_cast<double>(sample_size_));
   }
-  DCHECK_EQ(out_data.freq_.back(), sample_size_);
-  DCHECK_LE(std::abs(out_data.perc_.back() - 1.0), 0.001);
+  DCHECK_EQ(out_data->freq_.back(), sample_size_);
+  DCHECK_LE(std::abs(out_data->perc_.back() - 1.0), 0.001);
 }
 
 template <class Value>
diff --git a/runtime/base/histogram.h b/runtime/base/histogram.h
index 2a02cf4..a7d51e2 100644
--- a/runtime/base/histogram.h
+++ b/runtime/base/histogram.h
@@ -40,6 +40,10 @@
     std::vector<double> perc_;
   };
 
+  // Used by the cumulative timing logger to search the histogram set for an
+  // existing split with the same name, using CumulativeLogger::HistogramComparator.
+  explicit Histogram(const char* name);
+  // This is the expected constructor when creating new Histograms.
   Histogram(const char* name, Value initial_bucket_width, size_t max_buckets = 100);
   void AddValue(Value);
   // Builds the cumulative distribution function from the frequency data.
@@ -47,7 +51,7 @@
   // cumulative_freq[i] = sum(frequency[j] : 0 < j < i )
   // Accumulative summation of percentiles; which is the frequency / SampleSize
   // cumulative_perc[i] = sum(frequency[j] / SampleSize : 0 < j < i )
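+  // For example (illustrative): bucket frequencies {2, 3, 5} over 10 samples
+  // yield cumulative_freq = {0, 2, 5, 10} and cumulative_perc = {0.0, 0.2, 0.5, 1.0}.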
-  void CreateHistogram(CumulativeData& data);
+  void CreateHistogram(CumulativeData* data) const;
   // Reset the cumulative values, next time CreateHistogram is called it will recreate the cache.
   void Reset();
   double Mean() const;
diff --git a/runtime/base/histogram_test.cc b/runtime/base/histogram_test.cc
index 534440c..966b97f 100644
--- a/runtime/base/histogram_test.cc
+++ b/runtime/base/histogram_test.cc
@@ -85,7 +85,7 @@
   hist->AddValue(145);
   hist->AddValue(155);
 
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   PerValue = hist->Percentile(0.50, data);
   EXPECT_EQ(875, static_cast<int>(PerValue * 10));
 }
@@ -117,12 +117,12 @@
   hist->AddValue(200);
   hist->AddValue(205);
   hist->AddValue(212);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   PerValue = hist->Percentile(0.50, data);
 
   std::string text;
   std::stringstream stream;
-  std::string expected("UpdateRange:\t99% C.I. 15us-212us Avg: 126.380us Max: 212us\n");
+  std::string expected("UpdateRange:\tSum: 2.654ms 99% C.I. 15us-212us Avg: 126.380us Max: 212us\n");
   hist->PrintConfidenceIntervals(stream, 0.99, data);
 
   EXPECT_EQ(expected, stream.str());
@@ -132,7 +132,6 @@
 
 TEST(Histtest, Reset) {
   UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("Reset", 5));
-  Histogram<uint64_t>::CumulativeData data;
 
   double PerValue;
   hist->AddValue(0);
@@ -160,12 +159,13 @@
   hist->AddValue(200);
   hist->AddValue(205);
   hist->AddValue(212);
-  hist->CreateHistogram(data);
+  Histogram<uint64_t>::CumulativeData data;
+  hist->CreateHistogram(&data);
   PerValue = hist->Percentile(0.50, data);
 
   std::string text;
   std::stringstream stream;
-  std::string expected("Reset:\t99% C.I. 15us-212us Avg: 126.380us Max: 212us\n");
+  std::string expected("Reset:\tSum: 2.654ms 99% C.I. 15us-212us Avg: 126.380us Max: 212us\n");
   hist->PrintConfidenceIntervals(stream, 0.99, data);
 
   EXPECT_EQ(expected, stream.str());
@@ -185,7 +185,7 @@
   hist->AddValue(68);
   hist->AddValue(75);
   hist->AddValue(93);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   hist->AddValue(110);
   hist->AddValue(121);
   hist->AddValue(132);
@@ -194,17 +194,17 @@
   hist->AddValue(155);
   hist->AddValue(163);
   hist->AddValue(168);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   hist->AddValue(175);
   hist->AddValue(182);
   hist->AddValue(193);
   hist->AddValue(200);
   hist->AddValue(205);
   hist->AddValue(212);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   PerValue = hist->Percentile(0.50, data);
   std::stringstream stream;
-  std::string expected("MultipleCreateHist:\t99% C.I. 15us-212us Avg: 126.380us Max: 212us\n");
+  std::string expected("MultipleCreateHist:\tSum: 2.654ms 99% C.I. 15us-212us Avg: 126.380us Max: 212us\n");
   hist->PrintConfidenceIntervals(stream, 0.99, data);
 
   EXPECT_EQ(expected, stream.str());
@@ -217,9 +217,9 @@
   Histogram<uint64_t>::CumulativeData data;
 
   hist->AddValue(1);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   std::stringstream stream;
-  std::string expected = "SingleValue:\t99% C.I. 1us-1us Avg: 1us Max: 1us\n";
+  std::string expected = "SingleValue:\tSum: 1us 99% C.I. 1us-1us Avg: 1us Max: 1us\n";
   hist->PrintConfidenceIntervals(stream, 0.99, data);
   EXPECT_EQ(expected, stream.str());
 }
@@ -234,7 +234,7 @@
   for (uint64_t idx = 0ull; idx < 150ull; idx++) {
     hist->AddValue(0);
   }
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   per_995 = hist->Percentile(0.995, data);
   EXPECT_EQ(per_995, 0);
   hist->Reset();
@@ -243,7 +243,7 @@
       hist->AddValue(val);
     }
   }
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   per_005 = hist->Percentile(0.005, data);
   per_995 = hist->Percentile(0.995, data);
   EXPECT_EQ(1, per_005);
@@ -260,9 +260,9 @@
     }
   }
   hist->AddValue(10000);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   std::stringstream stream;
-  std::string expected = "SpikyValues:\t99% C.I. 0.089us-2541.825us Avg: 95.033us Max: 10000us\n";
+  std::string expected = "SpikyValues:\tSum: 14.350ms 99% C.I. 0.089us-2541.825us Avg: 95.033us Max: 10000us\n";
   hist->PrintConfidenceIntervals(stream, 0.99, data);
   EXPECT_EQ(expected, stream.str());
 }
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 3d842a0..3aabc8d 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -19,6 +19,7 @@
 #include "base/mutex.h"
 #include "runtime.h"
 #include "thread-inl.h"
+#include "UniquePtr.h"
 #include "utils.h"
 
 namespace art {
@@ -28,20 +29,21 @@
 unsigned int gAborting = 0;
 
 static LogSeverity gMinimumLogSeverity = INFO;
-static std::string* gCmdLine = NULL;
-static std::string* gProgramInvocationName = NULL;
-static std::string* gProgramInvocationShortName = NULL;
+static UniquePtr<std::string> gCmdLine;
+static UniquePtr<std::string> gProgramInvocationName;
+static UniquePtr<std::string> gProgramInvocationShortName;
 
 const char* GetCmdLine() {
-  return (gCmdLine != NULL) ? gCmdLine->c_str() : NULL;
+  return (gCmdLine.get() != nullptr) ? gCmdLine->c_str() : nullptr;
 }
 
 const char* ProgramInvocationName() {
-  return (gProgramInvocationName != NULL) ? gProgramInvocationName->c_str() : "art";
+  return (gProgramInvocationName.get() != nullptr) ? gProgramInvocationName->c_str() : "art";
 }
 
 const char* ProgramInvocationShortName() {
-  return (gProgramInvocationShortName != NULL) ? gProgramInvocationShortName->c_str() : "art";
+  return (gProgramInvocationShortName.get() != nullptr) ? gProgramInvocationShortName->c_str()
+                                                        : "art";
 }
 
 // Configure logging based on ANDROID_LOG_TAGS environment variable.
@@ -53,7 +55,7 @@
 // and a letter indicating the minimum priority level we're expected to log.
 // This can be used to reveal or conceal logs with specific tags.
 void InitLogging(char* argv[]) {
-  if (gCmdLine != NULL) {
+  if (gCmdLine.get() != nullptr) {
     return;
   }
   // TODO: Move this to a more obvious InitART...
@@ -63,17 +65,18 @@
   // but we don't have that luxury on the Mac, and there are a couple of argv[0] variants that are
   // commonly used.
   if (argv != NULL) {
-    gCmdLine = new std::string(argv[0]);
+    gCmdLine.reset(new std::string(argv[0]));
     for (size_t i = 1; argv[i] != NULL; ++i) {
       gCmdLine->append(" ");
       gCmdLine->append(argv[i]);
     }
-    gProgramInvocationName = new std::string(argv[0]);
+    gProgramInvocationName.reset(new std::string(argv[0]));
     const char* last_slash = strrchr(argv[0], '/');
-    gProgramInvocationShortName = new std::string((last_slash != NULL) ? last_slash + 1 : argv[0]);
+    gProgramInvocationShortName.reset(new std::string((last_slash != NULL) ? last_slash + 1
+                                                                           : argv[0]));
   } else {
     // TODO: fall back to /proc/self/cmdline when argv is NULL on Linux
-    gCmdLine = new std::string("<unset>");
+    gCmdLine.reset(new std::string("<unset>"));
   }
   const char* tags = getenv("ANDROID_LOG_TAGS");
   if (tags == NULL) {
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index c0cfee2..29b3981 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -130,7 +130,7 @@
   // TODO: tighten this check.
   if (kDebugLocking) {
     Runtime* runtime = Runtime::Current();
-    CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDown() ||
+    CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDownLocked() ||
           level == kDefaultMutexLevel  || level == kRuntimeShutdownLock ||
           level == kThreadListLock || level == kLoggingLock || level == kAbortLock);
   }
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 249f031..ec79c55 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -266,9 +266,8 @@
 Mutex::~Mutex() {
 #if ART_USE_FUTEXES
   if (state_ != 0) {
-    MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
     Runtime* runtime = Runtime::Current();
-    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown();
+    bool shutting_down = runtime == nullptr || runtime->IsShuttingDown(Thread::Current());
     LOG(shutting_down ? WARNING : FATAL) << "destroying mutex with owner: " << exclusive_owner_;
   } else {
     CHECK_EQ(exclusive_owner_, 0U)  << "unexpectedly found an owner on unlocked mutex " << name_;
@@ -283,7 +282,7 @@
     // TODO: should we just not log at all if shutting down? this could be the logging mutex!
     MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
     Runtime* runtime = Runtime::Current();
-    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown();
+    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDownLocked();
     PLOG(shutting_down ? WARNING : FATAL) << "pthread_mutex_destroy failed for " << name_;
   }
 #endif
@@ -454,7 +453,7 @@
     // TODO: should we just not log at all if shutting down? this could be the logging mutex!
     MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
     Runtime* runtime = Runtime::Current();
-    bool shutting_down = runtime == NULL || runtime->IsShuttingDown();
+    bool shutting_down = runtime == NULL || runtime->IsShuttingDownLocked();
     PLOG(shutting_down ? WARNING : FATAL) << "pthread_rwlock_destroy failed for " << name_;
   }
 #endif
@@ -641,9 +640,8 @@
 ConditionVariable::~ConditionVariable() {
 #if ART_USE_FUTEXES
   if (num_waiters_!= 0) {
-    MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
     Runtime* runtime = Runtime::Current();
-    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown();
+    bool shutting_down = runtime == nullptr || runtime->IsShuttingDown(Thread::Current());
     LOG(shutting_down ? WARNING : FATAL) << "ConditionVariable::~ConditionVariable for " << name_
         << " called with " << num_waiters_ << " waiters.";
   }
@@ -655,7 +653,7 @@
     errno = rc;
     MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
     Runtime* runtime = Runtime::Current();
-    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown();
+    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDownLocked();
     PLOG(shutting_down ? WARNING : FATAL) << "pthread_cond_destroy failed for " << name_;
   }
 #endif
diff --git a/runtime/base/timing_logger.cc b/runtime/base/timing_logger.cc
index 6df1126..c8dee6d 100644
--- a/runtime/base/timing_logger.cc
+++ b/runtime/base/timing_logger.cc
@@ -39,7 +39,7 @@
 }
 
 CumulativeLogger::~CumulativeLogger() {
-  STLDeleteValues(&histograms_);
+  STLDeleteElements(&histograms_);
 }
 
 void CumulativeLogger::SetName(const std::string& name) {
@@ -57,7 +57,7 @@
 void CumulativeLogger::Reset() {
   MutexLock mu(Thread::Current(), lock_);
   iterations_ = 0;
-  STLDeleteValues(&histograms_);
+  STLDeleteElements(&histograms_);
 }
 
 uint64_t CumulativeLogger::GetTotalNs() const {
@@ -67,60 +67,72 @@
 uint64_t CumulativeLogger::GetTotalTime() const {
   MutexLock mu(Thread::Current(), lock_);
   uint64_t total = 0;
-  for (CumulativeLogger::HistogramsIterator it = histograms_.begin(), end = histograms_.end();
-       it != end; ++it) {
-    total += it->second->Sum();
+  for (Histogram<uint64_t>* histogram : histograms_) {
+    total += histogram->Sum();
   }
   return total;
 }
 
-void CumulativeLogger::AddLogger(const base::TimingLogger &logger) {
+void CumulativeLogger::AddLogger(const TimingLogger &logger) {
   MutexLock mu(Thread::Current(), lock_);
-  const base::TimingLogger::SplitTimings& splits = logger.GetSplits();
-  for (base::TimingLogger::SplitTimingsIterator it = splits.begin(), end = splits.end();
-       it != end; ++it) {
-    base::TimingLogger::SplitTiming split = *it;
+  const TimingLogger::SplitTimings& splits = logger.GetSplits();
+  for (auto it = splits.begin(), end = splits.end(); it != end; ++it) {
+    TimingLogger::SplitTiming split = *it;
     uint64_t split_time = split.first;
     const char* split_name = split.second;
     AddPair(split_name, split_time);
   }
 }
 
+size_t CumulativeLogger::GetIterations() const {
+  MutexLock mu(Thread::Current(), lock_);
+  return iterations_;
+}
+
 void CumulativeLogger::Dump(std::ostream &os) {
   MutexLock mu(Thread::Current(), lock_);
   DumpHistogram(os);
 }
 
-void CumulativeLogger::AddPair(const std::string &label, uint64_t delta_time) {
+void CumulativeLogger::AddPair(const std::string& label, uint64_t delta_time) {
   // Convert delta time to microseconds so that we don't overflow our counters.
   delta_time /= kAdjust;
 
-  if (histograms_.find(label) == histograms_.end()) {
-    // TODO: Shoud this be a defined constant so we we know out of which orifice 16 and 100 were picked?
-    const size_t max_buckets = Runtime::Current()->GetHeap()->IsLowMemoryMode() ? 16 : 100;
-    // TODO: Should this be a defined constant so we know 50 of WTF?
-    histograms_[label] = new Histogram<uint64_t>(label.c_str(), 50, max_buckets);
+  Histogram<uint64_t>* histogram;
+  Histogram<uint64_t> dummy(label.c_str());
+  auto it = histograms_.find(&dummy);
+  if (it == histograms_.end()) {
+    const size_t max_buckets = Runtime::Current()->GetHeap()->IsLowMemoryMode() ?
+        kLowMemoryBucketCount : kDefaultBucketCount;
+    histogram = new Histogram<uint64_t>(label.c_str(), kInitialBucketSize, max_buckets);
+    histograms_.insert(histogram);
+  } else {
+    histogram = *it;
   }
-  histograms_[label]->AddValue(delta_time);
+  histogram->AddValue(delta_time);
 }
 
+class CompareHistogramByTimeSpentDeclining {
+ public:
+  bool operator()(const Histogram<uint64_t>* a, const Histogram<uint64_t>* b) const {
+    return a->Sum() > b->Sum();
+  }
+};
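+
+// Unlike CumulativeLogger::HistogramComparator, which orders by name so the
+// histogram set can be probed with a name-only key, this comparator exists
+// purely for presentation: the dump lists the most expensive splits first.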
+
 void CumulativeLogger::DumpHistogram(std::ostream &os) {
   os << "Start Dumping histograms for " << iterations_ << " iterations"
      << " for " << name_ << "\n";
-  for (CumulativeLogger::HistogramsIterator it = histograms_.begin(), end = histograms_.end();
-       it != end; ++it) {
+  std::set<Histogram<uint64_t>*, CompareHistogramByTimeSpentDeclining>
+      sorted_histograms(histograms_.begin(), histograms_.end());
+  for (Histogram<uint64_t>* histogram : sorted_histograms) {
     Histogram<uint64_t>::CumulativeData cumulative_data;
-    it->second->CreateHistogram(cumulative_data);
-    it->second->PrintConfidenceIntervals(os, 0.99, cumulative_data);
-    // Reset cumulative values to save memory. We don't expect DumpHistogram to be called often, so
-    // it is not performance critical.
+    // We don't expect DumpHistogram to be called often, so it is not performance critical.
+    histogram->CreateHistogram(&cumulative_data);
+    histogram->PrintConfidenceIntervals(os, 0.99, cumulative_data);
   }
   os << "Done Dumping histograms \n";
 }
 
-
-namespace base {
-
 TimingLogger::TimingLogger(const char* name, bool precise, bool verbose)
     : name_(name), precise_(precise), verbose_(verbose), current_split_(NULL) {
 }
@@ -131,33 +143,35 @@
 }
 
 void TimingLogger::StartSplit(const char* new_split_label) {
-  DCHECK(new_split_label != NULL) << "Starting split (" << new_split_label << ") with null label.";
-  TimingLogger::ScopedSplit* explicit_scoped_split = new TimingLogger::ScopedSplit(new_split_label, this);
+  DCHECK(new_split_label != nullptr) << "Starting split with null label.";
+  TimingLogger::ScopedSplit* explicit_scoped_split =
+      new TimingLogger::ScopedSplit(new_split_label, this);
   explicit_scoped_split->explicit_ = true;
 }
 
 void TimingLogger::EndSplit() {
-  CHECK(current_split_ != NULL) << "Ending a non-existent split.";
-  DCHECK(current_split_->label_ != NULL);
-  DCHECK(current_split_->explicit_ == true) << "Explicitly ending scoped split: " << current_split_->label_;
-
+  CHECK(current_split_ != nullptr) << "Ending a non-existent split.";
+  DCHECK(current_split_->label_ != nullptr);
+  DCHECK(current_split_->explicit_ == true)
+      << "Explicitly ending scoped split: " << current_split_->label_;
   delete current_split_;
+  // TODO: current_split_ = nullptr;
 }
 
 // Ends the current split and starts the one given by the label.
 void TimingLogger::NewSplit(const char* new_split_label) {
-  CHECK(current_split_ != NULL) << "Inserting a new split (" << new_split_label
-                                << ") into a non-existent split.";
-  DCHECK(new_split_label != NULL) << "New split (" << new_split_label << ") with null label.";
-
-  current_split_->TailInsertSplit(new_split_label);
+  if (current_split_ == nullptr) {
+    StartSplit(new_split_label);
+  } else {
+    DCHECK(new_split_label != nullptr) << "New split (" << new_split_label << ") with null label.";
+    current_split_->TailInsertSplit(new_split_label);
+  }
 }
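+
+// NewSplit() now deliberately degrades to StartSplit() when no split is open,
+// so callers need not special-case the first split of a phase.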
 
 uint64_t TimingLogger::GetTotalNs() const {
   uint64_t total_ns = 0;
-  for (base::TimingLogger::SplitTimingsIterator it = splits_.begin(), end = splits_.end();
-       it != end; ++it) {
-    base::TimingLogger::SplitTiming split = *it;
+  for (auto it = splits_.begin(), end = splits_.end(); it != end; ++it) {
+    TimingLogger::SplitTiming split = *it;
     total_ns += split.first;
   }
   return total_ns;
@@ -166,9 +180,8 @@
 void TimingLogger::Dump(std::ostream &os) const {
   uint64_t longest_split = 0;
   uint64_t total_ns = 0;
-  for (base::TimingLogger::SplitTimingsIterator it = splits_.begin(), end = splits_.end();
-       it != end; ++it) {
-    base::TimingLogger::SplitTiming split = *it;
+  for (auto it = splits_.begin(), end = splits_.end(); it != end; ++it) {
+    TimingLogger::SplitTiming split = *it;
     uint64_t split_time = split.first;
     longest_split = std::max(longest_split, split_time);
     total_ns += split_time;
@@ -177,9 +190,8 @@
   TimeUnit tu = GetAppropriateTimeUnit(longest_split);
   uint64_t divisor = GetNsToTimeUnitDivisor(tu);
   // Print formatted splits.
-  for (base::TimingLogger::SplitTimingsIterator it = splits_.begin(), end = splits_.end();
-       it != end; ++it) {
-    base::TimingLogger::SplitTiming split = *it;
+  for (auto it = splits_.begin(), end = splits_.end(); it != end; ++it) {
+    const TimingLogger::SplitTiming& split = *it;
     uint64_t split_time = split.first;
     if (!precise_ && divisor >= 1000) {
       // Make the fractional part 0.
@@ -226,7 +238,7 @@
     LOG(INFO) << "End: " << label_ << " " << PrettyDuration(split_time);
   }
 
-  // If one or more enclosed explcitly started splits are not terminated we can
+  // If one or more enclosed explicitly started splits are not terminated we can
   // either fail or "unwind" the stack of splits in the timing logger to 'this'
   // (by deleting the intervening scoped splits). This implements the latter.
   TimingLogger::ScopedSplit* current = timing_logger_->current_split_;
@@ -288,5 +300,4 @@
   ATRACE_BEGIN(label_);
 }
 
-}  // namespace base
 }  // namespace art
diff --git a/runtime/base/timing_logger.h b/runtime/base/timing_logger.h
index 07d1ee0..c1ff0a3 100644
--- a/runtime/base/timing_logger.h
+++ b/runtime/base/timing_logger.h
@@ -21,15 +21,12 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 
+#include <set>
 #include <string>
 #include <vector>
-#include <map>
 
 namespace art {
-
-namespace base {
-  class TimingLogger;
-}  // namespace base
+class TimingLogger;
 
 class CumulativeLogger {
  public:
@@ -44,18 +41,27 @@
   // Allow the name to be modified, particularly when the cumulative logger is a field within a
   // parent class that is unable to determine the "name" of a sub-class.
   void SetName(const std::string& name);
-  void AddLogger(const base::TimingLogger& logger) LOCKS_EXCLUDED(lock_);
+  void AddLogger(const TimingLogger& logger) LOCKS_EXCLUDED(lock_);
+  size_t GetIterations() const;
 
  private:
-  typedef std::map<std::string, Histogram<uint64_t> *> Histograms;
-  typedef std::map<std::string, Histogram<uint64_t> *>::const_iterator HistogramsIterator;
+  class HistogramComparator {
+   public:
+    bool operator()(const Histogram<uint64_t>* a, const Histogram<uint64_t>* b) const {
+      return a->Name() < b->Name();
+    }
+  };
+
+  static constexpr size_t kLowMemoryBucketCount = 16;
+  static constexpr size_t kDefaultBucketCount = 100;
+  static constexpr size_t kInitialBucketSize = 50;  // 50 microseconds.
 
   void AddPair(const std::string &label, uint64_t delta_time)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
   void DumpHistogram(std::ostream &os) EXCLUSIVE_LOCKS_REQUIRED(lock_);
   uint64_t GetTotalTime() const;
   static const uint64_t kAdjust = 1000;
-  Histograms histograms_ GUARDED_BY(lock_);
+  std::set<Histogram<uint64_t>*, HistogramComparator> histograms_ GUARDED_BY(lock_);
   std::string name_;
   const std::string lock_name_;
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -64,19 +70,17 @@
   DISALLOW_COPY_AND_ASSIGN(CumulativeLogger);
 };
 
-namespace base {
-
-
 // A timing logger that knows when a split starts for the purposes of logging tools, like systrace.
 class TimingLogger {
  public:
   // Splits are nanosecond times and split names.
   typedef std::pair<uint64_t, const char*> SplitTiming;
   typedef std::vector<SplitTiming> SplitTimings;
-  typedef std::vector<SplitTiming>::const_iterator SplitTimingsIterator;
 
   explicit TimingLogger(const char* name, bool precise, bool verbose);
-
+  ~TimingLogger() {
+    // TODO: DCHECK(current_split_ == nullptr) << "Forgot to end split: " << current_split_->label_;
+  }
   // Clears current splits and labels.
   void Reset();
 
@@ -142,7 +146,7 @@
   friend class ScopedSplit;
  protected:
   // The name of the timing logger.
-  const char* name_;
+  const char* const name_;
 
   // Do we want to print the exactly recorded split (true) or round down to the time unit being
   // used (false).
@@ -161,7 +165,6 @@
   DISALLOW_COPY_AND_ASSIGN(TimingLogger);
 };
 
-}  // namespace base
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_TIMING_LOGGER_H_
diff --git a/runtime/base/timing_logger_test.cc b/runtime/base/timing_logger_test.cc
index 8f28e48..03cc9cc 100644
--- a/runtime/base/timing_logger_test.cc
+++ b/runtime/base/timing_logger_test.cc
@@ -26,13 +26,13 @@
 
 TEST_F(TimingLoggerTest, StartEnd) {
   const char* split1name = "First Split";
-  base::TimingLogger timings("StartEnd", true, false);
+  TimingLogger timings("StartEnd", true, false);
 
   timings.StartSplit(split1name);
 
   timings.EndSplit();  // Ends split1.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(1U, splits.size());
   EXPECT_STREQ(splits[0].second, split1name);
@@ -43,7 +43,7 @@
   const char* split1name = "First Split";
   const char* split2name = "Second Split";
   const char* split3name = "Third Split";
-  base::TimingLogger timings("StartNewEnd", true, false);
+  TimingLogger timings("StartNewEnd", true, false);
 
   timings.StartSplit(split1name);
 
@@ -53,7 +53,7 @@
 
   timings.EndSplit();  // Ends split3.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(3U, splits.size());
   EXPECT_STREQ(splits[0].second, split1name);
@@ -67,7 +67,7 @@
   const char* split3name = "Third Split";
   const char* split4name = "Fourth Split";
   const char* split5name = "Fifth Split";
-  base::TimingLogger timings("StartNewEndNested", true, false);
+  TimingLogger timings("StartNewEndNested", true, false);
 
   timings.StartSplit(split1name);
 
@@ -85,7 +85,7 @@
 
   timings.EndSplit();  // Ends split2.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(5U, splits.size());
   EXPECT_STREQ(splits[0].second, split1name);
@@ -101,25 +101,25 @@
   const char* innersplit1 = "Inner Split 1";
   const char* innerinnersplit1 = "Inner Inner Split 1";
   const char* innersplit2 = "Inner Split 2";
-  base::TimingLogger timings("Scoped", true, false);
+  TimingLogger timings("Scoped", true, false);
 
   {
-      base::TimingLogger::ScopedSplit outer(outersplit, &timings);
+      TimingLogger::ScopedSplit outer(outersplit, &timings);
 
       {
-          base::TimingLogger::ScopedSplit inner1(innersplit1, &timings);
+          TimingLogger::ScopedSplit inner1(innersplit1, &timings);
 
           {
-              base::TimingLogger::ScopedSplit innerinner1(innerinnersplit1, &timings);
+              TimingLogger::ScopedSplit innerinner1(innerinnersplit1, &timings);
           }  // Ends innerinnersplit1.
       }  // Ends innersplit1.
 
       {
-          base::TimingLogger::ScopedSplit inner2(innersplit2, &timings);
+          TimingLogger::ScopedSplit inner2(innersplit2, &timings);
       }  // Ends innersplit2.
   }  // Ends outersplit.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(4U, splits.size());
   EXPECT_STREQ(splits[0].second, innerinnersplit1);
@@ -134,12 +134,12 @@
   const char* innersplit = "Inner Split";
   const char* innerinnersplit1 = "Inner Inner Split 1";
   const char* innerinnersplit2 = "Inner Inner Split 2";
-  base::TimingLogger timings("Scoped", true, false);
+  TimingLogger timings("Scoped", true, false);
 
   timings.StartSplit(outersplit);
 
   {
-      base::TimingLogger::ScopedSplit inner(innersplit, &timings);
+      TimingLogger::ScopedSplit inner(innersplit, &timings);
 
       timings.StartSplit(innerinnersplit1);
 
@@ -148,7 +148,7 @@
 
   timings.EndSplit();  // Ends outersplit.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(4U, splits.size());
   EXPECT_STREQ(splits[0].second, innerinnersplit1);
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 54cbfe6..a84e18a 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -205,7 +205,7 @@
         // If java_object is a weak global ref whose referent has been cleared,
         // obj will be NULL.  Otherwise, obj should always be non-NULL
         // and valid.
-        if (!Runtime::Current()->GetHeap()->IsHeapAddress(obj)) {
+        if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(obj)) {
           Runtime::Current()->GetHeap()->DumpSpaces();
           JniAbortF(function_name_, "field operation on invalid %s: %p",
                     ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(), java_object);
@@ -242,7 +242,7 @@
   void CheckInstanceFieldID(jobject java_object, jfieldID fid)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::Object* o = soa_.Decode<mirror::Object*>(java_object);
-    if (o == NULL || !Runtime::Current()->GetHeap()->IsHeapAddress(o)) {
+    if (o == NULL || !Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "field operation on invalid %s: %p",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(), java_object);
@@ -455,7 +455,8 @@
           mirror::Class* c = reinterpret_cast<mirror::Class*>(Thread::Current()->DecodeJObject(jc));
           if (c == NULL) {
             msg += "NULL";
-          } else if (c == kInvalidIndirectRefObject || !Runtime::Current()->GetHeap()->IsHeapAddress(c)) {
+          } else if (c == kInvalidIndirectRefObject ||
+              !Runtime::Current()->GetHeap()->IsValidObjectAddress(c)) {
             StringAppendF(&msg, "INVALID POINTER:%p", jc);
           } else if (!c->IsClass()) {
             msg += "INVALID NON-CLASS OBJECT OF TYPE:" + PrettyTypeOf(c);
@@ -621,7 +622,7 @@
     }
 
     mirror::Object* obj = soa_.Decode<mirror::Object*>(java_object);
-    if (!Runtime::Current()->GetHeap()->IsHeapAddress(obj)) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(obj)) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "%s is an invalid %s: %p (%p)",
                 what, ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(), java_object, obj);
@@ -675,7 +676,7 @@
     }
 
     mirror::Array* a = soa_.Decode<mirror::Array*>(java_array);
-    if (!Runtime::Current()->GetHeap()->IsHeapAddress(a)) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(a)) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "jarray is an invalid %s: %p (%p)",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(java_array)).c_str(), java_array, a);
@@ -696,7 +697,7 @@
       return NULL;
     }
     mirror::ArtField* f = soa_.DecodeField(fid);
-    if (!Runtime::Current()->GetHeap()->IsHeapAddress(f) || !f->IsArtField()) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(f) || !f->IsArtField()) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "invalid jfieldID: %p", fid);
       return NULL;
@@ -710,7 +711,7 @@
       return NULL;
     }
     mirror::ArtMethod* m = soa_.DecodeMethod(mid);
-    if (!Runtime::Current()->GetHeap()->IsHeapAddress(m) || !m->IsArtMethod()) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(m) || !m->IsArtMethod()) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "invalid jmethodID: %p", mid);
       return NULL;
@@ -731,7 +732,7 @@
     }
 
     mirror::Object* o = soa_.Decode<mirror::Object*>(java_object);
-    if (!Runtime::Current()->GetHeap()->IsHeapAddress(o)) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       // TODO: when we remove work_around_app_jni_bugs, this should be impossible.
       JniAbortF(function_name_, "native code passing in reference to invalid %s: %p",
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index ad568b1..0436435 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -18,20 +18,21 @@
 #define ART_RUNTIME_CLASS_LINKER_INL_H_
 
 #include "class_linker.h"
-
 #include "mirror/art_field.h"
+#include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "mirror/iftable.h"
 #include "mirror/object_array.h"
+#include "sirt_ref.h"
 
 namespace art {
 
 inline mirror::String* ClassLinker::ResolveString(uint32_t string_idx,
-                                           const mirror::ArtMethod* referrer) {
+                                                  const mirror::ArtMethod* referrer) {
   mirror::String* resolved_string = referrer->GetDexCacheStrings()->Get(string_idx);
   if (UNLIKELY(resolved_string == NULL)) {
     mirror::Class* declaring_class = referrer->GetDeclaringClass();
-    mirror::DexCache* dex_cache = declaring_class->GetDexCache();
+    SirtRef<mirror::DexCache> dex_cache(Thread::Current(), declaring_class->GetDexCache());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_string = ResolveString(dex_file, string_idx, dex_cache);
   }
@@ -43,8 +44,9 @@
   mirror::Class* resolved_type = referrer->GetDexCacheResolvedTypes()->Get(type_idx);
   if (UNLIKELY(resolved_type == NULL)) {
     mirror::Class* declaring_class = referrer->GetDeclaringClass();
-    mirror::DexCache* dex_cache = declaring_class->GetDexCache();
-    mirror::ClassLoader* class_loader = declaring_class->GetClassLoader();
+    Thread* self = Thread::Current();
+    SirtRef<mirror::DexCache> dex_cache(self, declaring_class->GetDexCache());
+    SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_type = ResolveType(dex_file, type_idx, dex_cache, class_loader);
   }
@@ -53,10 +55,12 @@
 
 inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, const mirror::ArtField* referrer) {
   mirror::Class* declaring_class = referrer->GetDeclaringClass();
-  mirror::DexCache* dex_cache = declaring_class->GetDexCache();
-  mirror::Class* resolved_type = dex_cache->GetResolvedType(type_idx);
+  mirror::DexCache* dex_cache_ptr = declaring_class->GetDexCache();
+  mirror::Class* resolved_type = dex_cache_ptr->GetResolvedType(type_idx);
   if (UNLIKELY(resolved_type == NULL)) {
-    mirror::ClassLoader* class_loader = declaring_class->GetClassLoader();
+    Thread* self = Thread::Current();
+    SirtRef<mirror::DexCache> dex_cache(self, dex_cache_ptr);
+    SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_type = ResolveType(dex_file, type_idx, dex_cache, class_loader);
   }
@@ -70,8 +74,9 @@
       referrer->GetDexCacheResolvedMethods()->Get(method_idx);
   if (UNLIKELY(resolved_method == NULL || resolved_method->IsRuntimeMethod())) {
     mirror::Class* declaring_class = referrer->GetDeclaringClass();
-    mirror::DexCache* dex_cache = declaring_class->GetDexCache();
-    mirror::ClassLoader* class_loader = declaring_class->GetClassLoader();
+    Thread* self = Thread::Current();
+    SirtRef<mirror::DexCache> dex_cache(self, declaring_class->GetDexCache());
+    SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_method = ResolveMethod(dex_file, method_idx, dex_cache, class_loader, referrer, type);
   }
@@ -81,12 +86,13 @@
 inline mirror::ArtField* ClassLinker::ResolveField(uint32_t field_idx,
                                                    const mirror::ArtMethod* referrer,
                                                    bool is_static) {
+  mirror::Class* declaring_class = referrer->GetDeclaringClass();
   mirror::ArtField* resolved_field =
-      referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx);
+      declaring_class->GetDexCache()->GetResolvedField(field_idx);
   if (UNLIKELY(resolved_field == NULL)) {
-    mirror::Class* declaring_class = referrer->GetDeclaringClass();
-    mirror::DexCache* dex_cache = declaring_class->GetDexCache();
-    mirror::ClassLoader* class_loader = declaring_class->GetClassLoader();
+    Thread* self = Thread::Current();
+    SirtRef<mirror::DexCache>  dex_cache(self, declaring_class->GetDexCache());
+    SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_field = ResolveField(dex_file, field_idx, dex_cache, class_loader, is_static);
   }
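
Each class_linker-inl.h hunk applies the same transformation: raw
mirror::DexCache* and mirror::ClassLoader* locals become SirtRef<> wrappers
before any call that may allocate, so a moving collector can relocate the
referents and update the references in place. The pattern in isolation (a
sketch; ResolveRooted is an illustrative name, the signatures follow the diff):

    // Sketch of the SirtRef rooting pattern used above.
    mirror::Class* ResolveRooted(ClassLinker* linker, const DexFile& dex_file,
                                 uint16_t type_idx, mirror::Class* declaring_class) {
      Thread* self = Thread::Current();
      // Raw pointers could go stale if a copying GC runs during resolution;
      // SirtRef keeps them visible to (and updatable by) the collector.
      SirtRef<mirror::DexCache> dex_cache(self, declaring_class->GetDexCache());
      SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
      return linker->ResolveType(dex_file, type_idx, dex_cache, class_loader);
    }
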
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 184e5d4..500cb59 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -170,20 +170,6 @@
   "[Ljava/lang/StackTraceElement;",
 };
 
-ClassLinker* ClassLinker::CreateFromCompiler(const std::vector<const DexFile*>& boot_class_path,
-                                             InternTable* intern_table) {
-  CHECK_NE(boot_class_path.size(), 0U);
-  UniquePtr<ClassLinker> class_linker(new ClassLinker(intern_table));
-  class_linker->InitFromCompiler(boot_class_path);
-  return class_linker.release();
-}
-
-ClassLinker* ClassLinker::CreateFromImage(InternTable* intern_table) {
-  UniquePtr<ClassLinker> class_linker(new ClassLinker(intern_table));
-  class_linker->InitFromImage();
-  return class_linker.release();
-}
-
 ClassLinker::ClassLinker(InternTable* intern_table)
     // dex_lock_ is recursive as it may be used in stack dumping.
     : dex_lock_("ClassLinker dex lock", kDefaultMutexLevel),
@@ -211,14 +197,15 @@
   // java_lang_Class comes first; it's needed for AllocClass
   Thread* self = Thread::Current();
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  SirtRef<mirror::Class>
-      java_lang_Class(self,
-                      down_cast<mirror::Class*>(heap->AllocObject(self, NULL,
-                                                                  sizeof(mirror::ClassClass))));
+  // The GC can't handle an object with a null class since we can't get the size of this object.
+  heap->IncrementDisableGC(self);
+  SirtRef<mirror::Class> java_lang_Class(self, down_cast<mirror::Class*>(
+      heap->AllocNonMovableObject<true>(self, nullptr, sizeof(mirror::ClassClass))));
   CHECK(java_lang_Class.get() != NULL);
   mirror::Class::SetClassClass(java_lang_Class.get());
   java_lang_Class->SetClass(java_lang_Class.get());
   java_lang_Class->SetClassSize(sizeof(mirror::ClassClass));
+  heap->DecrementDisableGC(self);
   // AllocClass(mirror::Class*) can now be used
 
   // Class[] is used for reflection support.
@@ -251,7 +238,8 @@
   java_lang_String->SetStatus(mirror::Class::kStatusResolved, self);
 
   // Create storage for root classes, save away our work so far (requires descriptors).
-  class_roots_ = mirror::ObjectArray<mirror::Class>::Alloc(self, object_array_class.get(), kClassRootsMax);
+  class_roots_ = mirror::ObjectArray<mirror::Class>::Alloc(self, object_array_class.get(),
+                                                           kClassRootsMax);
   CHECK(class_roots_ != NULL);
   SetClassRoot(kJavaLangClass, java_lang_Class.get());
   SetClassRoot(kJavaLangObject, java_lang_Object.get());
@@ -401,7 +389,7 @@
   array_iftable_->SetInterface(1, java_io_Serializable);
 
   // Sanity check Class[] and Object[]'s interfaces.
-  ClassHelper kh(class_array_class.get(), this);
+  ClassHelper kh(class_array_class.get());
   CHECK_EQ(java_lang_Cloneable, kh.GetDirectInterface(0));
   CHECK_EQ(java_io_Serializable, kh.GetDirectInterface(1));
   kh.ChangeClass(object_array_class.get());
@@ -487,7 +475,7 @@
       FindSystemClass("Ljava/lang/ref/FinalizerReference;");
 
   mirror::ArtField* pendingNext = java_lang_ref_Reference->GetInstanceField(0);
-  FieldHelper fh(pendingNext, this);
+  FieldHelper fh(pendingNext);
   CHECK_STREQ(fh.GetName(), "pendingNext");
   CHECK_STREQ(fh.GetTypeDescriptor(), "Ljava/lang/ref/Reference;");
 
@@ -1043,6 +1031,7 @@
   VLOG(startup) << "ClassLinker::InitFromImage entering";
   CHECK(!init_done_);
 
+  Thread* self = Thread::Current();
   gc::Heap* heap = Runtime::Current()->GetHeap();
   gc::space::ImageSpace* space = heap->GetImageSpace();
   dex_cache_image_class_lookup_required_ = true;
@@ -1059,9 +1048,10 @@
   mirror::ObjectArray<mirror::DexCache>* dex_caches =
       dex_caches_object->AsObjectArray<mirror::DexCache>();
 
-  mirror::ObjectArray<mirror::Class>* class_roots =
-      space->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->AsObjectArray<mirror::Class>();
-  class_roots_ = class_roots;
+  SirtRef<mirror::ObjectArray<mirror::Class> > class_roots(
+      self,
+      space->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->AsObjectArray<mirror::Class>());
+  class_roots_ = class_roots.get();
 
   // Special case of setting up the String class early so that we can test arbitrary objects
   // as being Strings or not
@@ -1069,7 +1059,6 @@
 
   CHECK_EQ(oat_file.GetOatHeader().GetDexFileCount(),
            static_cast<uint32_t>(dex_caches->GetLength()));
-  Thread* self = Thread::Current();
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
     SirtRef<mirror::DexCache> dex_cache(self, dex_caches->Get(i));
     const std::string& dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8());
@@ -1096,13 +1085,12 @@
   // Set entry point to interpreter if in InterpretOnly mode.
   if (Runtime::Current()->GetInstrumentation()->InterpretOnly()) {
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    heap->FlushAllocStack();
-    heap->GetLiveBitmap()->Walk(InitFromImageInterpretOnlyCallback, this);
+    heap->VisitObjects(InitFromImageInterpretOnlyCallback, this);
   }
 
   // reinit class_roots_
   mirror::Class::SetClassClass(class_roots->Get(kJavaLangClass));
-  class_roots_ = class_roots;
+  class_roots_ = class_roots.get();
 
   // reinit array_iftable_ from any array class instance, they should be ==
   array_iftable_ = GetClassRoot(kObjectArrayClass)->GetIfTable();
@@ -1192,7 +1180,6 @@
   }
 }
 
-
 ClassLinker::~ClassLinker() {
   mirror::Class::ResetClass();
   mirror::String::ResetClass();
@@ -1214,10 +1201,10 @@
 
 mirror::DexCache* ClassLinker::AllocDexCache(Thread* self, const DexFile& dex_file) {
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  mirror::Class* dex_cache_class = GetClassRoot(kJavaLangDexCache);
-  SirtRef<mirror::DexCache> dex_cache(self,
-                              down_cast<mirror::DexCache*>(heap->AllocObject(self, dex_cache_class,
-                                                                dex_cache_class->GetObjectSize())));
+  SirtRef<mirror::Class> dex_cache_class(self, GetClassRoot(kJavaLangDexCache));
+  SirtRef<mirror::DexCache> dex_cache(
+      self, down_cast<mirror::DexCache*>(
+          heap->AllocObject<true>(self, dex_cache_class.get(), dex_cache_class->GetObjectSize())));
   if (dex_cache.get() == NULL) {
     return NULL;
   }
@@ -1253,13 +1240,8 @@
     return NULL;
   }
 
-  dex_cache->Init(&dex_file,
-                  location.get(),
-                  strings.get(),
-                  types.get(),
-                  methods.get(),
-                  fields.get(),
-                  initialized_static_storage.get());
+  dex_cache->Init(&dex_file, location.get(), strings.get(), types.get(), methods.get(),
+                  fields.get(), initialized_static_storage.get());
   return dex_cache.get();
 }
 
@@ -1267,7 +1249,7 @@
                                        size_t class_size) {
   DCHECK_GE(class_size, sizeof(mirror::Class));
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  mirror::Object* k = heap->AllocObject(self, java_lang_Class, class_size);
+  mirror::Object* k = heap->AllocNonMovableObject<true>(self, java_lang_Class, class_size);
   if (UNLIKELY(k == NULL)) {
     CHECK(self->IsExceptionPending());  // OOME.
     return NULL;
@@ -1285,18 +1267,19 @@
 }
 
 mirror::ArtField* ClassLinker::AllocArtField(Thread* self) {
-  return down_cast<mirror::ArtField*>(GetClassRoot(kJavaLangReflectArtField)->AllocObject(self));
+  return down_cast<mirror::ArtField*>(
+      GetClassRoot(kJavaLangReflectArtField)->AllocNonMovableObject(self));
 }
 
 mirror::ArtMethod* ClassLinker::AllocArtMethod(Thread* self) {
-  return down_cast<mirror::ArtMethod*>(GetClassRoot(kJavaLangReflectArtMethod)->AllocObject(self));
+  return down_cast<mirror::ArtMethod*>(
+      GetClassRoot(kJavaLangReflectArtMethod)->AllocNonMovableObject(self));
 }
 
-mirror::ObjectArray<mirror::StackTraceElement>* ClassLinker::AllocStackTraceElementArray(Thread* self,
-                                                                                         size_t length) {
-  return mirror::ObjectArray<mirror::StackTraceElement>::Alloc(self,
-                                                               GetClassRoot(kJavaLangStackTraceElementArrayClass),
-                                                               length);
+mirror::ObjectArray<mirror::StackTraceElement>* ClassLinker::AllocStackTraceElementArray(
+    Thread* self, size_t length) {
+  return mirror::ObjectArray<mirror::StackTraceElement>::Alloc(
+      self, GetClassRoot(kJavaLangStackTraceElementArrayClass), length);
 }
 
 static mirror::Class* EnsureResolved(Thread* self, mirror::Class* klass)
@@ -1332,10 +1315,12 @@
 }
 
 mirror::Class* ClassLinker::FindSystemClass(const char* descriptor) {
-  return FindClass(descriptor, NULL);
+  SirtRef<mirror::ClassLoader> class_loader(Thread::Current(), nullptr);
+  return FindClass(descriptor, class_loader);
 }
 
-mirror::Class* ClassLinker::FindClass(const char* descriptor, mirror::ClassLoader* class_loader) {
+mirror::Class* ClassLinker::FindClass(const char* descriptor,
+                                      SirtRef<mirror::ClassLoader>& class_loader) {
   DCHECK_NE(*descriptor, '\0') << "descriptor is empty string";
   Thread* self = Thread::Current();
   DCHECK(self != NULL);
@@ -1346,20 +1331,19 @@
     return FindPrimitiveClass(descriptor[0]);
   }
   // Find the class in the loaded classes table.
-  mirror::Class* klass = LookupClass(descriptor, class_loader);
+  mirror::Class* klass = LookupClass(descriptor, class_loader.get());
   if (klass != NULL) {
     return EnsureResolved(self, klass);
   }
   // Class is not yet loaded.
   if (descriptor[0] == '[') {
     return CreateArrayClass(descriptor, class_loader);
-
-  } else if (class_loader == NULL) {
+  } else if (class_loader.get() == nullptr) {
     DexFile::ClassPathEntry pair = DexFile::FindInClassPath(descriptor, boot_class_path_);
     if (pair.second != NULL) {
-      return DefineClass(descriptor, NULL, *pair.first, *pair.second);
+      SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
+      return DefineClass(descriptor, class_loader, *pair.first, *pair.second);
     }
-
   } else if (Runtime::Current()->UseCompileTimeClassPath()) {
     // First try the boot class path; we check the descriptor first to avoid an unnecessary
     // throw of a NoClassDefFoundError.
@@ -1372,7 +1356,8 @@
     const std::vector<const DexFile*>* class_path;
     {
       ScopedObjectAccessUnchecked soa(self);
-      ScopedLocalRef<jobject> jclass_loader(soa.Env(), soa.AddLocalReference<jobject>(class_loader));
+      ScopedLocalRef<jobject> jclass_loader(soa.Env(),
+                                            soa.AddLocalReference<jobject>(class_loader.get()));
       class_path = &Runtime::Current()->GetCompileTimeClassPath(jclass_loader.get());
     }
 
@@ -1384,7 +1369,7 @@
   } else {
     ScopedObjectAccessUnchecked soa(self->GetJniEnv());
     ScopedLocalRef<jobject> class_loader_object(soa.Env(),
-                                                soa.AddLocalReference<jobject>(class_loader));
+                                                soa.AddLocalReference<jobject>(class_loader.get()));
     std::string class_name_string(DescriptorToDot(descriptor));
     ScopedLocalRef<jobject> result(soa.Env(), NULL);
     {
@@ -1418,7 +1403,7 @@
 }
 
 mirror::Class* ClassLinker::DefineClass(const char* descriptor,
-                                        mirror::ClassLoader* class_loader,
+                                        SirtRef<mirror::ClassLoader>& class_loader,
                                         const DexFile& dex_file,
                                         const DexFile::ClassDef& dex_class_def) {
   Thread* self = Thread::Current();
@@ -1449,7 +1434,7 @@
     return NULL;
   }
   klass->SetDexCache(FindDexCache(dex_file));
-  LoadClass(dex_file, dex_class_def, klass, class_loader);
+  LoadClass(dex_file, dex_class_def, klass, class_loader.get());
   // Check for a pending exception during load
   if (self->IsExceptionPending()) {
     klass->SetStatus(mirror::Class::kStatusError, self);
@@ -1457,14 +1442,12 @@
   }
   ObjectLock lock(self, klass.get());
   klass->SetClinitThreadId(self->GetTid());
-  {
-    // Add the newly loaded class to the loaded classes table.
-    mirror::Class* existing = InsertClass(descriptor, klass.get(), Hash(descriptor));
-    if (existing != NULL) {
-      // We failed to insert because we raced with another thread. Calling EnsureResolved may cause
-      // this thread to block.
-      return EnsureResolved(self, existing);
-    }
+  // Add the newly loaded class to the loaded classes table.
+  mirror::Class* existing = InsertClass(descriptor, klass.get(), Hash(descriptor));
+  if (existing != NULL) {
+    // We failed to insert because we raced with another thread. Calling EnsureResolved may cause
+    // this thread to block.
+    return EnsureResolved(self, existing);
   }
   // Finish loading (if necessary) by finding parents
   CHECK(!klass->IsLoaded());
@@ -1476,7 +1459,9 @@
   CHECK(klass->IsLoaded());
   // Link the class (if necessary)
   CHECK(!klass->IsResolved());
-  if (!LinkClass(klass, NULL, self)) {
+  // TODO: Use fast jobjects?
+  SirtRef<mirror::ObjectArray<mirror::Class> > interfaces(self, nullptr);
+  if (!LinkClass(self, klass, interfaces)) {
     // Linking failed.
     klass->SetStatus(mirror::Class::kStatusError, self);
     return NULL;
@@ -2083,7 +2068,7 @@
 //
 // Returns NULL with an exception raised on failure.
 mirror::Class* ClassLinker::CreateArrayClass(const char* descriptor,
-                                             mirror::ClassLoader* class_loader) {
+                                             SirtRef<mirror::ClassLoader>& class_loader) {
   // Identify the underlying component type
   CHECK_EQ('[', descriptor[0]);
   mirror::Class* component_type = FindClass(descriptor + 1, class_loader);
@@ -2109,7 +2094,7 @@
   // because we effectively do this lookup again when we add the new
   // class to the hash table --- necessary because of possible races with
   // other threads.)
-  if (class_loader != component_type->GetClassLoader()) {
+  if (class_loader.get() != component_type->GetClassLoader()) {
     mirror::Class* new_class = LookupClass(descriptor, component_type->GetClassLoader());
     if (new_class != NULL) {
       return new_class;
@@ -2266,11 +2251,10 @@
 bool ClassLinker::RemoveClass(const char* descriptor, const mirror::ClassLoader* class_loader) {
   size_t hash = Hash(descriptor);
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  ClassHelper kh;
   for (auto it = class_table_.lower_bound(hash), end = class_table_.end(); it != end && it->first == hash;
        ++it) {
     mirror::Class* klass = it->second;
-    kh.ChangeClass(klass);
+    ClassHelper kh(klass);
     if ((klass->GetClassLoader() == class_loader) &&
         (strcmp(descriptor, kh.GetDescriptor()) == 0)) {
       class_table_.erase(it);
@@ -2313,18 +2297,17 @@
 mirror::Class* ClassLinker::LookupClassFromTableLocked(const char* descriptor,
                                                        const mirror::ClassLoader* class_loader,
                                                        size_t hash) {
-  ClassHelper kh(NULL, this);
   auto end = class_table_.end();
   for (auto it = class_table_.lower_bound(hash); it != end && it->first == hash; ++it) {
     mirror::Class* klass = it->second;
-    kh.ChangeClass(klass);
+    ClassHelper kh(klass);
     if ((klass->GetClassLoader() == class_loader) &&
         (strcmp(descriptor, kh.GetDescriptor()) == 0)) {
       if (kIsDebugBuild) {
         // Check for duplicates in the table.
         for (++it; it != end && it->first == hash; ++it) {
           mirror::Class* klass2 = it->second;
-          kh.ChangeClass(klass2);
+          ClassHelper kh(klass2);
           CHECK(!((klass2->GetClassLoader() == class_loader) &&
                   (strcmp(descriptor, kh.GetDescriptor()) == 0)))
               << PrettyClass(klass) << " " << klass << " " << klass->GetClassLoader() << " "
@@ -2354,14 +2337,13 @@
   const char* old_no_suspend_cause =
       self->StartAssertNoThreadSuspension("Moving image classes to class table");
   mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches();
-  ClassHelper kh(NULL, this);
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
     mirror::DexCache* dex_cache = dex_caches->Get(i);
     mirror::ObjectArray<mirror::Class>* types = dex_cache->GetResolvedTypes();
     for (int32_t j = 0; j < types->GetLength(); j++) {
       mirror::Class* klass = types->Get(j);
       if (klass != NULL) {
-        kh.ChangeClass(klass);
+        ClassHelper kh(klass);
         DCHECK(klass->GetClassLoader() == NULL);
         const char* descriptor = kh.GetDescriptor();
         size_t hash = Hash(descriptor);
@@ -2429,11 +2411,10 @@
   }
   size_t hash = Hash(descriptor);
   ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  ClassHelper kh(NULL, this);
   for (auto it = class_table_.lower_bound(hash), end = class_table_.end();
       it != end && it->first == hash; ++it) {
     mirror::Class* klass = it->second;
-    kh.ChangeClass(klass);
+    ClassHelper kh(klass);
     if (strcmp(descriptor, kh.GetDescriptor()) == 0) {
       result.push_back(klass);
     }
@@ -2687,12 +2668,10 @@
 static void CheckProxyMethod(mirror::ArtMethod* method,
                              SirtRef<mirror::ArtMethod>& prototype);
 
-mirror::Class* ClassLinker::CreateProxyClass(mirror::String* name,
-                                             mirror::ObjectArray<mirror::Class>* interfaces,
-                                             mirror::ClassLoader* loader,
-                                             mirror::ObjectArray<mirror::ArtMethod>* methods,
-                                             mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >* throws) {
-  Thread* self = Thread::Current();
+mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccess& soa, jstring name,
+                                             jobjectArray interfaces, jobject loader,
+                                             jobjectArray methods, jobjectArray throws) {
+  Thread* self = soa.Self();
   SirtRef<mirror::Class> klass(self, AllocClass(self, GetClassRoot(kJavaLangClass),
                                                 sizeof(mirror::SynthesizedProxyClass)));
   if (klass.get() == NULL) {
@@ -2702,9 +2681,9 @@
   DCHECK(klass->GetClass() != NULL);
   klass->SetObjectSize(sizeof(mirror::Proxy));
   klass->SetAccessFlags(kAccClassIsProxy | kAccPublic | kAccFinal);
-  klass->SetClassLoader(loader);
+  klass->SetClassLoader(soa.Decode<mirror::ClassLoader*>(loader));
   DCHECK_EQ(klass->GetPrimitiveType(), Primitive::kPrimNot);
-  klass->SetName(name);
+  klass->SetName(soa.Decode<mirror::String*>(name));
   mirror::Class* proxy_class = GetClassRoot(kJavaLangReflectProxy);
   klass->SetDexCache(proxy_class->GetDexCache());
   klass->SetStatus(mirror::Class::kStatusIdx, self);
@@ -2742,8 +2721,7 @@
 
   // Proxies have 1 direct method, the constructor
   {
-    mirror::ObjectArray<mirror::ArtMethod>* directs =
-      AllocArtMethodArray(self, 1);
+    mirror::ObjectArray<mirror::ArtMethod>* directs = AllocArtMethodArray(self, 1);
     if (UNLIKELY(directs == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return NULL;
@@ -2757,11 +2735,11 @@
     klass->SetDirectMethod(0, constructor);
   }
 
-  // Create virtual method using specified prototypes
-  size_t num_virtual_methods = methods->GetLength();
+  // Create virtual methods using the specified prototypes.
+  size_t num_virtual_methods =
+      soa.Decode<mirror::ObjectArray<mirror::ArtMethod>*>(methods)->GetLength();
   {
-    mirror::ObjectArray<mirror::ArtMethod>* virtuals =
-        AllocArtMethodArray(self, num_virtual_methods);
+    mirror::ObjectArray<mirror::ArtMethod>* virtuals = AllocArtMethodArray(self, num_virtual_methods);
     if (UNLIKELY(virtuals == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return NULL;
@@ -2769,7 +2747,9 @@
     klass->SetVirtualMethods(virtuals);
   }
   for (size_t i = 0; i < num_virtual_methods; ++i) {
-    SirtRef<mirror::ArtMethod> prototype(self, methods->Get(i));
+    mirror::ObjectArray<mirror::ArtMethod>* decoded_methods =
+        soa.Decode<mirror::ObjectArray<mirror::ArtMethod>*>(methods);
+    SirtRef<mirror::ArtMethod> prototype(self, decoded_methods->Get(i));
     mirror::ArtMethod* clone = CreateProxyMethod(self, klass, prototype);
     if (UNLIKELY(clone == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
@@ -2785,13 +2765,15 @@
   {
     ObjectLock lock(self, klass.get());  // Must hold lock on object when resolved.
     // Link the fields and virtual methods, creating vtable and iftables
-    if (!LinkClass(klass, interfaces, self)) {
+    SirtRef<mirror::ObjectArray<mirror::Class> > sirt_interfaces(
+        self, soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
+    if (!LinkClass(self, klass, sirt_interfaces)) {
       klass->SetStatus(mirror::Class::kStatusError, self);
       return NULL;
     }
 
-    interfaces_sfield->SetObject(klass.get(), interfaces);
-    throws_sfield->SetObject(klass.get(), throws);
+    interfaces_sfield->SetObject(klass.get(), soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
+    throws_sfield->SetObject(klass.get(), soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >*>(throws));
     klass->SetStatus(mirror::Class::kStatusInitialized, self);
   }
 
@@ -2800,22 +2782,25 @@
     CHECK(klass->GetIFields() == NULL);
     CheckProxyConstructor(klass->GetDirectMethod(0));
     for (size_t i = 0; i < num_virtual_methods; ++i) {
-      SirtRef<mirror::ArtMethod> prototype(self, methods->Get(i));
+      mirror::ObjectArray<mirror::ArtMethod>* decoded_methods =
+          soa.Decode<mirror::ObjectArray<mirror::ArtMethod>*>(methods);
+      SirtRef<mirror::ArtMethod> prototype(self, decoded_methods->Get(i));
       CheckProxyMethod(klass->GetVirtualMethod(i), prototype);
     }
 
+    mirror::String* decoded_name = soa.Decode<mirror::String*>(name);
     std::string interfaces_field_name(StringPrintf("java.lang.Class[] %s.interfaces",
-                                                   name->ToModifiedUtf8().c_str()));
+                                                   decoded_name->ToModifiedUtf8().c_str()));
     CHECK_EQ(PrettyField(klass->GetStaticField(0)), interfaces_field_name);
 
     std::string throws_field_name(StringPrintf("java.lang.Class[][] %s.throws",
-                                               name->ToModifiedUtf8().c_str()));
+                                               decoded_name->ToModifiedUtf8().c_str()));
     CHECK_EQ(PrettyField(klass->GetStaticField(1)), throws_field_name);
 
     mirror::SynthesizedProxyClass* synth_proxy_class =
         down_cast<mirror::SynthesizedProxyClass*>(klass.get());
-    CHECK_EQ(synth_proxy_class->GetInterfaces(), interfaces);
-    CHECK_EQ(synth_proxy_class->GetThrows(), throws);
+    CHECK_EQ(synth_proxy_class->GetInterfaces(), soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
+    CHECK_EQ(synth_proxy_class->GetThrows(), soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >*>(throws));
   }
   std::string descriptor(GetDescriptorForProxy(klass.get()));
   mirror::Class* existing = InsertClass(descriptor.c_str(), klass.get(), Hash(descriptor.c_str()));
@@ -2977,6 +2962,10 @@
   return true;
 }
 
+bool ClassLinker::IsInitialized() const {
+  return init_done_;
+}
+
 bool ClassLinker::InitializeClass(mirror::Class* klass, bool can_init_statics,
                                   bool can_init_parents) {
   // see JLS 3rd edition, 12.4.2 "Detailed Initialization Procedure" for the locking protocol
@@ -3084,7 +3073,9 @@
     const DexFile::ClassDef* dex_class_def = kh.GetClassDef();
     CHECK(dex_class_def != NULL);
     const DexFile& dex_file = kh.GetDexFile();
-    EncodedStaticFieldValueIterator it(dex_file, kh.GetDexCache(), klass->GetClassLoader(),
+    SirtRef<mirror::ClassLoader> class_loader(self, klass->GetClassLoader());
+    SirtRef<mirror::DexCache> dex_cache(self, kh.GetDexCache());
+    EncodedStaticFieldValueIterator it(dex_file, &dex_cache, &class_loader,
                                        this, *dex_class_def);
     if (it.HasNext()) {
       CHECK(can_init_statics);
@@ -3196,12 +3187,11 @@
       }
     }
   }
-  mirror::IfTable* iftable = klass->GetIfTable();
   for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
-    mirror::Class* interface = iftable->GetInterface(i);
+    mirror::Class* interface = klass->GetIfTable()->GetInterface(i);
     if (klass->GetClassLoader() != interface->GetClassLoader()) {
       for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
-        const mirror::ArtMethod* method = iftable->GetMethodArray(i)->Get(j);
+        const mirror::ArtMethod* method = klass->GetIfTable()->GetMethodArray(i)->Get(j);
         if (!IsSameMethodSignatureInDifferentClassContexts(method, interface,
                                                            method->GetDeclaringClass())) {
           ThrowLinkageError(klass, "Class %s method %s resolves differently in interface %s",
@@ -3259,11 +3249,14 @@
   if (klass1 == klass2) {
     return true;
   }
-  mirror::Class* found1 = FindClass(descriptor, klass1->GetClassLoader());
+  Thread* self = Thread::Current();
+  SirtRef<mirror::ClassLoader> class_loader1(self, klass1->GetClassLoader());
+  mirror::Class* found1 = FindClass(descriptor, class_loader1);
   if (found1 == NULL) {
     Thread::Current()->ClearException();
   }
-  mirror::Class* found2 = FindClass(descriptor, klass2->GetClassLoader());
+  SirtRef<mirror::ClassLoader> class_loader2(self, klass2->GetClassLoader());
+  mirror::Class* found2 = FindClass(descriptor, class_loader2);
   if (found2 == NULL) {
     Thread::Current()->ClearException();
   }
@@ -3285,17 +3278,20 @@
 }
 
 void ClassLinker::ConstructFieldMap(const DexFile& dex_file, const DexFile::ClassDef& dex_class_def,
-                                    mirror::Class* c, SafeMap<uint32_t, mirror::ArtField*>& field_map) {
-  mirror::ClassLoader* cl = c->GetClassLoader();
+                                    mirror::Class* c,
+                                    SafeMap<uint32_t, mirror::ArtField*>& field_map) {
   const byte* class_data = dex_file.GetClassData(dex_class_def);
   ClassDataItemIterator it(dex_file, class_data);
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, c->GetDexCache());
+  SirtRef<mirror::ClassLoader> class_loader(self, c->GetClassLoader());
   for (size_t i = 0; it.HasNextStaticField(); i++, it.Next()) {
-    field_map.Put(i, ResolveField(dex_file, it.GetMemberIndex(), c->GetDexCache(), cl, true));
+    field_map.Put(i, ResolveField(dex_file, it.GetMemberIndex(), dex_cache, class_loader, true));
   }
 }
 
-bool ClassLinker::LinkClass(SirtRef<mirror::Class>& klass,
-                            mirror::ObjectArray<mirror::Class>* interfaces, Thread* self) {
+bool ClassLinker::LinkClass(Thread* self, SirtRef<mirror::Class>& klass,
+                            SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces) {
   CHECK_EQ(mirror::Class::kStatusLoaded, klass->GetStatus());
   if (!LinkSuperClass(klass)) {
     return false;
@@ -3419,7 +3415,7 @@
 
 // Populate the class vtable and itable. Compute return type indices.
 bool ClassLinker::LinkMethods(SirtRef<mirror::Class>& klass,
-                              mirror::ObjectArray<mirror::Class>* interfaces) {
+                              SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces) {
   if (klass->IsInterface()) {
     // No vtable.
     size_t count = klass->NumVirtualMethods();
@@ -3453,15 +3449,13 @@
       return false;
     }
     // See if any of our virtual methods override the superclass.
-    MethodHelper local_mh(NULL, this);
-    MethodHelper super_mh(NULL, this);
     for (size_t i = 0; i < klass->NumVirtualMethods(); ++i) {
       mirror::ArtMethod* local_method = klass->GetVirtualMethodDuringLinking(i);
-      local_mh.ChangeMethod(local_method);
+      MethodHelper local_mh(local_method);
       size_t j = 0;
       for (; j < actual_count; ++j) {
         mirror::ArtMethod* super_method = vtable->Get(j);
-        super_mh.ChangeMethod(super_method);
+        MethodHelper super_mh(super_method);
         if (local_mh.HasSameNameAndSignature(&super_mh)) {
           if (klass->CanAccessMember(super_method->GetDeclaringClass(), super_method->GetAccessFlags())) {
             if (super_method->IsFinal()) {
@@ -3525,7 +3519,7 @@
 }
 
 bool ClassLinker::LinkInterfaceMethods(SirtRef<mirror::Class>& klass,
-                                       mirror::ObjectArray<mirror::Class>* interfaces) {
+                                       SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces) {
   // Set the imt table to be all conflicts by default.
   klass->SetImTable(Runtime::Current()->GetDefaultImt());
   size_t super_ifcount;
@@ -3535,11 +3529,13 @@
     super_ifcount = 0;
   }
   size_t ifcount = super_ifcount;
-  ClassHelper kh(klass.get(), this);
-  uint32_t num_interfaces = interfaces == NULL ? kh.NumDirectInterfaces() : interfaces->GetLength();
+  ClassHelper kh(klass.get());
+  uint32_t num_interfaces =
+      interfaces.get() == nullptr ? kh.NumDirectInterfaces() : interfaces->GetLength();
   ifcount += num_interfaces;
   for (size_t i = 0; i < num_interfaces; i++) {
-    mirror::Class* interface = interfaces == NULL ? kh.GetDirectInterface(i) : interfaces->Get(i);
+    mirror::Class* interface =
+        interfaces.get() == nullptr ? kh.GetDirectInterface(i) : interfaces->Get(i);
     ifcount += interface->GetIfTableCount();
   }
   if (ifcount == 0) {
@@ -3580,7 +3576,8 @@
   // Flatten the interface inheritance hierarchy.
   size_t idx = super_ifcount;
   for (size_t i = 0; i < num_interfaces; i++) {
-    mirror::Class* interface = interfaces == NULL ? kh.GetDirectInterface(i) : interfaces->Get(i);
+    mirror::Class* interface =
+        interfaces.get() == nullptr ? kh.GetDirectInterface(i) : interfaces->Get(i);
     DCHECK(interface != NULL);
     if (!interface->IsInterface()) {
       ClassHelper ih(interface);
@@ -3643,20 +3640,21 @@
     return false;
   }
   std::vector<mirror::ArtMethod*> miranda_list;
-  MethodHelper vtable_mh(NULL, this);
-  MethodHelper interface_mh(NULL, this);
+  MethodHelper vtable_mh(NULL);
+  MethodHelper interface_mh(NULL);
   for (size_t i = 0; i < ifcount; ++i) {
     mirror::Class* interface = iftable->GetInterface(i);
     size_t num_methods = interface->NumVirtualMethods();
     if (num_methods > 0) {
-      mirror::ObjectArray<mirror::ArtMethod>* method_array =
-          AllocArtMethodArray(self, num_methods);
-      if (UNLIKELY(method_array == NULL)) {
+      SirtRef<mirror::ObjectArray<mirror::ArtMethod> >
+          method_array(self, AllocArtMethodArray(self, num_methods));
+      if (UNLIKELY(method_array.get() == nullptr)) {
         CHECK(self->IsExceptionPending());  // OOME.
         return false;
       }
-      iftable->SetMethodArray(i, method_array);
-      mirror::ObjectArray<mirror::ArtMethod>* vtable = klass->GetVTableDuringLinking();
+      iftable->SetMethodArray(i, method_array.get());
+      SirtRef<mirror::ObjectArray<mirror::ArtMethod> > vtable(self,
+                                                              klass->GetVTableDuringLinking());
       for (size_t j = 0; j < num_methods; ++j) {
         mirror::ArtMethod* interface_method = interface->GetVirtualMethod(j);
         interface_mh.ChangeMethod(interface_method);
@@ -3709,10 +3707,7 @@
               CHECK(self->IsExceptionPending());  // OOME.
               return false;
             }
-#ifdef MOVING_GARBAGE_COLLECTOR
             // TODO: If methods move then the miranda_list may hold stale references.
-            UNIMPLEMENTED(FATAL);
-#endif
             miranda_list.push_back(miranda_method.get());
           }
           method_array->Set(j, miranda_method.get());
@@ -3791,17 +3786,16 @@
 }
 
 struct LinkFieldsComparator {
-  explicit LinkFieldsComparator(FieldHelper* fh)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : fh_(fh) {}
+  explicit LinkFieldsComparator() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  }
   // No thread safety analysis, as this will be called from STL code. Checked lock held in constructor.
   bool operator()(const mirror::ArtField* field1, const mirror::ArtField* field2)
       NO_THREAD_SAFETY_ANALYSIS {
     // First come reference fields, then 64-bit, and finally 32-bit
-    fh_->ChangeField(field1);
-    Primitive::Type type1 = fh_->GetTypeAsPrimitiveType();
-    fh_->ChangeField(field2);
-    Primitive::Type type2 = fh_->GetTypeAsPrimitiveType();
+    FieldHelper fh1(field1);
+    Primitive::Type type1 = fh1.GetTypeAsPrimitiveType();
+    FieldHelper fh2(field2);
+    Primitive::Type type2 = fh2.GetTypeAsPrimitiveType();
     bool isPrimitive1 = type1 != Primitive::kPrimNot;
     bool isPrimitive2 = type2 != Primitive::kPrimNot;
     bool is64bit1 = isPrimitive1 && (type1 == Primitive::kPrimLong || type1 == Primitive::kPrimDouble);
@@ -3813,14 +3807,10 @@
     }
 
     // same basic group? then sort by string.
-    fh_->ChangeField(field1);
-    const char* name1 = fh_->GetName();
-    fh_->ChangeField(field2);
-    const char* name2 = fh_->GetName();
+    const char* name1 = fh1.GetName();
+    const char* name2 = fh2.GetName();
     return strcmp(name1, name2) < 0;
   }
-
-  FieldHelper* fh_;
 };
 
 bool ClassLinker::LinkFields(SirtRef<mirror::Class>& klass, bool is_static) {
@@ -3855,17 +3845,15 @@
     CHECK(f != NULL);
     grouped_and_sorted_fields.push_back(f);
   }
-  FieldHelper fh(NULL, this);
-  std::sort(grouped_and_sorted_fields.begin(),
-            grouped_and_sorted_fields.end(),
-            LinkFieldsComparator(&fh));
+  std::sort(grouped_and_sorted_fields.begin(), grouped_and_sorted_fields.end(),
+            LinkFieldsComparator());
 
   // References should be at the front.
   size_t current_field = 0;
   size_t num_reference_fields = 0;
   for (; current_field < num_fields; current_field++) {
     mirror::ArtField* field = grouped_and_sorted_fields.front();
-    fh.ChangeField(field);
+    FieldHelper fh(field);
     Primitive::Type type = fh.GetTypeAsPrimitiveType();
     bool isPrimitive = type != Primitive::kPrimNot;
     if (isPrimitive) {
@@ -3884,7 +3872,7 @@
   if (current_field != num_fields && !IsAligned<8>(field_offset.Uint32Value())) {
     for (size_t i = 0; i < grouped_and_sorted_fields.size(); i++) {
       mirror::ArtField* field = grouped_and_sorted_fields[i];
-      fh.ChangeField(field);
+      FieldHelper fh(field);
       Primitive::Type type = fh.GetTypeAsPrimitiveType();
       CHECK(type != Primitive::kPrimNot);  // should only be working on primitive types
       if (type == Primitive::kPrimLong || type == Primitive::kPrimDouble) {
@@ -3906,7 +3894,7 @@
   while (!grouped_and_sorted_fields.empty()) {
     mirror::ArtField* field = grouped_and_sorted_fields.front();
     grouped_and_sorted_fields.pop_front();
-    fh.ChangeField(field);
+    FieldHelper fh(field);
     Primitive::Type type = fh.GetTypeAsPrimitiveType();
     CHECK(type != Primitive::kPrimNot);  // should only be working on primitive types
     fields->Set(current_field, field);
@@ -3920,11 +3908,11 @@
 
   // We lie to the GC about the java.lang.ref.Reference.referent field, so it doesn't scan it.
   if (!is_static &&
-      (strcmp("Ljava/lang/ref/Reference;", ClassHelper(klass.get(), this).GetDescriptor()) == 0)) {
+      (strcmp("Ljava/lang/ref/Reference;", ClassHelper(klass.get()).GetDescriptor()) == 0)) {
     // We know there are no non-reference fields in the Reference classes, and we know
     // that 'referent' is alphabetically last, so this is easy...
     CHECK_EQ(num_reference_fields, num_fields);
-    fh.ChangeField(fields->Get(num_fields - 1));
+    FieldHelper fh(fields->Get(num_fields - 1));
     CHECK_STREQ(fh.GetName(), "referent");
     --num_reference_fields;
   }
@@ -3942,10 +3930,10 @@
                     << " offset=" << field->GetField32(MemberOffset(mirror::ArtField::OffsetOffset()),
                                                        false);
       }
-      fh.ChangeField(field);
+      FieldHelper fh(field);
       Primitive::Type type = fh.GetTypeAsPrimitiveType();
       bool is_primitive = type != Primitive::kPrimNot;
-      if ((strcmp("Ljava/lang/ref/Reference;", ClassHelper(klass.get(), this).GetDescriptor()) == 0)
+      if ((strcmp("Ljava/lang/ref/Reference;", ClassHelper(klass.get()).GetDescriptor()) == 0)
           && (strcmp("referent", fh.GetName()) == 0)) {
         is_primitive = true;  // We lied above, so we have to expect a lie here.
       }
@@ -3970,7 +3958,7 @@
   } else {
     klass->SetNumReferenceInstanceFields(num_reference_fields);
     if (!klass->IsVariableSize()) {
-      DCHECK_GE(size, sizeof(mirror::Object)) << ClassHelper(klass.get(), this).GetDescriptor();
+      DCHECK_GE(size, sizeof(mirror::Object)) << ClassHelper(klass.get()).GetDescriptor();
       size_t previous_size = klass->GetObjectSize();
       if (previous_size != 0) {
         // Make sure that we didn't originally have an incorrect size.
@@ -4034,9 +4022,9 @@
   }
 }
 
-mirror::String* ClassLinker::ResolveString(const DexFile& dex_file,
-                                           uint32_t string_idx, mirror::DexCache* dex_cache) {
-  DCHECK(dex_cache != NULL);
+mirror::String* ClassLinker::ResolveString(const DexFile& dex_file, uint32_t string_idx,
+                                           SirtRef<mirror::DexCache>& dex_cache) {
+  DCHECK(dex_cache.get() != nullptr);
   mirror::String* resolved = dex_cache->GetResolvedString(string_idx);
   if (resolved != NULL) {
     return resolved;
@@ -4048,11 +4036,18 @@
   return string;
 }
 
-mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file,
-                                        uint16_t type_idx,
-                                        mirror::DexCache* dex_cache,
-                                        mirror::ClassLoader* class_loader) {
-  DCHECK(dex_cache != NULL);
+mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file, uint16_t type_idx,
+                                        const mirror::Class* referrer) {
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, referrer->GetDexCache());
+  SirtRef<mirror::ClassLoader> class_loader(self, referrer->GetClassLoader());
+  return ResolveType(dex_file, type_idx, dex_cache, class_loader);
+}
+
+mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file, uint16_t type_idx,
+                                        SirtRef<mirror::DexCache>& dex_cache,
+                                        SirtRef<mirror::ClassLoader>& class_loader) {
+  DCHECK(dex_cache.get() != NULL);
   mirror::Class* resolved = dex_cache->GetResolvedType(type_idx);
   if (resolved == NULL) {
     const char* descriptor = dex_file.StringByTypeIdx(type_idx);
@@ -4082,11 +4077,11 @@
 
 mirror::ArtMethod* ClassLinker::ResolveMethod(const DexFile& dex_file,
                                               uint32_t method_idx,
-                                              mirror::DexCache* dex_cache,
-                                              mirror::ClassLoader* class_loader,
+                                              SirtRef<mirror::DexCache>& dex_cache,
+                                              SirtRef<mirror::ClassLoader>& class_loader,
                                               const mirror::ArtMethod* referrer,
                                               InvokeType type) {
-  DCHECK(dex_cache != NULL);
+  DCHECK(dex_cache.get() != NULL);
   // Check for hit in the dex cache.
   mirror::ArtMethod* resolved = dex_cache->GetResolvedMethod(method_idx);
   if (resolved != NULL && !resolved->IsRuntimeMethod()) {
@@ -4104,15 +4099,15 @@
   switch (type) {
     case kDirect:  // Fall-through.
     case kStatic:
-      resolved = klass->FindDirectMethod(dex_cache, method_idx);
+      resolved = klass->FindDirectMethod(dex_cache.get(), method_idx);
       break;
     case kInterface:
-      resolved = klass->FindInterfaceMethod(dex_cache, method_idx);
+      resolved = klass->FindInterfaceMethod(dex_cache.get(), method_idx);
       DCHECK(resolved == NULL || resolved->GetDeclaringClass()->IsInterface());
       break;
     case kSuper:  // Fall-through.
     case kVirtual:
-      resolved = klass->FindVirtualMethod(dex_cache, method_idx);
+      resolved = klass->FindVirtualMethod(dex_cache.get(), method_idx);
       break;
     default:
       LOG(FATAL) << "Unreachable - invocation type: " << type;
@@ -4227,12 +4222,11 @@
   }
 }
 
-mirror::ArtField* ClassLinker::ResolveField(const DexFile& dex_file,
-                                            uint32_t field_idx,
-                                            mirror::DexCache* dex_cache,
-                                            mirror::ClassLoader* class_loader,
+mirror::ArtField* ClassLinker::ResolveField(const DexFile& dex_file, uint32_t field_idx,
+                                            SirtRef<mirror::DexCache>& dex_cache,
+                                            SirtRef<mirror::ClassLoader>& class_loader,
                                             bool is_static) {
-  DCHECK(dex_cache != NULL);
+  DCHECK(dex_cache.get() != nullptr);
   mirror::ArtField* resolved = dex_cache->GetResolvedField(field_idx);
   if (resolved != NULL) {
     return resolved;
@@ -4245,9 +4239,9 @@
   }
 
   if (is_static) {
-    resolved = klass->FindStaticField(dex_cache, field_idx);
+    resolved = klass->FindStaticField(dex_cache.get(), field_idx);
   } else {
-    resolved = klass->FindInstanceField(dex_cache, field_idx);
+    resolved = klass->FindInstanceField(dex_cache.get(), field_idx);
   }
 
   if (resolved == NULL) {
@@ -4269,9 +4263,9 @@
 
 mirror::ArtField* ClassLinker::ResolveFieldJLS(const DexFile& dex_file,
                                                uint32_t field_idx,
-                                               mirror::DexCache* dex_cache,
-                                               mirror::ClassLoader* class_loader) {
-  DCHECK(dex_cache != NULL);
+                                               SirtRef<mirror::DexCache>& dex_cache,
+                                               SirtRef<mirror::ClassLoader>& class_loader) {
+  DCHECK(dex_cache.get() != nullptr);
   mirror::ArtField* resolved = dex_cache->GetResolvedField(field_idx);
   if (resolved != NULL) {
     return resolved;
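
One self-contained piece of the class_linker.cc changes is the now-stateless
LinkFieldsComparator: constructing a FieldHelper per field removes the shared
helper that ChangeField() used to mutate. The ordering it encodes can be
sketched with plain stand-in types (FakeField and FieldLess are illustrative
only, not ART types):

    #include <cstring>

    // Stand-in for the ordering LinkFieldsComparator implements: reference
    // fields first, then 64-bit primitives, then 32-bit primitives, with
    // ties broken alphabetically by field name.
    struct FakeField {
      bool is_reference;
      bool is_64bit;
      const char* name;
    };

    bool FieldLess(const FakeField& a, const FakeField& b) {
      if (a.is_reference != b.is_reference) return a.is_reference;  // References first.
      if (a.is_64bit != b.is_64bit) return a.is_64bit;              // Then 64-bit.
      return std::strcmp(a.name, b.name) < 0;                       // Then by name.
    }
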
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 473370d..4e2cc06 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -25,6 +25,7 @@
 #include "base/mutex.h"
 #include "dex_file.h"
 #include "gtest/gtest.h"
+#include "jni.h"
 #include "root_visitor.h"
 #include "oat_file.h"
 
@@ -45,6 +46,7 @@
 
 class InternTable;
 class ObjectLock;
+class ScopedObjectAccess;
 template<class T> class SirtRef;
 
 typedef bool (ClassVisitor)(mirror::Class* c, void* arg);
@@ -56,29 +58,31 @@
   // (non-marker) interfaces.
   static constexpr size_t kImtSize = 64;
 
-  // Creates the class linker by bootstrapping from dex files.
-  static ClassLinker* CreateFromCompiler(const std::vector<const DexFile*>& boot_class_path,
-                                         InternTable* intern_table)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  // Creates the class linker from an image.
-  static ClassLinker* CreateFromImage(InternTable* intern_table)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
+  explicit ClassLinker(InternTable* intern_table);
   ~ClassLinker();
 
+  // Initialize the class linker by bootstrapping from dex files.
+  void InitFromCompiler(const std::vector<const DexFile*>& boot_class_path)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Initialize class linker from one or more images.
+  void InitFromImage() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   bool IsInBootClassPath(const char* descriptor);
 
   // Finds a class by its descriptor, loading it if necessary.
   // If class_loader is null, searches boot_class_path_.
-  mirror::Class* FindClass(const char* descriptor, mirror::ClassLoader* class_loader)
+  mirror::Class* FindClass(const char* descriptor, SirtRef<mirror::ClassLoader>& class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::Class* FindSystemClass(const char* descriptor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Returns true if the class linker is initialized.
+  bool IsInitialized() const;
+
   // Define a new class based on a ClassDef from a DexFile
-  mirror::Class* DefineClass(const char* descriptor, mirror::ClassLoader* class_loader,
+  mirror::Class* DefineClass(const char* descriptor, SirtRef<mirror::ClassLoader>& class_loader,
                              const DexFile& dex_file, const DexFile::ClassDef& dex_class_def)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -122,7 +126,7 @@
   // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache.
   mirror::String* ResolveString(const DexFile& dex_file, uint32_t string_idx,
-                                mirror::DexCache* dex_cache)
+                                SirtRef<mirror::DexCache>& dex_cache)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a Type with the given index from the DexFile, storing the
@@ -130,12 +134,7 @@
   // target DexCache and ClassLoader to use for resolution.
   mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx,
                              const mirror::Class* referrer)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return ResolveType(dex_file,
-                       type_idx,
-                       referrer->GetDexCache(),
-                       referrer->GetClassLoader());
-  }
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a Type with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
@@ -150,10 +149,9 @@
   // result in DexCache. The ClassLoader is used to search for the
   // type, since it may be referenced from but not contained within
   // the given DexFile.
-  mirror::Class* ResolveType(const DexFile& dex_file,
-                             uint16_t type_idx,
-                             mirror::DexCache* dex_cache,
-                             mirror::ClassLoader* class_loader)
+  mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx,
+                             SirtRef<mirror::DexCache>& dex_cache,
+                             SirtRef<mirror::ClassLoader>& class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a method with a given ID from the DexFile, storing the
@@ -163,8 +161,8 @@
   // virtual method.
   mirror::ArtMethod* ResolveMethod(const DexFile& dex_file,
                                    uint32_t method_idx,
-                                   mirror::DexCache* dex_cache,
-                                   mirror::ClassLoader* class_loader,
+                                   SirtRef<mirror::DexCache>& dex_cache,
+                                   SirtRef<mirror::ClassLoader>& class_loader,
                                    const mirror::ArtMethod* referrer,
                                    InvokeType type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -184,8 +182,8 @@
   // field.
   mirror::ArtField* ResolveField(const DexFile& dex_file,
                                  uint32_t field_idx,
-                                 mirror::DexCache* dex_cache,
-                                 mirror::ClassLoader* class_loader,
+                                 SirtRef<mirror::DexCache>& dex_cache,
+                                 SirtRef<mirror::ClassLoader>& class_loader,
                                  bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -195,8 +193,8 @@
   // field resolution semantics are followed.
   mirror::ArtField* ResolveFieldJLS(const DexFile& dex_file,
                                     uint32_t field_idx,
-                                    mirror::DexCache* dex_cache,
-                                    mirror::ClassLoader* class_loader)
+                                    SirtRef<mirror::DexCache>& dex_cache,
+                                    SirtRef<mirror::ClassLoader>& class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get shorty from method index without resolution. Used to do handlerization.
@@ -314,10 +312,8 @@
   void ResolveMethodExceptionHandlerTypes(const DexFile& dex_file, mirror::ArtMethod* klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::Class* CreateProxyClass(mirror::String* name, mirror::ObjectArray<mirror::Class>* interfaces,
-                                  mirror::ClassLoader* loader,
-                                  mirror::ObjectArray<mirror::ArtMethod>* methods,
-                                  mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >* throws)
+  mirror::Class* CreateProxyClass(ScopedObjectAccess& soa, jstring name, jobjectArray interfaces,
+                                  jobject loader, jobjectArray methods, jobjectArray throws)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   std::string GetDescriptorForProxy(const mirror::Class* proxy_class)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -364,18 +360,13 @@
       LOCKS_EXCLUDED(Locks::classlinker_classes_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
- private:
-  explicit ClassLinker(InternTable*);
+  // Special code to allocate an ArtMethod; use this instead of class->AllocObject.
+  mirror::ArtMethod* AllocArtMethod(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+ private:
   const OatFile::OatMethod GetOatMethodFor(const mirror::ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Initialize class linker by bootstraping from dex files
-  void InitFromCompiler(const std::vector<const DexFile*>& boot_class_path)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  // Initialize class linker from one or more images.
-  void InitFromImage() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   OatFile& GetImageOatFile(gc::space::ImageSpace* space)
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -393,7 +384,6 @@
   mirror::DexCache* AllocDexCache(Thread* self, const DexFile& dex_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::ArtField* AllocArtField(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::ArtMethod* AllocArtMethod(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::Class* CreatePrimitiveClass(Thread* self, Primitive::Type type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -401,7 +391,8 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 
-  mirror::Class* CreateArrayClass(const char* descriptor, mirror::ClassLoader* class_loader)
+  mirror::Class* CreateArrayClass(const char* descriptor,
+                                  SirtRef<mirror::ClassLoader>& class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void AppendToBootClassPath(const DexFile& dex_file)
@@ -458,8 +449,8 @@
                                                      const mirror::Class* klass2)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkClass(SirtRef<mirror::Class>& klass, mirror::ObjectArray<mirror::Class>* interfaces,
-                 Thread* self)
+  bool LinkClass(Thread* self, SirtRef<mirror::Class>& klass,
+                 SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkSuperClass(SirtRef<mirror::Class>& klass)
@@ -468,14 +459,15 @@
   bool LoadSuperAndInterfaces(SirtRef<mirror::Class>& klass, const DexFile& dex_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkMethods(SirtRef<mirror::Class>& klass, mirror::ObjectArray<mirror::Class>* interfaces)
+  bool LinkMethods(SirtRef<mirror::Class>& klass,
+                   SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkVirtualMethods(SirtRef<mirror::Class>& klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkInterfaceMethods(SirtRef<mirror::Class>& klass,
-                            mirror::ObjectArray<mirror::Class>* interfaces)
+                            SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkStaticFields(SirtRef<mirror::Class>& klass)
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index a52b680..b8bc474 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -95,7 +95,8 @@
                         const std::string& component_type,
                         mirror::ClassLoader* class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::Class* array = class_linker_->FindClass(array_descriptor.c_str(), class_loader);
+    SirtRef<mirror::ClassLoader> loader(Thread::Current(), class_loader);
+    mirror::Class* array = class_linker_->FindClass(array_descriptor.c_str(), loader);
     ClassHelper array_component_ch(array->GetComponentType());
     EXPECT_STREQ(component_type.c_str(), array_component_ch.GetDescriptor());
     EXPECT_EQ(class_loader, array->GetClassLoader());
@@ -647,12 +648,12 @@
   ScopedObjectAccess soa(Thread::Current());
   SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("Nested")));
 
-  mirror::Class* outer = class_linker_->FindClass("LNested;", class_loader.get());
+  mirror::Class* outer = class_linker_->FindClass("LNested;", class_loader);
   ASSERT_TRUE(outer != NULL);
   EXPECT_EQ(0U, outer->NumVirtualMethods());
   EXPECT_EQ(1U, outer->NumDirectMethods());
 
-  mirror::Class* inner = class_linker_->FindClass("LNested$Inner;", class_loader.get());
+  mirror::Class* inner = class_linker_->FindClass("LNested$Inner;", class_loader);
   ASSERT_TRUE(inner != NULL);
   EXPECT_EQ(0U, inner->NumVirtualMethods());
   EXPECT_EQ(1U, inner->NumDirectMethods());
@@ -711,7 +712,7 @@
 
   SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("MyClass")));
   AssertNonExistentClass("LMyClass;");
-  mirror::Class* MyClass = class_linker_->FindClass("LMyClass;", class_loader.get());
+  mirror::Class* MyClass = class_linker_->FindClass("LMyClass;", class_loader);
   kh.ChangeClass(MyClass);
   ASSERT_TRUE(MyClass != NULL);
   ASSERT_TRUE(MyClass->GetClass() != NULL);
@@ -809,29 +810,30 @@
   // Validate that the "value" field is always the 0th field in each of java.lang's box classes.
   // This lets UnboxPrimitive avoid searching for the field by name at runtime.
   ScopedObjectAccess soa(Thread::Current());
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
   mirror::Class* c;
-  c = class_linker_->FindClass("Ljava/lang/Boolean;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Boolean;", class_loader);
   FieldHelper fh(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
-  c = class_linker_->FindClass("Ljava/lang/Byte;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Byte;", class_loader);
   fh.ChangeField(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
-  c = class_linker_->FindClass("Ljava/lang/Character;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Character;", class_loader);
   fh.ChangeField(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
-  c = class_linker_->FindClass("Ljava/lang/Double;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Double;", class_loader);
   fh.ChangeField(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
-  c = class_linker_->FindClass("Ljava/lang/Float;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Float;", class_loader);
   fh.ChangeField(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
-  c = class_linker_->FindClass("Ljava/lang/Integer;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Integer;", class_loader);
   fh.ChangeField(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
-  c = class_linker_->FindClass("Ljava/lang/Long;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Long;", class_loader);
   fh.ChangeField(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
-  c = class_linker_->FindClass("Ljava/lang/Short;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Short;", class_loader);
   fh.ChangeField(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
 }
@@ -840,8 +842,8 @@
   ScopedObjectAccess soa(Thread::Current());
   SirtRef<mirror::ClassLoader> class_loader_1(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("MyClass")));
   SirtRef<mirror::ClassLoader> class_loader_2(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("MyClass")));
-  mirror::Class* MyClass_1 = class_linker_->FindClass("LMyClass;", class_loader_1.get());
-  mirror::Class* MyClass_2 = class_linker_->FindClass("LMyClass;", class_loader_2.get());
+  mirror::Class* MyClass_1 = class_linker_->FindClass("LMyClass;", class_loader_1);
+  mirror::Class* MyClass_2 = class_linker_->FindClass("LMyClass;", class_loader_2);
   EXPECT_TRUE(MyClass_1 != NULL);
   EXPECT_TRUE(MyClass_2 != NULL);
   EXPECT_NE(MyClass_1, MyClass_2);
@@ -850,7 +852,7 @@
 TEST_F(ClassLinkerTest, StaticFields) {
   ScopedObjectAccess soa(Thread::Current());
   SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("Statics")));
-  mirror::Class* statics = class_linker_->FindClass("LStatics;", class_loader.get());
+  mirror::Class* statics = class_linker_->FindClass("LStatics;", class_loader);
   class_linker_->EnsureInitialized(statics, true, true);
 
   // Static final primitives that are initialized by a compile-time constant
@@ -932,11 +934,11 @@
 TEST_F(ClassLinkerTest, Interfaces) {
   ScopedObjectAccess soa(Thread::Current());
   SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("Interfaces")));
-  mirror::Class* I = class_linker_->FindClass("LInterfaces$I;", class_loader.get());
-  mirror::Class* J = class_linker_->FindClass("LInterfaces$J;", class_loader.get());
-  mirror::Class* K = class_linker_->FindClass("LInterfaces$K;", class_loader.get());
-  mirror::Class* A = class_linker_->FindClass("LInterfaces$A;", class_loader.get());
-  mirror::Class* B = class_linker_->FindClass("LInterfaces$B;", class_loader.get());
+  mirror::Class* I = class_linker_->FindClass("LInterfaces$I;", class_loader);
+  mirror::Class* J = class_linker_->FindClass("LInterfaces$J;", class_loader);
+  mirror::Class* K = class_linker_->FindClass("LInterfaces$K;", class_loader);
+  mirror::Class* A = class_linker_->FindClass("LInterfaces$A;", class_loader);
+  mirror::Class* B = class_linker_->FindClass("LInterfaces$B;", class_loader);
   EXPECT_TRUE(I->IsAssignableFrom(A));
   EXPECT_TRUE(J->IsAssignableFrom(A));
   EXPECT_TRUE(J->IsAssignableFrom(K));
@@ -995,8 +997,7 @@
   SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(jclass_loader));
   const DexFile* dex_file = Runtime::Current()->GetCompileTimeClassPath(jclass_loader)[0];
   CHECK(dex_file != NULL);
-
-  mirror::Class* klass = class_linker_->FindClass("LStaticsFromCode;", class_loader.get());
+  mirror::Class* klass = class_linker_->FindClass("LStaticsFromCode;", class_loader);
   mirror::ArtMethod* clinit = klass->FindClassInitializer();
   mirror::ArtMethod* getS0 = klass->FindDirectMethod("getS0", "()Ljava/lang/Object;");
   const DexFile::StringId* string_id = dex_file->FindStringId("LStaticsFromCode;");
@@ -1049,10 +1050,9 @@
 
 TEST_F(ClassLinkerTest, ClassRootDescriptors) {
   ScopedObjectAccess soa(Thread::Current());
-  ClassHelper kh;
   for (int i = 0; i < ClassLinker::kClassRootsMax; i++) {
     mirror::Class* klass = class_linker_->GetClassRoot(ClassLinker::ClassRoot(i));
-    kh.ChangeClass(klass);
+    ClassHelper kh(klass);
     EXPECT_TRUE(kh.GetDescriptor() != NULL);
     EXPECT_STREQ(kh.GetDescriptor(),
                  class_linker_->GetClassRootDescriptor(ClassLinker::ClassRoot(i))) << " i = " << i;
diff --git a/runtime/common_test.h b/runtime/common_test.h
index 643ed1d..57cf71a 100644
--- a/runtime/common_test.h
+++ b/runtime/common_test.h
@@ -237,7 +237,6 @@
 // input 'str' is a comma separated list of feature names.  Parse it and
 // return the InstructionSetFeatures object.
 static InstructionSetFeatures ParseFeatureList(std::string str) {
-  LOG(INFO) << "Parsing features " << str;
   InstructionSetFeatures result;
   typedef std::vector<std::string> FeatureList;
   FeatureList features;
@@ -569,7 +568,8 @@
   void CompileClass(mirror::ClassLoader* class_loader, const char* class_name)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string class_descriptor(DotToDescriptor(class_name));
-    mirror::Class* klass = class_linker_->FindClass(class_descriptor.c_str(), class_loader);
+    SirtRef<mirror::ClassLoader> loader(Thread::Current(), class_loader);
+    mirror::Class* klass = class_linker_->FindClass(class_descriptor.c_str(), loader);
     CHECK(klass != NULL) << "Class not found " << class_name;
     for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
       CompileMethod(klass->GetDirectMethod(i));
@@ -581,16 +581,15 @@
 
   void CompileMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK(method != NULL);
-    base::TimingLogger timings("CommonTest::CompileMethod", false, false);
+    TimingLogger timings("CommonTest::CompileMethod", false, false);
     timings.StartSplit("CompileOne");
     compiler_driver_->CompileOne(method, timings);
     MakeExecutable(method);
+    timings.EndSplit();
   }
 
-  void CompileDirectMethod(mirror::ClassLoader* class_loader,
-                           const char* class_name,
-                           const char* method_name,
-                           const char* signature)
+  void CompileDirectMethod(SirtRef<mirror::ClassLoader>& class_loader, const char* class_name,
+                           const char* method_name, const char* signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string class_descriptor(DotToDescriptor(class_name));
     mirror::Class* klass = class_linker_->FindClass(class_descriptor.c_str(), class_loader);
@@ -601,10 +600,8 @@
     CompileMethod(method);
   }
 
-  void CompileVirtualMethod(mirror::ClassLoader* class_loader,
-                            const char* class_name,
-                            const char* method_name,
-                            const char* signature)
+  void CompileVirtualMethod(SirtRef<mirror::ClassLoader>& class_loader, const char* class_name,
+                            const char* method_name, const char* signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string class_descriptor(DotToDescriptor(class_name));
     mirror::Class* klass = class_linker_->FindClass(class_descriptor.c_str(), class_loader);
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index bdcf6ac..6c4d130 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -95,20 +95,6 @@
   return os;
 }
 
-struct SingleStepControl {
-  // Are we single-stepping right now?
-  bool is_active;
-  Thread* thread;
-
-  JDWP::JdwpStepSize step_size;
-  JDWP::JdwpStepDepth step_depth;
-
-  const mirror::ArtMethod* method;
-  int32_t line_number;  // Or -1 for native methods.
-  std::set<uint32_t> dex_pcs;
-  int stack_depth;
-};
-
 class DebugInstrumentationListener : public instrumentation::InstrumentationListener {
  public:
   DebugInstrumentationListener() {}
@@ -121,26 +107,26 @@
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
     }
-    Dbg::PostLocationEvent(method, 0, this_object, Dbg::kMethodEntry);
+    Dbg::PostLocationEvent(method, 0, this_object, Dbg::kMethodEntry, nullptr);
   }
 
   virtual void MethodExited(Thread* thread, mirror::Object* this_object,
                             const mirror::ArtMethod* method,
                             uint32_t dex_pc, const JValue& return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    UNUSED(return_value);
     if (method->IsNative()) {
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
     }
-    Dbg::PostLocationEvent(method, dex_pc, this_object, Dbg::kMethodExit);
+    Dbg::PostLocationEvent(method, dex_pc, this_object, Dbg::kMethodExit, &return_value);
   }
 
-  virtual void MethodUnwind(Thread* thread, const mirror::ArtMethod* method,
-                            uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
+                            const mirror::ArtMethod* method, uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // We're not recorded to listen to this kind of event, so complain.
     LOG(ERROR) << "Unexpected method unwind event in debugger " << PrettyMethod(method)
-        << " " << dex_pc;
+               << " " << dex_pc;
   }
 
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
@@ -192,7 +178,6 @@
 
 // Breakpoints and single-stepping.
 static std::vector<Breakpoint> gBreakpoints GUARDED_BY(Locks::breakpoint_lock_);
-static SingleStepControl gSingleStepControl GUARDED_BY(Locks::breakpoint_lock_);
 
 static bool IsBreakpoint(const mirror::ArtMethod* m, uint32_t dex_pc)
     LOCKS_EXCLUDED(Locks::breakpoint_lock_)
@@ -1118,7 +1103,7 @@
   if (c == NULL) {
     return status;
   }
-  new_array = gRegistry->Add(mirror::Array::Alloc(Thread::Current(), c, length));
+  new_array = gRegistry->Add(mirror::Array::Alloc<true>(Thread::Current(), c, length));
   return JDWP::ERR_NONE;
 }
 
@@ -1133,38 +1118,26 @@
 
 static JDWP::FieldId ToFieldId(const mirror::ArtField* f)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#ifdef MOVING_GARBAGE_COLLECTOR
-  UNIMPLEMENTED(FATAL);
-#else
+  CHECK(!kMovingFields);
   return static_cast<JDWP::FieldId>(reinterpret_cast<uintptr_t>(f));
-#endif
 }
 
 static JDWP::MethodId ToMethodId(const mirror::ArtMethod* m)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#ifdef MOVING_GARBAGE_COLLECTOR
-  UNIMPLEMENTED(FATAL);
-#else
+  CHECK(!kMovingMethods);
   return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(m));
-#endif
 }
 
 static mirror::ArtField* FromFieldId(JDWP::FieldId fid)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#ifdef MOVING_GARBAGE_COLLECTOR
-  UNIMPLEMENTED(FATAL);
-#else
+  CHECK(!kMovingFields);
   return reinterpret_cast<mirror::ArtField*>(static_cast<uintptr_t>(fid));
-#endif
 }
 
 static mirror::ArtMethod* FromMethodId(JDWP::MethodId mid)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#ifdef MOVING_GARBAGE_COLLECTOR
-  UNIMPLEMENTED(FATAL);
-#else
+  CHECK(!kMovingMethods);
   return reinterpret_cast<mirror::ArtMethod*>(static_cast<uintptr_t>(mid));
-#endif
 }
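
Since the four helpers above are inverse bit-casts between JDWP IDs and raw object addresses, they stay valid only while the referents cannot move, which is exactly what the new CHECKs assert. A hypothetical helper making the round-trip invariant explicit:

    // Illustrative only; sound while !kMovingFields, per the CHECK above.
    static void CheckFieldIdRoundTrip(mirror::ArtField* f)
        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
      DCHECK_EQ(FromFieldId(ToFieldId(f)), f);
    }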
 
 static void SetLocation(JDWP::JdwpLocation& location, mirror::ArtMethod* m, uint32_t dex_pc)
@@ -1205,41 +1178,37 @@
   return accessFlags;
 }
 
-static const uint16_t kEclipseWorkaroundSlot = 1000;
-
 /*
- * Eclipse appears to expect that the "this" reference is in slot zero.
- * If it's not, the "variables" display will show two copies of "this",
- * possibly because it gets "this" from SF.ThisObject and then displays
- * all locals with nonzero slot numbers.
- *
- * So, we remap the item in slot 0 to 1000, and remap "this" to zero.  On
- * SF.GetValues / SF.SetValues we map them back.
- *
- * TODO: jdb uses the value to determine whether a variable is a local or an argument,
- * by checking whether it's less than the number of arguments. To make that work, we'd
- * have to "mangle" all the arguments to come first, not just the implicit argument 'this'.
+ * Circularly shifts registers so that arguments come first. Debuggers
+ * expect slots to begin with arguments, but dex code places them at
+ * the end.
  */
-static uint16_t MangleSlot(uint16_t slot, const char* name) {
-  uint16_t newSlot = slot;
-  if (strcmp(name, "this") == 0) {
-    newSlot = 0;
-  } else if (slot == 0) {
-    newSlot = kEclipseWorkaroundSlot;
+static uint16_t MangleSlot(uint16_t slot, mirror::ArtMethod* m)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
+  uint16_t ins_size = code_item->ins_size_;
+  uint16_t locals_size = code_item->registers_size_ - ins_size;
+  if (slot >= locals_size) {
+    return slot - locals_size;
+  } else {
+    return slot + ins_size;
   }
-  return newSlot;
 }
 
+/*
+ * Circularly shifts registers so that arguments come last. Reverts
+ * slots to dex-style argument placement.
+ */
 static uint16_t DemangleSlot(uint16_t slot, mirror::ArtMethod* m)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (slot == kEclipseWorkaroundSlot) {
-    return 0;
-  } else if (slot == 0) {
-    const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
-    CHECK(code_item != NULL) << PrettyMethod(m);
-    return code_item->registers_size_ - code_item->ins_size_;
+  const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
+  uint16_t ins_size = code_item->ins_size_;
+  uint16_t locals_size = code_item->registers_size_ - ins_size;
+  if (slot < ins_size) {
+    return slot + locals_size;
+  } else {
+    return slot - ins_size;
   }
-  return slot;
 }
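
A standalone sketch of the circular shift implemented by MangleSlot/DemangleSlot above; the arithmetic is copied from the two functions, while the free-function names and the sample sizes are illustrative:

    #include <cassert>
    #include <cstdint>

    // registers_size == 5 with ins_size == 2 puts locals in v0..v2 and
    // arguments in v3..v4; debuggers expect the arguments first.
    uint16_t Mangle(uint16_t slot, uint16_t ins, uint16_t regs) {
      uint16_t locals = regs - ins;
      return slot >= locals ? slot - locals : slot + ins;
    }

    uint16_t Demangle(uint16_t slot, uint16_t ins, uint16_t regs) {
      uint16_t locals = regs - ins;
      return slot < ins ? slot + locals : slot - ins;
    }

    int main() {
      assert(Mangle(3, 2, 5) == 0);  // the first argument lands in slot 0
      for (uint16_t v = 0; v < 5; ++v) {
        assert(Demangle(Mangle(v, 2, 5), 2, 5) == v);  // shifts round-trip
      }
      return 0;
    }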
 
 JDWP::JdwpError Dbg::OutputDeclaredFields(JDWP::RefTypeId class_id, bool with_generic, JDWP::ExpandBuf* pReply) {
@@ -1358,16 +1327,18 @@
 
 void Dbg::OutputVariableTable(JDWP::RefTypeId, JDWP::MethodId method_id, bool with_generic, JDWP::ExpandBuf* pReply) {
   struct DebugCallbackContext {
+    mirror::ArtMethod* method;
     JDWP::ExpandBuf* pReply;
     size_t variable_count;
     bool with_generic;
 
-    static void Callback(void* context, uint16_t slot, uint32_t startAddress, uint32_t endAddress, const char* name, const char* descriptor, const char* signature) {
+    static void Callback(void* context, uint16_t slot, uint32_t startAddress, uint32_t endAddress, const char* name, const char* descriptor, const char* signature)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       DebugCallbackContext* pContext = reinterpret_cast<DebugCallbackContext*>(context);
 
-      VLOG(jdwp) << StringPrintf("    %2zd: %d(%d) '%s' '%s' '%s' actual slot=%d mangled slot=%d", pContext->variable_count, startAddress, endAddress - startAddress, name, descriptor, signature, slot, MangleSlot(slot, name));
+      VLOG(jdwp) << StringPrintf("    %2zd: %d(%d) '%s' '%s' '%s' actual slot=%d mangled slot=%d", pContext->variable_count, startAddress, endAddress - startAddress, name, descriptor, signature, slot, MangleSlot(slot, pContext->method));
 
-      slot = MangleSlot(slot, name);
+      slot = MangleSlot(slot, pContext->method);
 
       expandBufAdd8BE(pContext->pReply, startAddress);
       expandBufAddUtf8String(pContext->pReply, name);
@@ -1395,6 +1366,7 @@
   expandBufAdd4BE(pReply, 0);
 
   DebugCallbackContext context;
+  context.method = m;
   context.pReply = pReply;
   context.variable_count = 0;
   context.with_generic = with_generic;
@@ -1405,6 +1377,13 @@
   JDWP::Set4BE(expandBufGetBuffer(pReply) + variable_count_offset, context.variable_count);
 }
 
+void Dbg::OutputMethodReturnValue(JDWP::MethodId method_id, const JValue* return_value,
+                                  JDWP::ExpandBuf* pReply) {
+  mirror::ArtMethod* m = FromMethodId(method_id);
+  JDWP::JdwpTag tag = BasicTagFromDescriptor(MethodHelper(m).GetShorty());
+  OutputJValue(tag, return_value, pReply);
+}
+
 JDWP::JdwpError Dbg::GetBytecodes(JDWP::RefTypeId, JDWP::MethodId method_id,
                                   std::vector<uint8_t>& bytecodes)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -1473,25 +1452,18 @@
   }
 
   JDWP::JdwpTag tag = BasicTagFromDescriptor(FieldHelper(f).GetTypeDescriptor());
-
-  if (IsPrimitiveTag(tag)) {
-    expandBufAdd1(pReply, tag);
-    if (tag == JDWP::JT_BOOLEAN || tag == JDWP::JT_BYTE) {
-      expandBufAdd1(pReply, f->Get32(o));
-    } else if (tag == JDWP::JT_CHAR || tag == JDWP::JT_SHORT) {
-      expandBufAdd2BE(pReply, f->Get32(o));
-    } else if (tag == JDWP::JT_FLOAT || tag == JDWP::JT_INT) {
-      expandBufAdd4BE(pReply, f->Get32(o));
-    } else if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) {
-      expandBufAdd8BE(pReply, f->Get64(o));
-    } else {
-      LOG(FATAL) << "Unknown tag: " << tag;
-    }
+  JValue field_value;
+  if (tag == JDWP::JT_VOID) {
+    LOG(FATAL) << "Unknown tag: " << tag;
+  } else if (!IsPrimitiveTag(tag)) {
+    field_value.SetL(f->GetObject(o));
+  } else if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) {
+    field_value.SetJ(f->Get64(o));
   } else {
-    mirror::Object* value = f->GetObject(o);
-    expandBufAdd1(pReply, TagFromObject(value));
-    expandBufAddObjectId(pReply, gRegistry->Add(value));
+    field_value.SetI(f->Get32(o));
   }
+  Dbg::OutputJValue(tag, &field_value, pReply);
+
   return JDWP::ERR_NONE;
 }
 
@@ -1569,6 +1541,27 @@
   return s->ToModifiedUtf8();
 }
 
+void Dbg::OutputJValue(JDWP::JdwpTag tag, const JValue* return_value, JDWP::ExpandBuf* pReply) {
+  if (IsPrimitiveTag(tag)) {
+    expandBufAdd1(pReply, tag);
+    if (tag == JDWP::JT_BOOLEAN || tag == JDWP::JT_BYTE) {
+      expandBufAdd1(pReply, return_value->GetI());
+    } else if (tag == JDWP::JT_CHAR || tag == JDWP::JT_SHORT) {
+      expandBufAdd2BE(pReply, return_value->GetI());
+    } else if (tag == JDWP::JT_FLOAT || tag == JDWP::JT_INT) {
+      expandBufAdd4BE(pReply, return_value->GetI());
+    } else if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) {
+      expandBufAdd8BE(pReply, return_value->GetJ());
+    } else {
+      CHECK_EQ(tag, JDWP::JT_VOID);
+    }
+  } else {
+    mirror::Object* value = return_value->GetL();
+    expandBufAdd1(pReply, TagFromObject(value));
+    expandBufAddObjectId(pReply, gRegistry->Add(value));
+  }
+}
+
 JDWP::JdwpError Dbg::GetThreadName(JDWP::ObjectId thread_id, std::string& name) {
   ScopedObjectAccessUnchecked soa(Thread::Current());
   MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
@@ -2079,7 +2072,7 @@
           CHECK_EQ(width_, sizeof(JDWP::ObjectId));
           mirror::Object* o = reinterpret_cast<mirror::Object*>(GetVReg(m, reg, kReferenceVReg));
           VLOG(jdwp) << "get array local " << reg << " = " << o;
-          if (!Runtime::Current()->GetHeap()->IsHeapAddress(o)) {
+          if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
             LOG(FATAL) << "Register " << reg << " expected to hold array: " << o;
           }
           JDWP::SetObjectId(buf_+1, gRegistry->Add(o));
@@ -2095,7 +2088,7 @@
           CHECK_EQ(width_, sizeof(JDWP::ObjectId));
           mirror::Object* o = reinterpret_cast<mirror::Object*>(GetVReg(m, reg, kReferenceVReg));
           VLOG(jdwp) << "get object local " << reg << " = " << o;
-          if (!Runtime::Current()->GetHeap()->IsHeapAddress(o)) {
+          if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
             LOG(FATAL) << "Register " << reg << " expected to hold object: " << o;
           }
           tag_ = TagFromObject(o);
@@ -2238,8 +2231,8 @@
   visitor.WalkStack();
 }
 
-void Dbg::PostLocationEvent(const mirror::ArtMethod* m, int dex_pc,
-                            mirror::Object* this_object, int event_flags) {
+void Dbg::PostLocationEvent(const mirror::ArtMethod* m, int dex_pc, mirror::Object* this_object,
+                            int event_flags, const JValue* return_value) {
   mirror::Class* c = m->GetDeclaringClass();
 
   JDWP::JdwpLocation location;
@@ -2254,7 +2247,7 @@
   if (gRegistry->Contains(this_object)) {
     this_id = gRegistry->Add(this_object);
   }
-  gJdwpState->PostLocationEvent(&location, this_id, event_flags);
+  gJdwpState->PostLocationEvent(&location, this_id, event_flags, return_value);
 }
 
 void Dbg::PostException(Thread* thread, const ThrowLocation& throw_location,
@@ -2304,63 +2297,62 @@
     event_flags |= kBreakpoint;
   }
 
-  {
-    // If the debugger is single-stepping one of our threads, check to
-    // see if we're that thread and we've reached a step point.
-    MutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
-    if (gSingleStepControl.is_active && gSingleStepControl.thread == thread) {
-      CHECK(!m->IsNative());
-      if (gSingleStepControl.step_depth == JDWP::SD_INTO) {
-        // Step into method calls.  We break when the line number
-        // or method pointer changes.  If we're in SS_MIN mode, we
-        // always stop.
-        if (gSingleStepControl.method != m) {
-          event_flags |= kSingleStep;
-          VLOG(jdwp) << "SS new method";
-        } else if (gSingleStepControl.step_size == JDWP::SS_MIN) {
+  // If the debugger is single-stepping one of our threads, check to
+  // see if we're that thread and we've reached a step point.
+  const SingleStepControl* single_step_control = thread->GetSingleStepControl();
+  DCHECK(single_step_control != nullptr);
+  if (single_step_control->is_active) {
+    CHECK(!m->IsNative());
+    if (single_step_control->step_depth == JDWP::SD_INTO) {
+      // Step into method calls.  We break when the line number
+      // or method pointer changes.  If we're in SS_MIN mode, we
+      // always stop.
+      if (single_step_control->method != m) {
+        event_flags |= kSingleStep;
+        VLOG(jdwp) << "SS new method";
+      } else if (single_step_control->step_size == JDWP::SS_MIN) {
+        event_flags |= kSingleStep;
+        VLOG(jdwp) << "SS new instruction";
+      } else if (single_step_control->dex_pcs.find(dex_pc) == single_step_control->dex_pcs.end()) {
+        event_flags |= kSingleStep;
+        VLOG(jdwp) << "SS new line";
+      }
+    } else if (single_step_control->step_depth == JDWP::SD_OVER) {
+      // Step over method calls.  We break when the line number is
+      // different and the frame depth is <= the original frame
+      // depth.  (We can't just compare on the method, because we
+      // might get unrolled past it by an exception, and it's tricky
+      // to identify recursion.)
+
+      int stack_depth = GetStackDepth(thread);
+
+      if (stack_depth < single_step_control->stack_depth) {
+        // Popped up one or more frames, always trigger.
+        event_flags |= kSingleStep;
+        VLOG(jdwp) << "SS method pop";
+      } else if (stack_depth == single_step_control->stack_depth) {
+        // Same depth, see if we moved.
+        if (single_step_control->step_size == JDWP::SS_MIN) {
           event_flags |= kSingleStep;
           VLOG(jdwp) << "SS new instruction";
-        } else if (gSingleStepControl.dex_pcs.find(dex_pc) == gSingleStepControl.dex_pcs.end()) {
+        } else if (single_step_control->dex_pcs.find(dex_pc) == single_step_control->dex_pcs.end()) {
           event_flags |= kSingleStep;
           VLOG(jdwp) << "SS new line";
         }
-      } else if (gSingleStepControl.step_depth == JDWP::SD_OVER) {
-        // Step over method calls.  We break when the line number is
-        // different and the frame depth is <= the original frame
-        // depth.  (We can't just compare on the method, because we
-        // might get unrolled past it by an exception, and it's tricky
-        // to identify recursion.)
+      }
+    } else {
+      CHECK_EQ(single_step_control->step_depth, JDWP::SD_OUT);
+      // Return from the current method.  We break when the frame
+      // depth pops up.
 
-        int stack_depth = GetStackDepth(thread);
+      // This differs from the "method exit" break in that it stops
+      // with the PC at the next instruction in the returned-to
+      // function, rather than the end of the returning function.
 
-        if (stack_depth < gSingleStepControl.stack_depth) {
-          // popped up one or more frames, always trigger
-          event_flags |= kSingleStep;
-          VLOG(jdwp) << "SS method pop";
-        } else if (stack_depth == gSingleStepControl.stack_depth) {
-          // same depth, see if we moved
-          if (gSingleStepControl.step_size == JDWP::SS_MIN) {
-            event_flags |= kSingleStep;
-            VLOG(jdwp) << "SS new instruction";
-          } else if (gSingleStepControl.dex_pcs.find(dex_pc) == gSingleStepControl.dex_pcs.end()) {
-            event_flags |= kSingleStep;
-            VLOG(jdwp) << "SS new line";
-          }
-        }
-      } else {
-        CHECK_EQ(gSingleStepControl.step_depth, JDWP::SD_OUT);
-        // Return from the current method.  We break when the frame
-        // depth pops up.
-
-        // This differs from the "method exit" break in that it stops
-        // with the PC at the next instruction in the returned-to
-        // function, rather than the end of the returning function.
-
-        int stack_depth = GetStackDepth(thread);
-        if (stack_depth < gSingleStepControl.stack_depth) {
-          event_flags |= kSingleStep;
-          VLOG(jdwp) << "SS method pop";
-        }
+      int stack_depth = GetStackDepth(thread);
+      if (stack_depth < single_step_control->stack_depth) {
+        event_flags |= kSingleStep;
+        VLOG(jdwp) << "SS method pop";
       }
     }
   }
@@ -2368,7 +2360,7 @@
   // If there's something interesting going on, see if it matches one
   // of the debugger filters.
   if (event_flags != 0) {
-    Dbg::PostLocationEvent(m, dex_pc, this_object, event_flags);
+    Dbg::PostLocationEvent(m, dex_pc, this_object, event_flags, nullptr);
   }
 }
 
@@ -2456,50 +2448,50 @@
     return sts.GetError();
   }
 
-  MutexLock mu2(self, *Locks::breakpoint_lock_);
-  // TODO: there's no theoretical reason why we couldn't support single-stepping
-  // of multiple threads at once, but we never did so historically.
-  if (gSingleStepControl.thread != NULL && sts.GetThread() != gSingleStepControl.thread) {
-    LOG(WARNING) << "single-step already active for " << *gSingleStepControl.thread
-                 << "; switching to " << *sts.GetThread();
-  }
-
   //
   // Work out what Method* we're in, the current line number, and how deep the stack currently
   // is for step-out.
   //
 
   struct SingleStepStackVisitor : public StackVisitor {
-    explicit SingleStepStackVisitor(Thread* thread)
-        EXCLUSIVE_LOCKS_REQUIRED(Locks::breakpoint_lock_)
+    explicit SingleStepStackVisitor(Thread* thread, SingleStepControl* single_step_control,
+                                    int32_t* line_number)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-        : StackVisitor(thread, NULL) {
-      gSingleStepControl.method = NULL;
-      gSingleStepControl.stack_depth = 0;
+        : StackVisitor(thread, NULL), single_step_control_(single_step_control),
+          line_number_(line_number) {
+      DCHECK_EQ(single_step_control_, thread->GetSingleStepControl());
+      single_step_control_->method = NULL;
+      single_step_control_->stack_depth = 0;
     }
 
     // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
     // annotalysis.
     bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS {
-      Locks::breakpoint_lock_->AssertHeld(Thread::Current());
-      const mirror::ArtMethod* m = GetMethod();
+      mirror::ArtMethod* m = GetMethod();
       if (!m->IsRuntimeMethod()) {
-        ++gSingleStepControl.stack_depth;
-        if (gSingleStepControl.method == NULL) {
+        ++single_step_control_->stack_depth;
+        if (single_step_control_->method == NULL) {
           const mirror::DexCache* dex_cache = m->GetDeclaringClass()->GetDexCache();
-          gSingleStepControl.method = m;
-          gSingleStepControl.line_number = -1;
+          single_step_control_->method = m;
+          *line_number_ = -1;
           if (dex_cache != NULL) {
             const DexFile& dex_file = *dex_cache->GetDexFile();
-            gSingleStepControl.line_number = dex_file.GetLineNumFromPC(m, GetDexPc());
+            *line_number_ = dex_file.GetLineNumFromPC(m, GetDexPc());
           }
         }
       }
       return true;
     }
+
+    SingleStepControl* const single_step_control_;
+    int32_t* const line_number_;
   };
 
-  SingleStepStackVisitor visitor(sts.GetThread());
+  Thread* const thread = sts.GetThread();
+  SingleStepControl* const single_step_control = thread->GetSingleStepControl();
+  DCHECK(single_step_control != nullptr);
+  int32_t line_number = -1;
+  SingleStepStackVisitor visitor(thread, single_step_control, &line_number);
   visitor.WalkStack();
 
   //
@@ -2507,17 +2499,14 @@
   //
 
   struct DebugCallbackContext {
-    DebugCallbackContext() EXCLUSIVE_LOCKS_REQUIRED(Locks::breakpoint_lock_) {
-      last_pc_valid = false;
-      last_pc = 0;
+    explicit DebugCallbackContext(SingleStepControl* single_step_control, int32_t line_number)
+      : single_step_control_(single_step_control), line_number_(line_number),
+        last_pc_valid(false), last_pc(0) {
     }
 
-    // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
-    // annotalysis.
-    static bool Callback(void* raw_context, uint32_t address, uint32_t line_number) NO_THREAD_SAFETY_ANALYSIS {
-      Locks::breakpoint_lock_->AssertHeld(Thread::Current());
+    static bool Callback(void* raw_context, uint32_t address, uint32_t line_number) {
       DebugCallbackContext* context = reinterpret_cast<DebugCallbackContext*>(raw_context);
-      if (static_cast<int32_t>(line_number) == gSingleStepControl.line_number) {
+      if (static_cast<int32_t>(line_number) == context->line_number_) {
         if (!context->last_pc_valid) {
           // Everything from this address until the next line change is ours.
           context->last_pc = address;
@@ -2528,35 +2517,32 @@
       } else if (context->last_pc_valid) {  // and the line number is new
         // Add everything from the last entry up until here to the set
         for (uint32_t dex_pc = context->last_pc; dex_pc < address; ++dex_pc) {
-          gSingleStepControl.dex_pcs.insert(dex_pc);
+          context->single_step_control_->dex_pcs.insert(dex_pc);
         }
         context->last_pc_valid = false;
       }
       return false;  // There may be multiple entries for any given line.
     }
 
-    // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
-    // annotalysis.
-    ~DebugCallbackContext() NO_THREAD_SAFETY_ANALYSIS {
-      Locks::breakpoint_lock_->AssertHeld(Thread::Current());
+    ~DebugCallbackContext() {
       // If the line number was the last in the position table...
       if (last_pc_valid) {
-        size_t end = MethodHelper(gSingleStepControl.method).GetCodeItem()->insns_size_in_code_units_;
+        size_t end = MethodHelper(single_step_control_->method).GetCodeItem()->insns_size_in_code_units_;
         for (uint32_t dex_pc = last_pc; dex_pc < end; ++dex_pc) {
-          gSingleStepControl.dex_pcs.insert(dex_pc);
+          single_step_control_->dex_pcs.insert(dex_pc);
         }
       }
     }
 
+    SingleStepControl* const single_step_control_;
+    const int32_t line_number_;
     bool last_pc_valid;
     uint32_t last_pc;
   };
-  gSingleStepControl.dex_pcs.clear();
-  const mirror::ArtMethod* m = gSingleStepControl.method;
-  if (m->IsNative()) {
-    gSingleStepControl.line_number = -1;
-  } else {
-    DebugCallbackContext context;
+  single_step_control->dex_pcs.clear();
+  const mirror::ArtMethod* m = single_step_control->method;
+  if (!m->IsNative()) {
+    DebugCallbackContext context(single_step_control, line_number);
     MethodHelper mh(m);
     mh.GetDexFile().DecodeDebugInfo(mh.GetCodeItem(), m->IsStatic(), m->GetDexMethodIndex(),
                                     DebugCallbackContext::Callback, NULL, &context);
@@ -2566,20 +2552,19 @@
   // Everything else...
   //
 
-  gSingleStepControl.thread = sts.GetThread();
-  gSingleStepControl.step_size = step_size;
-  gSingleStepControl.step_depth = step_depth;
-  gSingleStepControl.is_active = true;
+  single_step_control->step_size = step_size;
+  single_step_control->step_depth = step_depth;
+  single_step_control->is_active = true;
 
   if (VLOG_IS_ON(jdwp)) {
-    VLOG(jdwp) << "Single-step thread: " << *gSingleStepControl.thread;
-    VLOG(jdwp) << "Single-step step size: " << gSingleStepControl.step_size;
-    VLOG(jdwp) << "Single-step step depth: " << gSingleStepControl.step_depth;
-    VLOG(jdwp) << "Single-step current method: " << PrettyMethod(gSingleStepControl.method);
-    VLOG(jdwp) << "Single-step current line: " << gSingleStepControl.line_number;
-    VLOG(jdwp) << "Single-step current stack depth: " << gSingleStepControl.stack_depth;
+    VLOG(jdwp) << "Single-step thread: " << *thread;
+    VLOG(jdwp) << "Single-step step size: " << single_step_control->step_size;
+    VLOG(jdwp) << "Single-step step depth: " << single_step_control->step_depth;
+    VLOG(jdwp) << "Single-step current method: " << PrettyMethod(single_step_control->method);
+    VLOG(jdwp) << "Single-step current line: " << line_number;
+    VLOG(jdwp) << "Single-step current stack depth: " << single_step_control->stack_depth;
     VLOG(jdwp) << "Single-step dex_pc values:";
-    for (std::set<uint32_t>::iterator it = gSingleStepControl.dex_pcs.begin() ; it != gSingleStepControl.dex_pcs.end(); ++it) {
+    for (std::set<uint32_t>::iterator it = single_step_control->dex_pcs.begin(); it != single_step_control->dex_pcs.end(); ++it) {
       VLOG(jdwp) << StringPrintf(" %#x", *it);
     }
   }
@@ -2587,12 +2572,17 @@
   return JDWP::ERR_NONE;
 }
 
-void Dbg::UnconfigureStep(JDWP::ObjectId /*thread_id*/) {
-  MutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
-
-  gSingleStepControl.is_active = false;
-  gSingleStepControl.thread = NULL;
-  gSingleStepControl.dex_pcs.clear();
+void Dbg::UnconfigureStep(JDWP::ObjectId thread_id) {
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
+  Thread* thread;
+  JDWP::JdwpError error = DecodeThread(soa, thread_id, thread);
+  if (error == JDWP::ERR_NONE) {
+    SingleStepControl* single_step_control = thread->GetSingleStepControl();
+    DCHECK(single_step_control != nullptr);
+    single_step_control->is_active = false;
+    single_step_control->dex_pcs.clear();
+  }
 }
 
 static char JdwpTagToShortyChar(JDWP::JdwpTag tag) {
@@ -2732,14 +2722,14 @@
       }
     }
 
-    req->receiver_ = receiver;
-    req->thread_ = thread;
-    req->class_ = c;
-    req->method_ = m;
-    req->arg_count_ = arg_count;
-    req->arg_values_ = arg_values;
-    req->options_ = options;
-    req->invoke_needed_ = true;
+    req->receiver = receiver;
+    req->thread = thread;
+    req->klass = c;
+    req->method = m;
+    req->arg_count = arg_count;
+    req->arg_values = arg_values;
+    req->options = options;
+    req->invoke_needed = true;
   }
 
   // The fact that we've released the thread list lock is a bit risky --- if the thread goes
@@ -2757,7 +2747,7 @@
 
     VLOG(jdwp) << "    Transferring control to event thread";
     {
-      MutexLock mu(self, req->lock_);
+      MutexLock mu(self, req->lock);
 
       if ((options & JDWP::INVOKE_SINGLE_THREADED) == 0) {
         VLOG(jdwp) << "      Resuming all threads";
@@ -2768,8 +2758,8 @@
       }
 
       // Wait for the request to finish executing.
-      while (req->invoke_needed_) {
-        req->cond_.Wait(self);
+      while (req->invoke_needed) {
+        req->cond.Wait(self);
       }
     }
     VLOG(jdwp) << "    Control has returned from event thread";
@@ -2826,24 +2816,24 @@
   }
 
   // Translate the method through the vtable, unless the debugger wants to suppress it.
-  mirror::ArtMethod* m = pReq->method_;
-  if ((pReq->options_ & JDWP::INVOKE_NONVIRTUAL) == 0 && pReq->receiver_ != NULL) {
-    mirror::ArtMethod* actual_method = pReq->class_->FindVirtualMethodForVirtualOrInterface(pReq->method_);
+  mirror::ArtMethod* m = pReq->method;
+  if ((pReq->options & JDWP::INVOKE_NONVIRTUAL) == 0 && pReq->receiver != NULL) {
+    mirror::ArtMethod* actual_method = pReq->klass->FindVirtualMethodForVirtualOrInterface(pReq->method);
     if (actual_method != m) {
       VLOG(jdwp) << "ExecuteMethod translated " << PrettyMethod(m) << " to " << PrettyMethod(actual_method);
       m = actual_method;
     }
   }
   VLOG(jdwp) << "ExecuteMethod " << PrettyMethod(m)
-             << " receiver=" << pReq->receiver_
-             << " arg_count=" << pReq->arg_count_;
+             << " receiver=" << pReq->receiver
+             << " arg_count=" << pReq->arg_count;
   CHECK(m != NULL);
 
   CHECK_EQ(sizeof(jvalue), sizeof(uint64_t));
 
   MethodHelper mh(m);
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
-  arg_array.BuildArgArray(soa, pReq->receiver_, reinterpret_cast<jvalue*>(pReq->arg_values_));
+  arg_array.BuildArgArray(soa, pReq->receiver, reinterpret_cast<jvalue*>(pReq->arg_values));
   InvokeWithArgArray(soa, m, &arg_array, &pReq->result_value, mh.GetShorty()[0]);
 
   mirror::Throwable* exception = soa.Self()->GetException(NULL);
@@ -3372,7 +3362,7 @@
       return HPSG_STATE(SOLIDITY_HARD, KIND_OBJECT);
     }
 
-    if (!Runtime::Current()->GetHeap()->IsHeapAddress(c)) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(c)) {
       LOG(ERROR) << "Invalid class for managed heap object: " << o << " " << c;
       return HPSG_STATE(SOLIDITY_HARD, KIND_UNKNOWN);
     }
@@ -3430,6 +3420,14 @@
   JDWP::Set4BE(&heap_id[0], 1);  // Heap id (bogus; we only have one heap).
   Dbg::DdmSendChunk(native ? CHUNK_TYPE("NHST") : CHUNK_TYPE("HPST"), sizeof(heap_id), heap_id);
 
+  Thread* self = Thread::Current();
+
+  // To allow the Walk/InspectAll() below to exclusively-lock the
+  // mutator lock, temporarily release the shared access to the
+  // mutator lock here by transitioning to the suspended state.
+  Locks::mutator_lock_->AssertSharedHeld(self);
+  self->TransitionFromRunnableToSuspended(kSuspended);
+
   // Send a series of heap segment chunks.
   HeapChunkContext context((what == HPSG_WHAT_MERGED_OBJECTS), native);
   if (native) {
@@ -3437,18 +3435,21 @@
   } else {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     const std::vector<gc::space::ContinuousSpace*>& spaces = heap->GetContinuousSpaces();
-    Thread* self = Thread::Current();
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     typedef std::vector<gc::space::ContinuousSpace*>::const_iterator It;
     for (It cur = spaces.begin(), end = spaces.end(); cur != end; ++cur) {
-      if ((*cur)->IsDlMallocSpace()) {
-        (*cur)->AsDlMallocSpace()->Walk(HeapChunkContext::HeapChunkCallback, &context);
+      if ((*cur)->IsMallocSpace()) {
+        (*cur)->AsMallocSpace()->Walk(HeapChunkContext::HeapChunkCallback, &context);
       }
     }
     // Walk the large objects, these are not in the AllocSpace.
     heap->GetLargeObjectsSpace()->Walk(HeapChunkContext::HeapChunkCallback, &context);
   }
 
+  // Re-acquire shared access to the mutator lock.
+  self->TransitionFromSuspendedToRunnable();
+  Locks::mutator_lock_->AssertSharedHeld(self);
+
   // Finally, send a heap end chunk.
   Dbg::DdmSendChunk(native ? CHUNK_TYPE("NHEN") : CHUNK_TYPE("HPEN"), sizeof(heap_id), heap_id);
 }
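
The transition pair bracketing the walk could also be packaged as an RAII guard; a hypothetical sketch (not an existing ART class) built from the same Thread calls used above:

    // Drops the shared mutator lock for the duration of a scope so a
    // heap walk may exclusively lock it, then re-acquires it on exit.
    class ScopedHeapWalkSuspension {
     public:
      explicit ScopedHeapWalkSuspension(Thread* self) : self_(self) {
        Locks::mutator_lock_->AssertSharedHeld(self_);
        self_->TransitionFromRunnableToSuspended(kSuspended);
      }
      ~ScopedHeapWalkSuspension() {
        self_->TransitionFromSuspendedToRunnable();
        Locks::mutator_lock_->AssertSharedHeld(self_);
      }
     private:
      Thread* const self_;
      DISALLOW_COPY_AND_ASSIGN(ScopedHeapWalkSuspension);
    };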
@@ -3489,9 +3490,9 @@
       recent_allocation_records_ = new AllocRecord[gAllocRecordMax];
       CHECK(recent_allocation_records_ != NULL);
     }
-    Runtime::Current()->InstrumentQuickAllocEntryPoints();
+    Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints();
   } else {
-    Runtime::Current()->UninstrumentQuickAllocEntryPoints();
+    Runtime::Current()->GetInstrumentation()->UninstrumentQuickAllocEntryPoints();
     delete[] recent_allocation_records_;
     recent_allocation_records_ = NULL;
   }
@@ -3752,7 +3753,6 @@
 
     count = gAllocRecordCount;
     idx = HeadIndex();
-    ClassHelper kh;
     while (count--) {
       // For each entry:
       // (4b) total allocation size
@@ -3761,7 +3761,7 @@
       // (1b) stack depth
       AllocRecord* record = &recent_allocation_records_[idx];
       size_t stack_depth = record->GetDepth();
-      kh.ChangeClass(record->type);
+      ClassHelper kh(record->type);
       size_t allocated_object_class_name_index = class_names.IndexOf(kh.GetDescriptor());
       JDWP::Append4BE(bytes, record->byte_count);
       JDWP::Append2BE(bytes, record->thin_lock_id);
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 8574a33..acbb2c6 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -23,6 +23,7 @@
 
 #include <pthread.h>
 
+#include <set>
 #include <string>
 
 #include "jdwp/jdwp.h"
@@ -47,28 +48,28 @@
  */
 struct DebugInvokeReq {
   DebugInvokeReq()
-      : ready(false), invoke_needed_(false),
-        receiver_(NULL), thread_(NULL), class_(NULL), method_(NULL),
-        arg_count_(0), arg_values_(NULL), options_(0), error(JDWP::ERR_NONE),
+      : ready(false), invoke_needed(false),
+        receiver(NULL), thread(NULL), klass(NULL), method(NULL),
+        arg_count(0), arg_values(NULL), options(0), error(JDWP::ERR_NONE),
         result_tag(JDWP::JT_VOID), exception(0),
-        lock_("a DebugInvokeReq lock", kBreakpointInvokeLock),
-        cond_("a DebugInvokeReq condition variable", lock_) {
+        lock("a DebugInvokeReq lock", kBreakpointInvokeLock),
+        cond("a DebugInvokeReq condition variable", lock) {
   }
 
   /* boolean; only set when we're in the tail end of an event handler */
   bool ready;
 
   /* boolean; set if the JDWP thread wants this thread to do work */
-  bool invoke_needed_;
+  bool invoke_needed;
 
   /* request */
-  mirror::Object* receiver_;      /* not used for ClassType.InvokeMethod */
-  mirror::Object* thread_;
-  mirror::Class* class_;
-  mirror::ArtMethod* method_;
-  uint32_t arg_count_;
-  uint64_t* arg_values_;   /* will be NULL if arg_count_ == 0 */
-  uint32_t options_;
+  mirror::Object* receiver;      /* not used for ClassType.InvokeMethod */
+  mirror::Object* thread;
+  mirror::Class* klass;
+  mirror::ArtMethod* method;
+  uint32_t arg_count;
+  uint64_t* arg_values;   /* will be NULL if arg_count == 0 */
+  uint32_t options;
 
   /* result */
   JDWP::JdwpError error;
@@ -77,8 +78,41 @@
   JDWP::ObjectId exception;
 
   /* condition variable to wait on while the method executes */
-  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  ConditionVariable cond_ GUARDED_BY(lock_);
+  Mutex lock DEFAULT_MUTEX_ACQUIRED_AFTER;
+  ConditionVariable cond GUARDED_BY(lock);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(DebugInvokeReq);
+};
+
+// Thread-local data structure that holds fields for controlling single-stepping.
+struct SingleStepControl {
+  SingleStepControl()
+      : is_active(false), step_size(JDWP::SS_MIN), step_depth(JDWP::SD_INTO),
+        method(nullptr), stack_depth(0) {
+  }
+
+  // Are we single-stepping right now?
+  bool is_active;
+
+  // See JdwpStepSize and JdwpStepDepth for details.
+  JDWP::JdwpStepSize step_size;
+  JDWP::JdwpStepDepth step_depth;
+
+  // The location this single-step was initiated from.
+  // A single-step is initiated in a suspended thread. Here we record the current method and the
+  // set of DEX pcs associated with the source line number where the suspension occurred.
+  // This is used to support SD_INTO and SD_OVER single-step depths so we detect when a single-step
+  // causes the execution of an instruction in a different method or at a different line number.
+  mirror::ArtMethod* method;
+  std::set<uint32_t> dex_pcs;
+
+  // The stack depth when this single-step was initiated. This is used to support SD_OVER and
+  // SD_OUT single-step depths.
+  int stack_depth;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(SingleStepControl);
 };
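
Because the control block now lives on the thread (reached through Thread::GetSingleStepControl() in debugger.cc above), consumers can read it without Locks::breakpoint_lock_. A hypothetical helper condensing the consumer pattern from Dbg::UpdateDebugger:

    static bool OnlyFramePopCanTriggerStep(Thread* thread)
        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
      const SingleStepControl* ssc = thread->GetSingleStepControl();
      DCHECK(ssc != nullptr);
      return ssc->is_active && ssc->step_depth == JDWP::SD_OUT;
    }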
 
 class Dbg {
@@ -230,6 +264,9 @@
   static void OutputVariableTable(JDWP::RefTypeId ref_type_id, JDWP::MethodId id, bool with_generic,
                                   JDWP::ExpandBuf* pReply)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void OutputMethodReturnValue(JDWP::MethodId method_id, const JValue* return_value,
+                                      JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static JDWP::JdwpError GetBytecodes(JDWP::RefTypeId class_id, JDWP::MethodId method_id,
                                       std::vector<uint8_t>& bytecodes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -254,6 +291,8 @@
 
   static std::string StringToUtf8(JDWP::ObjectId string_id)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void OutputJValue(JDWP::JdwpTag tag, const JValue* return_value, JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
    * Thread, ThreadGroup, Frame
@@ -327,7 +366,8 @@
     kMethodExit     = 0x08,
   };
   static void PostLocationEvent(const mirror::ArtMethod* method, int pcOffset,
-                                mirror::Object* thisPtr, int eventFlags)
+                                mirror::Object* thisPtr, int eventFlags,
+                                const JValue* return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void PostException(Thread* thread, const ThrowLocation& throw_location,
                             mirror::ArtMethod* catch_method,
@@ -353,9 +393,9 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static JDWP::JdwpError ConfigureStep(JDWP::ObjectId thread_id, JDWP::JdwpStepSize size,
                                        JDWP::JdwpStepDepth depth)
-      LOCKS_EXCLUDED(Locks::breakpoint_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void UnconfigureStep(JDWP::ObjectId thread_id) LOCKS_EXCLUDED(Locks::breakpoint_lock_);
+  static void UnconfigureStep(JDWP::ObjectId thread_id)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static JDWP::JdwpError InvokeMethod(JDWP::ObjectId thread_id, JDWP::ObjectId object_id,
                                       JDWP::RefTypeId class_id, JDWP::MethodId method_id,
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 7e09a48..517f96c 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -36,6 +36,8 @@
 #include "mirror/string.h"
 #include "os.h"
 #include "safe_map.h"
+#include "ScopedFd.h"
+#include "sirt_ref.h"
 #include "thread.h"
 #include "UniquePtr.h"
 #include "utf-inl.h"
@@ -64,34 +66,34 @@
 
 static int OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg) {
   CHECK(magic != NULL);
-  int fd = open(filename, O_RDONLY, 0);
-  if (fd == -1) {
+  ScopedFd fd(open(filename, O_RDONLY, 0));
+  if (fd.get() == -1) {
     *error_msg = StringPrintf("Unable to open '%s' : %s", filename, strerror(errno));
     return -1;
   }
-  int n = TEMP_FAILURE_RETRY(read(fd, magic, sizeof(*magic)));
+  int n = TEMP_FAILURE_RETRY(read(fd.get(), magic, sizeof(*magic)));
   if (n != sizeof(*magic)) {
     *error_msg = StringPrintf("Failed to find magic in '%s'", filename);
     return -1;
   }
-  if (lseek(fd, 0, SEEK_SET) != 0) {
+  if (lseek(fd.get(), 0, SEEK_SET) != 0) {
     *error_msg = StringPrintf("Failed to seek to beginning of file '%s' : %s", filename,
                               strerror(errno));
     return -1;
   }
-  return fd;
+  return fd.release();
 }
 
 bool DexFile::GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg) {
   CHECK(checksum != NULL);
   uint32_t magic;
-  int fd = OpenAndReadMagic(filename, &magic, error_msg);
-  if (fd == -1) {
+  ScopedFd fd(OpenAndReadMagic(filename, &magic, error_msg));
+  if (fd.get() == -1) {
     DCHECK(!error_msg->empty());
     return false;
   }
   if (IsZipMagic(magic)) {
-    UniquePtr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, filename, error_msg));
+    UniquePtr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd.release(), filename, error_msg));
     if (zip_archive.get() == NULL) {
       *error_msg = StringPrintf("Failed to open zip archive '%s'", filename);
       return false;
@@ -105,7 +107,7 @@
     return true;
   }
   if (IsDexMagic(magic)) {
-    UniquePtr<const DexFile> dex_file(DexFile::OpenFile(fd, filename, false, error_msg));
+    UniquePtr<const DexFile> dex_file(DexFile::OpenFile(fd.release(), filename, false, error_msg));
     if (dex_file.get() == NULL) {
       return false;
     }
@@ -120,16 +122,16 @@
                              const char* location,
                              std::string* error_msg) {
   uint32_t magic;
-  int fd = OpenAndReadMagic(filename, &magic, error_msg);
-  if (fd == -1) {
+  ScopedFd fd(OpenAndReadMagic(filename, &magic, error_msg));
+  if (fd.get() == -1) {
     DCHECK(!error_msg->empty());
     return NULL;
   }
   if (IsZipMagic(magic)) {
-    return DexFile::OpenZip(fd, location, error_msg);
+    return DexFile::OpenZip(fd.release(), location, error_msg);
   }
   if (IsDexMagic(magic)) {
-    return DexFile::OpenFile(fd, location, true, error_msg);
+    return DexFile::OpenFile(fd.release(), location, true, error_msg);
   }
   *error_msg = StringPrintf("Expected valid zip or dex file: '%s'", filename);
   return nullptr;
@@ -168,26 +170,26 @@
 const DexFile* DexFile::OpenFile(int fd, const char* location, bool verify,
                                  std::string* error_msg) {
   CHECK(location != nullptr);
-  struct stat sbuf;
-  memset(&sbuf, 0, sizeof(sbuf));
-  if (fstat(fd, &sbuf) == -1) {
-    *error_msg = StringPrintf("DexFile: fstat \'%s\' failed: %s", location, strerror(errno));
-    close(fd);
-    return nullptr;
+  UniquePtr<MemMap> map;
+  {
+    ScopedFd delayed_close(fd);
+    struct stat sbuf;
+    memset(&sbuf, 0, sizeof(sbuf));
+    if (fstat(fd, &sbuf) == -1) {
+      *error_msg = StringPrintf("DexFile: fstat \'%s\' failed: %s", location, strerror(errno));
+      return nullptr;
+    }
+    if (S_ISDIR(sbuf.st_mode)) {
+      *error_msg = StringPrintf("Attempt to mmap directory '%s'", location);
+      return nullptr;
+    }
+    size_t length = sbuf.st_size;
+    map.reset(MemMap::MapFile(length, PROT_READ, MAP_PRIVATE, fd, 0, location, error_msg));
+    if (map.get() == nullptr) {
+      DCHECK(!error_msg->empty());
+      return nullptr;
+    }
   }
-  if (S_ISDIR(sbuf.st_mode)) {
-    *error_msg = StringPrintf("Attempt to mmap directory '%s'", location);
-    return nullptr;
-  }
-  size_t length = sbuf.st_size;
-  UniquePtr<MemMap> map(MemMap::MapFile(length, PROT_READ, MAP_PRIVATE, fd, 0, location,
-                                        error_msg));
-  if (map.get() == nullptr) {
-    DCHECK(!error_msg->empty());
-    close(fd);
-    return nullptr;
-  }
-  close(fd);
 
   if (map->Size() < sizeof(DexFile::Header)) {
     *error_msg = StringPrintf(
@@ -220,7 +222,7 @@
     DCHECK(!error_msg->empty());
     return nullptr;
   }
-  return DexFile::Open(*zip_archive.get(), location, error_msg);
+  return DexFile::Open(*zip_archive, location, error_msg);
 }
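
The ScopedFd conversions in this file follow the usual RAII pattern: the wrapper owns the descriptor, every early error return closes it automatically, and release() hands ownership onward (to ZipArchive::OpenFromFd or DexFile::OpenFile), while the block-scoped delayed_close above replaces the close(fd) call that previously had to precede each return. A minimal sketch of such a wrapper (the real ScopedFd comes from libnativehelper; this one is only illustrative):

#include <unistd.h>

// Illustrative stand-in for ScopedFd: owns a POSIX fd, closes it on scope
// exit, and can release ownership to a callee.
class ScopedFdSketch {
 public:
  explicit ScopedFdSketch(int fd) : fd_(fd) {}
  ~ScopedFdSketch() {
    if (fd_ != -1) {
      close(fd_);
    }
  }
  int get() const { return fd_; }
  int release() {  // Caller takes over; the destructor becomes a no-op.
    int fd = fd_;
    fd_ = -1;
    return fd;
  }
 private:
  int fd_;
  ScopedFdSketch(const ScopedFdSketch&);  // Non-copyable (pre-C++11 style,
  void operator=(const ScopedFdSketch&);  // matching the codebase's idiom).
};
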
 
 const DexFile* DexFile::OpenMemory(const std::string& location,
@@ -508,7 +510,8 @@
 }
 
 const DexFile::ProtoId* DexFile::FindProtoId(uint16_t return_type_idx,
-                                         const std::vector<uint16_t>& signature_type_idxs) const {
+                                             const uint16_t* signature_type_idxs,
+                                             uint32_t signature_length) const {
   int32_t lo = 0;
   int32_t hi = NumProtoIds() - 1;
   while (hi >= lo) {
@@ -518,7 +521,7 @@
     if (compare == 0) {
       DexFileParameterIterator it(*this, proto);
       size_t i = 0;
-      while (it.HasNext() && i < signature_type_idxs.size() && compare == 0) {
+      while (it.HasNext() && i < signature_length && compare == 0) {
         compare = signature_type_idxs[i] - it.GetTypeIdx();
         it.Next();
         i++;
@@ -526,7 +529,7 @@
       if (compare == 0) {
         if (it.HasNext()) {
           compare = -1;
-        } else if (i < signature_type_idxs.size()) {
+        } else if (i < signature_length) {
           compare = 1;
         }
       }
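
FindProtoId performs a two-level binary search over the dex file's sorted proto_ids: it compares the return type index first, then walks the parameter type lists lexicographically, with the shorter list ordering first on a tie. Taking a raw pointer plus length (instead of only a std::vector) lets callers pass type ids straight from a stack buffer. A self-contained sketch of the comparison scheme, with ProtoEntry standing in for the real dex structures:

#include <cstdint>
#include <vector>

struct ProtoEntry {
  uint16_t return_type_idx;
  std::vector<uint16_t> param_type_idxs;
};

// 'protos' must be sorted by (return type idx, then parameter idx list).
// Returns the matching index, or -1 if no proto matches.
int FindProto(const std::vector<ProtoEntry>& protos,
              uint16_t return_type_idx,
              const uint16_t* sig, uint32_t sig_len) {
  int32_t lo = 0;
  int32_t hi = static_cast<int32_t>(protos.size()) - 1;
  while (hi >= lo) {
    int32_t mid = (hi + lo) / 2;
    const ProtoEntry& p = protos[mid];
    int compare = static_cast<int>(return_type_idx) - p.return_type_idx;
    for (uint32_t i = 0;
         compare == 0 && i < sig_len && i < p.param_type_idxs.size(); ++i) {
      compare = static_cast<int>(sig[i]) - p.param_type_idxs[i];
    }
    if (compare == 0) {  // Common prefix matched: shorter list sorts first.
      if (sig_len < p.param_type_idxs.size()) compare = -1;
      else if (sig_len > p.param_type_idxs.size()) compare = 1;
    }
    if (compare > 0) lo = mid + 1;
    else if (compare < 0) hi = mid - 1;
    else return mid;
  }
  return -1;  // Not found.
}
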
@@ -552,22 +555,19 @@
   size_t end = signature.size();
   bool process_return = false;
   while (offset < end) {
+    size_t start_offset = offset;
     char c = signature[offset];
     offset++;
     if (c == ')') {
       process_return = true;
       continue;
     }
-    // TODO: avoid building a string.
-    std::string descriptor;
-    descriptor += c;
     while (c == '[') {  // process array prefix
       if (offset >= end) {  // expect some descriptor following [
         return false;
       }
       c = signature[offset];
       offset++;
-      descriptor += c;
     }
     if (c == 'L') {  // process type descriptors
       do {
@@ -576,9 +576,10 @@
         }
         c = signature[offset];
         offset++;
-        descriptor += c;
       } while (c != ';');
     }
+    // TODO: avoid creating a std::string just to get a 0-terminated char array
+    std::string descriptor(signature.data() + start_offset, offset - start_offset);
     const DexFile::StringId* string_id = FindStringId(descriptor.c_str());
     if (string_id == NULL) {
       return false;
@@ -719,9 +720,9 @@
   for (;;)  {
     uint8_t opcode = *stream++;
     uint16_t reg;
-    uint16_t name_idx;
-    uint16_t descriptor_idx;
-    uint16_t signature_idx = 0;
+    uint32_t name_idx;
+    uint32_t descriptor_idx;
+    uint32_t signature_idx = 0;
 
     switch (opcode) {
       case DBG_END_SEQUENCE:
@@ -875,6 +876,32 @@
   return result;
 }
 
+bool Signature::operator==(const StringPiece& rhs) const {
+  if (dex_file_ == nullptr) {
+    return false;
+  }
+  StringPiece tail(rhs);
+  if (!tail.starts_with("(")) {
+    return false;  // Invalid signature
+  }
+  tail.remove_prefix(1);  // "(";
+  const DexFile::TypeList* params = dex_file_->GetProtoParameters(*proto_id_);
+  if (params != nullptr) {
+    for (uint32_t i = 0; i < params->Size(); ++i) {
+      StringPiece param(dex_file_->StringByTypeIdx(params->GetTypeItem(i).type_idx_));
+      if (!tail.starts_with(param)) {
+        return false;
+      }
+      tail.remove_prefix(param.length());
+    }
+  }
+  if (!tail.starts_with(")")) {
+    return false;
+  }
+  tail.remove_prefix(1);  // ")";
+  return tail == dex_file_->StringByTypeIdx(proto_id_->return_type_idx_);
+}
+
 std::ostream& operator<<(std::ostream& os, const Signature& sig) {
   return os << sig.ToString();
 }
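
The operator== above (the "Faster Signature::operator==(const StringPiece& rhs)" change from the commit log) avoids materializing ToString(): it consumes the candidate signature prefix by prefix through a non-owning string view. The same strategy in isolation, using std::string_view where ART uses its own StringPiece (an assumption made for the sketch):

#include <string_view>
#include <vector>

// Compares a raw signature like "(ILjava/lang/String;)V" against a proto's
// parameter descriptors and return descriptor without allocating.
bool SignatureEquals(std::string_view rhs,
                     const std::vector<std::string_view>& params,
                     std::string_view return_type) {
  if (rhs.empty() || rhs.front() != '(') {
    return false;  // Invalid signature.
  }
  rhs.remove_prefix(1);
  for (std::string_view param : params) {
    if (rhs.substr(0, param.size()) != param) {
      return false;
    }
    rhs.remove_prefix(param.size());  // Consume the matched descriptor.
  }
  if (rhs.empty() || rhs.front() != ')') {
    return false;
  }
  rhs.remove_prefix(1);
  return rhs == return_type;  // What remains must be the return descriptor.
}

// e.g. SignatureEquals("(ILjava/lang/String;)V",
//                      {"I", "Ljava/lang/String;"}, "V") yields true.
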
@@ -962,12 +989,14 @@
 }
 
 EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(const DexFile& dex_file,
-                                                                 mirror::DexCache* dex_cache,
-                                                                 mirror::ClassLoader* class_loader,
+                                                                 SirtRef<mirror::DexCache>* dex_cache,
+                                                                 SirtRef<mirror::ClassLoader>* class_loader,
                                                                  ClassLinker* linker,
                                                                  const DexFile::ClassDef& class_def)
     : dex_file_(dex_file), dex_cache_(dex_cache), class_loader_(class_loader), linker_(linker),
       array_size_(), pos_(-1), type_(kByte) {
+  DCHECK(dex_cache != nullptr);
+  DCHECK(class_loader != nullptr);
   ptr_ = dex_file.GetEncodedStaticFieldValuesArray(class_def);
   if (ptr_ == NULL) {
     array_size_ = 0;
@@ -1050,12 +1079,15 @@
     case kDouble:  field->SetDouble(field->GetDeclaringClass(), jval_.d); break;
     case kNull:    field->SetObject(field->GetDeclaringClass(), NULL); break;
     case kString: {
-      mirror::String* resolved = linker_->ResolveString(dex_file_, jval_.i, dex_cache_);
+      CHECK(!kMovingFields);
+      mirror::String* resolved = linker_->ResolveString(dex_file_, jval_.i, *dex_cache_);
       field->SetObject(field->GetDeclaringClass(), resolved);
       break;
     }
     case kType: {
-      mirror::Class* resolved = linker_->ResolveType(dex_file_, jval_.i, dex_cache_, class_loader_);
+      CHECK(!kMovingFields);
+      mirror::Class* resolved = linker_->ResolveType(dex_file_, jval_.i, *dex_cache_,
+                                                     *class_loader_);
       field->SetObject(field->GetDeclaringClass(), resolved);
       break;
     }
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index a9c24e6..69593cd 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -43,6 +43,8 @@
 }  // namespace mirror
 class ClassLinker;
 class Signature;
+template <typename T>
+class SirtRef;
 class StringPiece;
 class ZipArchive;
 
@@ -659,7 +661,11 @@
 
   // Looks up a proto id for a given return type and signature type list
   const ProtoId* FindProtoId(uint16_t return_type_idx,
-                             const std::vector<uint16_t>& signature_type_idxs_) const;
+                             const uint16_t* signature_type_idxs, uint32_t signature_length) const;
+  const ProtoId* FindProtoId(uint16_t return_type_idx,
+                             const std::vector<uint16_t>& signature_type_idxs) const {
+    return FindProtoId(return_type_idx, &signature_type_idxs[0], signature_type_idxs.size());
+  }
 
   // Given a signature, place the type ids into the given vector; returns true on success.
   bool CreateTypeList(const StringPiece& signature, uint16_t* return_type_idx,
@@ -958,10 +964,7 @@
     return !(*this == rhs);
   }
 
-  bool operator==(const StringPiece& rhs) const {
-    // TODO: Avoid temporary string allocation.
-    return ToString() == rhs;
-  }
+  bool operator==(const StringPiece& rhs) const;
 
  private:
   Signature(const DexFile* dex, const DexFile::ProtoId& proto) : dex_file_(dex), proto_id_(&proto) {
@@ -1152,8 +1155,8 @@
 
 class EncodedStaticFieldValueIterator {
  public:
-  EncodedStaticFieldValueIterator(const DexFile& dex_file, mirror::DexCache* dex_cache,
-                                  mirror::ClassLoader* class_loader,
+  EncodedStaticFieldValueIterator(const DexFile& dex_file, SirtRef<mirror::DexCache>* dex_cache,
+                                  SirtRef<mirror::ClassLoader>* class_loader,
                                   ClassLinker* linker, const DexFile::ClassDef& class_def)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -1187,8 +1190,8 @@
   static const byte kEncodedValueArgShift = 5;
 
   const DexFile& dex_file_;
-  mirror::DexCache* dex_cache_;  // Dex cache to resolve literal objects.
-  mirror::ClassLoader* class_loader_;  // ClassLoader to resolve types.
+  SirtRef<mirror::DexCache>* const dex_cache_;  // Dex cache to resolve literal objects.
+  SirtRef<mirror::ClassLoader>* const class_loader_;  // ClassLoader to resolve types.
   ClassLinker* linker_;  // Linker to resolve literal objects.
   size_t array_size_;  // Size of array.
   size_t pos_;  // Current position.
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 24ab1ce..2806f94 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -33,20 +33,20 @@
 
 namespace art {
 
-static inline bool CheckFilledNewArrayAlloc(uint32_t type_idx, mirror::ArtMethod* referrer,
-                                            int32_t component_count, Thread* self,
-                                            bool access_check, mirror::Class** klass_ptr)
+static inline mirror::Class* CheckFilledNewArrayAlloc(uint32_t type_idx, mirror::ArtMethod* referrer,
+                                                      int32_t component_count, Thread* self,
+                                                      bool access_check)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (UNLIKELY(component_count < 0)) {
     ThrowNegativeArraySizeException(component_count);
-    return false;  // Failure
+    return nullptr;  // Failure
   }
   mirror::Class* klass = referrer->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
   if (UNLIKELY(klass == NULL)) {  // Not in dex cache so try to resolve
     klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, referrer);
     if (klass == NULL) {  // Error
       DCHECK(self->IsExceptionPending());
-      return false;  // Failure
+      return nullptr;  // Failure
     }
   }
   if (UNLIKELY(klass->IsPrimitive() && !klass->IsPrimitiveInt())) {
@@ -60,40 +60,43 @@
                                "Found type %s; filled-new-array not implemented for anything but \'int\'",
                                PrettyDescriptor(klass).c_str());
     }
-    return false;  // Failure
+    return nullptr;  // Failure
   }
   if (access_check) {
     mirror::Class* referrer_klass = referrer->GetDeclaringClass();
     if (UNLIKELY(!referrer_klass->CanAccess(klass))) {
       ThrowIllegalAccessErrorClass(referrer_klass, klass);
-      return false;  // Failure
+      return nullptr;  // Failure
     }
   }
   DCHECK(klass->IsArrayClass()) << PrettyClass(klass);
-  *klass_ptr = klass;
-  return true;
+  return klass;
 }
 
 // Helper function to allocate an array for FILLED_NEW_ARRAY.
 mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* referrer,
                                           int32_t component_count, Thread* self,
-                                          bool access_check) {
-  mirror::Class* klass;
-  if (UNLIKELY(!CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self, access_check, &klass))) {
-    return NULL;
+                                          bool access_check,
+                                          gc::AllocatorType allocator_type) {
+  mirror::Class* klass = CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self,
+                                                  access_check);
+  if (UNLIKELY(klass == nullptr)) {
+    return nullptr;
   }
-  return mirror::Array::AllocUninstrumented(self, klass, component_count);
+  return mirror::Array::Alloc<false>(self, klass, component_count, allocator_type);
 }
 
 // Helper function to allocate an array for FILLED_NEW_ARRAY.
 mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* referrer,
                                                       int32_t component_count, Thread* self,
-                                                      bool access_check) {
-  mirror::Class* klass;
-  if (UNLIKELY(!CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self, access_check, &klass))) {
-    return NULL;
+                                                      bool access_check,
+                                                      gc::AllocatorType allocator_type) {
+  mirror::Class* klass = CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self,
+                                                  access_check);
+  if (UNLIKELY(klass == nullptr)) {
+    return nullptr;
   }
-  return mirror::Array::AllocInstrumented(self, klass, component_count);
+  return mirror::Array::Alloc<true>(self, klass, component_count, allocator_type);
 }
 
 void ThrowStackOverflowError(Thread* self) {
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 7ce50c5..747dd56 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -27,9 +27,11 @@
 #include "mirror/art_method.h"
 #include "mirror/array.h"
 #include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
 #include "mirror/throwable.h"
+#include "locks.h"
 #include "object_utils.h"
-
+#include "sirt_ref.h"
 #include "thread.h"
 
 namespace art {
@@ -40,130 +42,122 @@
   class Object;
 }  // namespace mirror
 
-static inline bool CheckObjectAlloc(uint32_t type_idx, mirror::ArtMethod* method,
-                                    Thread* self,
-                                    bool access_check,
-                                    mirror::Class** klass_ptr)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+// TODO: Remove NO_THREAD_SAFETY_ANALYSIS once GCC can handle template specialization.
+template <const bool kAccessCheck>
+ALWAYS_INLINE static inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
+                                                            mirror::ArtMethod* method,
+                                                            Thread* self)
+    NO_THREAD_SAFETY_ANALYSIS {
   mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
-  Runtime* runtime = Runtime::Current();
   if (UNLIKELY(klass == NULL)) {
-    klass = runtime->GetClassLinker()->ResolveType(type_idx, method);
+    klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
     if (klass == NULL) {
       DCHECK(self->IsExceptionPending());
-      return false;  // Failure
+      return nullptr;  // Failure
     }
   }
-  if (access_check) {
+  if (kAccessCheck) {
     if (UNLIKELY(!klass->IsInstantiable())) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       self->ThrowNewException(throw_location, "Ljava/lang/InstantiationError;",
                               PrettyDescriptor(klass).c_str());
-      return false;  // Failure
+      return nullptr;  // Failure
     }
     mirror::Class* referrer = method->GetDeclaringClass();
     if (UNLIKELY(!referrer->CanAccess(klass))) {
       ThrowIllegalAccessErrorClass(referrer, klass);
-      return false;  // Failure
+      return nullptr;  // Failure
     }
   }
-  if (!klass->IsInitialized() &&
-      !runtime->GetClassLinker()->EnsureInitialized(klass, true, true)) {
-    DCHECK(self->IsExceptionPending());
-    return false;  // Failure
+  if (UNLIKELY(!klass->IsInitialized())) {
+    SirtRef<mirror::Class> sirt_klass(self, klass);
+    // The class initializer might cause a GC.
+    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(klass, true, true)) {
+      DCHECK(self->IsExceptionPending());
+      return nullptr;  // Failure
+    }
+    return sirt_klass.get();
   }
-  *klass_ptr = klass;
-  return true;
+  return klass;
 }
 
 // Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it
 // cannot be resolved, throw an error. If it can, use it to create an instance.
 // When the verifier/compiler hasn't been able to verify access, optionally perform an access
 // check.
-static inline mirror::Object* AllocObjectFromCode(uint32_t type_idx, mirror::ArtMethod* method,
-                                                  Thread* self,
-                                                  bool access_check)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::Class* klass;
-  if (UNLIKELY(!CheckObjectAlloc(type_idx, method, self, access_check, &klass))) {
-    return NULL;
+// TODO: Remove NO_THREAD_SAFETY_ANALYSIS once GCC is smarter.
+template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE static inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
+                                                                mirror::ArtMethod* method,
+                                                                Thread* self,
+                                                                gc::AllocatorType allocator_type)
+    NO_THREAD_SAFETY_ANALYSIS {
+  mirror::Class* klass = CheckObjectAlloc<kAccessCheck>(type_idx, method, self);
+  if (UNLIKELY(klass == nullptr)) {
+    return nullptr;
   }
-  return klass->AllocObjectUninstrumented(self);
+  return klass->Alloc<kInstrumented>(self, allocator_type);
 }
 
-static inline mirror::Object* AllocObjectFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
-                                                              Thread* self,
-                                                              bool access_check)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::Class* klass;
-  if (UNLIKELY(!CheckObjectAlloc(type_idx, method, self, access_check, &klass))) {
-    return NULL;
-  }
-  return klass->AllocObjectInstrumented(self);
-}
-
-static inline bool CheckArrayAlloc(uint32_t type_idx, mirror::ArtMethod* method,
-                                   int32_t component_count,
-                                   bool access_check, mirror::Class** klass_ptr)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+// TODO: Remove NO_THREAD_SAFETY_ANALYSIS once GCC can handle template specialization.
+template <bool kAccessCheck>
+ALWAYS_INLINE static inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
+                                                           mirror::ArtMethod* method,
+                                                           int32_t component_count)
+    NO_THREAD_SAFETY_ANALYSIS {
   if (UNLIKELY(component_count < 0)) {
     ThrowNegativeArraySizeException(component_count);
-    return false;  // Failure
+    return nullptr;  // Failure
   }
   mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
-  if (UNLIKELY(klass == NULL)) {  // Not in dex cache so try to resolve
+  if (UNLIKELY(klass == nullptr)) {  // Not in dex cache so try to resolve
     klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
     if (klass == NULL) {  // Error
       DCHECK(Thread::Current()->IsExceptionPending());
-      return false;  // Failure
+      return nullptr;  // Failure
     }
     CHECK(klass->IsArrayClass()) << PrettyClass(klass);
   }
-  if (access_check) {
+  if (kAccessCheck) {
     mirror::Class* referrer = method->GetDeclaringClass();
     if (UNLIKELY(!referrer->CanAccess(klass))) {
       ThrowIllegalAccessErrorClass(referrer, klass);
-      return false;  // Failure
+      return nullptr;  // Failure
     }
   }
-  *klass_ptr = klass;
-  return true;
+  return klass;
 }
 
 // Given the context of a calling Method, use its DexCache to resolve a type to an array Class. If
 // it cannot be resolved, throw an error. If it can, use it to create an array.
 // When the verifier/compiler hasn't been able to verify access, optionally perform an access
 // check.
-static inline mirror::Array* AllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* method,
-                                                int32_t component_count,
-                                                Thread* self, bool access_check)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::Class* klass;
-  if (UNLIKELY(!CheckArrayAlloc(type_idx, method, component_count, access_check, &klass))) {
-    return NULL;
+// TODO: Remove NO_THREAD_SAFETY_ANALYSIS once GCC can handle template specialization.
+template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE static inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
+                                                              mirror::ArtMethod* method,
+                                                              int32_t component_count,
+                                                              Thread* self,
+                                                              gc::AllocatorType allocator_type)
+    NO_THREAD_SAFETY_ANALYSIS {
+  mirror::Class* klass = CheckArrayAlloc<kAccessCheck>(type_idx, method, component_count);
+  if (UNLIKELY(klass == nullptr)) {
+    return nullptr;
   }
-  return mirror::Array::AllocUninstrumented(self, klass, component_count);
-}
-
-static inline mirror::Array* AllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
-                                                            int32_t component_count,
-                                                            Thread* self, bool access_check)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::Class* klass;
-  if (UNLIKELY(!CheckArrayAlloc(type_idx, method, component_count, access_check, &klass))) {
-    return NULL;
-  }
-  return mirror::Array::AllocInstrumented(self, klass, component_count);
+  return mirror::Array::Alloc<kInstrumented>(self, klass, component_count, allocator_type);
 }
 
 extern mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* method,
-                                                 int32_t component_count,
-                                                 Thread* self, bool access_check)
+                                                 int32_t component_count, Thread* self,
+                                                 bool access_check,
+                                                 gc::AllocatorType allocator_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-extern mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
-                                                             int32_t component_count,
-                                                             Thread* self, bool access_check)
+extern mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx,
+                                                             mirror::ArtMethod* method,
+                                                             int32_t component_count, Thread* self,
+                                                             bool access_check,
+                                                             gc::AllocatorType allocator_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Type of find field operation for fast and slow case.
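
The theme of this header's rewrite is moving the access_check and instrumented flags from runtime bool parameters to template parameters (kAccessCheck, kInstrumented), so each entrypoint instantiation compiles with its branches folded away instead of testing the flags on every allocation. A minimal, hypothetical illustration of the technique:

#include <cstdio>

template <bool kAccessCheck, bool kInstrumented>
int* AllocSketch(int size) {
  if (kAccessCheck) {  // Constant at compile time: the branch folds away when false.
    if (size < 0) {
      return nullptr;
    }
  }
  int* obj = new int[size > 0 ? size : 1]();  // Value-initialized (zeroed).
  if (kInstrumented) {  // Only the instrumented instantiation pays for this.
    std::printf("alloc of %d ints\n", size);
  }
  return obj;
}

int main() {
  delete[] AllocSketch<false, false>(4);  // Fast path: no check, no logging.
  delete[] AllocSketch<true, true>(4);    // Checked, instrumented path.
  return 0;
}
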
diff --git a/runtime/entrypoints/portable/portable_alloc_entrypoints.cc b/runtime/entrypoints/portable/portable_alloc_entrypoints.cc
index 91b7353..0d57516 100644
--- a/runtime/entrypoints/portable/portable_alloc_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_alloc_entrypoints.cc
@@ -24,14 +24,14 @@
                                                                mirror::ArtMethod* referrer,
                                                                Thread* thread)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return AllocObjectFromCode(type_idx, referrer, thread, false);
+  return AllocObjectFromCode<false, true>(type_idx, referrer, thread, gc::kAllocatorTypeFreeList);
 }
 
 extern "C" mirror::Object* art_portable_alloc_object_from_code_with_access_check(uint32_t type_idx,
                                                                                  mirror::ArtMethod* referrer,
                                                                                  Thread* thread)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return AllocObjectFromCode(type_idx, referrer, thread, true);
+  return AllocObjectFromCode<true, true>(type_idx, referrer, thread, gc::kAllocatorTypeFreeList);
 }
 
 extern "C" mirror::Object* art_portable_alloc_array_from_code(uint32_t type_idx,
@@ -39,7 +39,8 @@
                                                               uint32_t length,
                                                               Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return AllocArrayFromCode(type_idx, referrer, length, self, false);
+  return AllocArrayFromCode<false, true>(type_idx, referrer, length, self,
+                                         gc::kAllocatorTypeFreeList);
 }
 
 extern "C" mirror::Object* art_portable_alloc_array_from_code_with_access_check(uint32_t type_idx,
@@ -47,7 +48,8 @@
                                                                                 uint32_t length,
                                                                                 Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return AllocArrayFromCode(type_idx, referrer, length, self, true);
+  return AllocArrayFromCode<true, true>(type_idx, referrer, length, self,
+                                        gc::kAllocatorTypeFreeList);
 }
 
 extern "C" mirror::Object* art_portable_check_and_alloc_array_from_code(uint32_t type_idx,
@@ -55,7 +57,8 @@
                                                                         uint32_t length,
                                                                         Thread* thread)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return CheckAndAllocArrayFromCode(type_idx, referrer, length, thread, false);
+  return CheckAndAllocArrayFromCodeInstrumented(type_idx, referrer, length, thread, false,
+                                                gc::kAllocatorTypeFreeList);
 }
 
 extern "C" mirror::Object* art_portable_check_and_alloc_array_from_code_with_access_check(uint32_t type_idx,
@@ -63,7 +66,8 @@
                                                                                           uint32_t length,
                                                                                           Thread* thread)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return CheckAndAllocArrayFromCode(type_idx, referrer, length, thread, true);
+  return CheckAndAllocArrayFromCodeInstrumented(type_idx, referrer, length, thread, true,
+                                                gc::kAllocatorTypeFreeList);
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 6f7b1ab..9155088 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -23,110 +23,63 @@
 
 namespace art {
 
-extern "C" mirror::Object* artAllocObjectFromCode(uint32_t type_idx, mirror::ArtMethod* method,
-                                                  Thread* self, mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocObjectFromCode(type_idx, method, self, false);
+#define GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, suffix2, instrumented_bool, allocator_type) \
+extern "C" mirror::Object* artAllocObjectFromCode ##suffix##suffix2( \
+    uint32_t type_idx, mirror::ArtMethod* method, Thread* self, mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  return AllocObjectFromCode<false, instrumented_bool>(type_idx, method, self, allocator_type); \
+} \
+extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheck##suffix##suffix2( \
+    uint32_t type_idx, mirror::ArtMethod* method, Thread* self, mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  return AllocObjectFromCode<true, instrumented_bool>(type_idx, method, self, allocator_type); \
+} \
+extern "C" mirror::Array* artAllocArrayFromCode##suffix##suffix2( \
+    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
+    mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  return AllocArrayFromCode<false, instrumented_bool>(type_idx, method, component_count, self, \
+                                                      allocator_type); \
+} \
+extern "C" mirror::Array* artAllocArrayFromCodeWithAccessCheck##suffix##suffix2( \
+    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
+    mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  return AllocArrayFromCode<true, instrumented_bool>(type_idx, method, component_count, self, \
+                                                     allocator_type); \
+} \
+extern "C" mirror::Array* artCheckAndAllocArrayFromCode##suffix##suffix2( \
+    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
+    mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  if (!instrumented_bool) { \
+    return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, false, allocator_type); \
+  } else { \
+    return CheckAndAllocArrayFromCodeInstrumented(type_idx, method, component_count, self, false, allocator_type); \
+  } \
+} \
+extern "C" mirror::Array* artCheckAndAllocArrayFromCodeWithAccessCheck##suffix##suffix2( \
+    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
+    mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  if (!instrumented_bool) { \
+    return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, true, allocator_type); \
+  } else { \
+    return CheckAndAllocArrayFromCodeInstrumented(type_idx, method, component_count, self, true, allocator_type); \
+  } \
 }
 
-extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheck(uint32_t type_idx,
-                                                                 mirror::ArtMethod* method,
-                                                                 Thread* self,
-                                                                 mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocObjectFromCode(type_idx, method, self, true);
-}
+#define GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(suffix, allocator_type) \
+    GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, Instrumented, true, allocator_type) \
+    GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, , false, allocator_type)
 
-extern "C" mirror::Array* artAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* method,
-                                                int32_t component_count, Thread* self,
-                                                mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocArrayFromCode(type_idx, method, component_count, self, false);
-}
-
-extern "C" mirror::Array* artAllocArrayFromCodeWithAccessCheck(uint32_t type_idx,
-                                                               mirror::ArtMethod* method,
-                                                               int32_t component_count,
-                                                               Thread* self,
-                                                               mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocArrayFromCode(type_idx, method, component_count, self, true);
-}
-
-extern "C" mirror::Array* artCheckAndAllocArrayFromCode(uint32_t type_idx,
-                                                        mirror::ArtMethod* method,
-                                                        int32_t component_count, Thread* self,
-                                                        mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, false);
-}
-
-extern "C" mirror::Array* artCheckAndAllocArrayFromCodeWithAccessCheck(uint32_t type_idx,
-                                                                       mirror::ArtMethod* method,
-                                                                       int32_t component_count,
-                                                                       Thread* self,
-                                                                       mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, true);
-}
-
-extern "C" mirror::Object* artAllocObjectFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
-                                                              Thread* self, mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocObjectFromCodeInstrumented(type_idx, method, self, false);
-}
-
-extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheckInstrumented(uint32_t type_idx,
-                                                                             mirror::ArtMethod* method,
-                                                                             Thread* self,
-                                                                             mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocObjectFromCodeInstrumented(type_idx, method, self, true);
-}
-
-extern "C" mirror::Array* artAllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
-                                                            int32_t component_count, Thread* self,
-                                                              mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocArrayFromCodeInstrumented(type_idx, method, component_count, self, false);
-}
-
-extern "C" mirror::Array* artAllocArrayFromCodeWithAccessCheckInstrumented(uint32_t type_idx,
-                                                                           mirror::ArtMethod* method,
-                                                                           int32_t component_count,
-                                                                           Thread* self,
-                                                                           mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocArrayFromCodeInstrumented(type_idx, method, component_count, self, true);
-}
-
-extern "C" mirror::Array* artCheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx,
-                                                                    mirror::ArtMethod* method,
-                                                                    int32_t component_count, Thread* self,
-                                                                    mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return CheckAndAllocArrayFromCodeInstrumented(type_idx, method, component_count, self, false);
-}
-
-extern "C" mirror::Array* artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented(uint32_t type_idx,
-                                                                                   mirror::ArtMethod* method,
-                                                                                   int32_t component_count,
-                                                                                   Thread* self,
-                                                                                   mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return CheckAndAllocArrayFromCodeInstrumented(type_idx, method, component_count, self, true);
-}
+GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(, gc::kAllocatorTypeFreeList)
+GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(BumpPointer, gc::kAllocatorTypeBumpPointer)
 
 }  // namespace art
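
GENERATE_ENTRYPOINTS_FOR_ALLOCATOR relies on token pasting (##) so that one macro body yields the whole entrypoint family per (suffix, allocator) pair; supporting the bump-pointer allocator is then a single extra invocation. A reduced sketch of the same generation pattern (the function names below are made up):

#include <cstdio>

#define GENERATE_ALLOC_ENTRYPOINTS(suffix, allocator_id)         \
extern "C" void* ArtAllocObject##suffix(int size) {              \
  std::printf("object alloc via allocator %d\n", allocator_id);  \
  return new char[size];                                         \
}                                                                \
extern "C" void* ArtAllocArray##suffix(int count) {              \
  std::printf("array alloc via allocator %d\n", allocator_id);   \
  return new char[count];                                        \
}

GENERATE_ALLOC_ENTRYPOINTS(, 0)             // e.g. a free-list allocator.
GENERATE_ALLOC_ENTRYPOINTS(BumpPointer, 1)  // e.g. a bump-pointer allocator.

int main() {
  delete[] static_cast<char*>(ArtAllocObject(16));
  delete[] static_cast<char*>(ArtAllocArrayBumpPointer(16));
  return 0;
}
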
diff --git a/runtime/entrypoints/quick/quick_lock_entrypoints.cc b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
index 2102ab1..540abb3 100644
--- a/runtime/entrypoints/quick/quick_lock_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
@@ -29,9 +29,15 @@
                               "Null reference used for synchronization (monitor-enter)");
     return -1;  // Failure.
   } else {
-    obj->MonitorEnter(self);  // May block
-    DCHECK(self->HoldsLock(obj));
-    DCHECK(!self->IsExceptionPending());
+    if (kIsDebugBuild) {
+      // GC may move the obj, need Sirt for the following DCHECKs.
+      SirtRef<mirror::Object> sirt_obj(self, obj);
+      obj->MonitorEnter(self);  // May block
+      CHECK(self->HoldsLock(sirt_obj.get()));
+      CHECK(!self->IsExceptionPending());
+    } else {
+      obj->MonitorEnter(self);  // May block
+    }
     return 0;  // Success.
     // Only possible exception is NPE and is handled before entry
   }
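
The SirtRef above exists because MonitorEnter can suspend the thread, and with a moving collector the object may be relocated while the thread sleeps; a SIRT (stack indirect reference table) entry is a root the GC visits and updates, whereas the raw obj pointer would silently go stale. A toy, self-contained simulation of that idea (the real SirtRef lives in sirt_ref.h; everything below is invented for illustration):

#include <cassert>
#include <cstddef>
#include <vector>

struct Obj { int payload; };
static std::vector<Obj*> g_roots;  // Roots the simulated GC knows about.

// Plays the role of SirtRef<Obj>: registers the object as a root on
// construction and unregisters it on destruction (stack discipline).
class HandleSketch {
 public:
  explicit HandleSketch(Obj* obj) : index_(g_roots.size()) {
    g_roots.push_back(obj);
  }
  ~HandleSketch() { g_roots.pop_back(); }
  Obj* get() const { return g_roots[index_]; }
 private:
  size_t index_;
};

// Simulated compaction: copies the object and fixes up registered roots only.
Obj* SimulateMovingGc(Obj* old_obj) {
  Obj* new_obj = new Obj(*old_obj);
  for (size_t i = 0; i < g_roots.size(); ++i) {
    if (g_roots[i] == old_obj) {
      g_roots[i] = new_obj;
    }
  }
  delete old_obj;
  return new_obj;
}

int main() {
  HandleSketch handle(new Obj{42});
  SimulateMovingGc(handle.get());       // Any raw copy of the pointer is now stale.
  assert(handle.get()->payload == 42);  // The handle tracked the move.
  delete handle.get();
  return 0;
}
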
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 01d3549..8ba08ee 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -416,10 +416,10 @@
 
 // Read object references held in arguments from quick frames and place them in JNI local
 // references so they don't get garbage collected.
-class RememberFoGcArgumentVisitor : public QuickArgumentVisitor {
+class RememberForGcArgumentVisitor : public QuickArgumentVisitor {
  public:
-  RememberFoGcArgumentVisitor(mirror::ArtMethod** sp, bool is_static, const char* shorty,
-                              uint32_t shorty_len, ScopedObjectAccessUnchecked* soa) :
+  RememberForGcArgumentVisitor(mirror::ArtMethod** sp, bool is_static, const char* shorty,
+                               uint32_t shorty_len, ScopedObjectAccessUnchecked* soa) :
     QuickArgumentVisitor(sp, is_static, shorty, shorty_len), soa_(soa) {}
 
   virtual void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -441,7 +441,7 @@
  private:
   ScopedObjectAccessUnchecked* soa_;
   std::vector<std::pair<jobject, mirror::Object**> > references_;
-  DISALLOW_COPY_AND_ASSIGN(RememberFoGcArgumentVisitor);
+  DISALLOW_COPY_AND_ASSIGN(RememberForGcArgumentVisitor);
 };
 
 // Lazily resolve a method for quick. Called by stub code.
@@ -531,7 +531,7 @@
   uint32_t shorty_len;
   const char* shorty =
       dex_file->GetMethodShorty(dex_file->GetMethodId(dex_method_idx), &shorty_len);
-  RememberFoGcArgumentVisitor visitor(sp, invoke_type == kStatic, shorty, shorty_len, &soa);
+  RememberForGcArgumentVisitor visitor(sp, invoke_type == kStatic, shorty, shorty_len, &soa);
   visitor.VisitArguments();
   thread->EndAssertNoThreadSuspension(old_cause);
   // Resolve method filling in dex cache.
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index a5f9997..e9a6e4f 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -39,7 +39,7 @@
     ScopedObjectAccess soa(Thread::Current());
     SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
                                       soa.Decode<mirror::ClassLoader*>(LoadDex("ExceptionHandle")));
-    my_klass_ = class_linker_->FindClass("LExceptionHandle;", class_loader.get());
+    my_klass_ = class_linker_->FindClass("LExceptionHandle;", class_loader);
     ASSERT_TRUE(my_klass_ != NULL);
     class_linker_->EnsureInitialized(my_klass_, true, true);
 
diff --git a/runtime/gc/accounting/mod_union_table-inl.h b/runtime/gc/accounting/mod_union_table-inl.h
index fb425df..19c6768 100644
--- a/runtime/gc/accounting/mod_union_table-inl.h
+++ b/runtime/gc/accounting/mod_union_table-inl.h
@@ -37,7 +37,7 @@
     typedef std::vector<space::ContinuousSpace*>::const_iterator It;
     for (It it = spaces.begin(); it != spaces.end(); ++it) {
       if ((*it)->Contains(ref)) {
-        return (*it)->IsDlMallocSpace();
+        return (*it)->IsMallocSpace();
       }
     }
     // Assume it points to a large object.
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 7cbe94d..faa198a 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -82,7 +82,7 @@
     if (ref != nullptr) {
       Object* new_ref = visitor_(ref, arg_);
       if (new_ref != ref) {
-        obj->SetFieldObject(offset, ref, false, true);
+        obj->SetFieldObject(offset, new_ref, true);
       }
     }
   }
@@ -154,7 +154,7 @@
     // We don't have an early exit since we use the visitor pattern; an early
     // exit should significantly speed this up.
     AddToReferenceArrayVisitor visitor(mod_union_table_, references_);
-    collector::MarkSweep::VisitObjectReferences(obj, visitor);
+    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
   }
  private:
   ModUnionTableReferenceCache* const mod_union_table_;
@@ -206,7 +206,7 @@
     Locks::heap_bitmap_lock_->AssertSharedHeld(Thread::Current());
     DCHECK(obj != NULL);
     CheckReferenceVisitor visitor(mod_union_table_, references_);
-    collector::MarkSweep::VisitObjectReferences(obj, visitor);
+    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
   }
 
  private:
@@ -334,7 +334,7 @@
   for (const byte* card_addr : cleared_cards_) {
     auto start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card_addr));
     auto end = start + CardTable::kCardSize;
-    os << reinterpret_cast<void*>(start) << "-" << reinterpret_cast<void*>(end) << ",";
+    os << reinterpret_cast<void*>(start) << "-" << reinterpret_cast<void*>(end) << "\n";
   }
   os << "]";
 }
diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h
new file mode 100644
index 0000000..f395314
--- /dev/null
+++ b/runtime/gc/allocator/rosalloc-inl.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_INL_H_
+#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_INL_H_
+
+#include "rosalloc.h"
+
+namespace art {
+namespace gc {
+namespace allocator {
+
+inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated) {
+  if (UNLIKELY(size > kLargeSizeThreshold)) {
+    return AllocLargeObject(self, size, bytes_allocated);
+  }
+  void* m = AllocFromRun(self, size, bytes_allocated);
+  // Check if the returned memory is really all zero.
+  if (kCheckZeroMemory && m != NULL) {
+    byte* bytes = reinterpret_cast<byte*>(m);
+    for (size_t i = 0; i < size; ++i) {
+      DCHECK_EQ(bytes[i], 0);
+    }
+  }
+  return m;
+}
+
+}  // namespace allocator
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_INL_H_
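
Alloc() above is the fast path that the "Inline RosAlloc::Alloc()" commit moved into this header: sizes over kLargeSizeThreshold take the page-level large-object route, smaller ones are served from a size-bracketed run, and debug builds additionally verify the slot comes back zeroed. The dispatch shape, reduced to a hedged sketch (the bracket math below is invented; RosAlloc uses a finer bracket table):

#include <cstdlib>

static const size_t kLargeThresholdSketch = 2048;  // Stand-in for kLargeSizeThreshold.

static void* AllocLargeSketch(size_t size) {
  return std::calloc(1, size);  // Page-level allocation in the real allocator.
}

static void* AllocFromRunSketch(size_t size) {
  // Round up to a size bracket: powers of two here, a finer table in RosAlloc.
  size_t bracket = 16;
  while (bracket < size) {
    bracket <<= 1;
  }
  return std::calloc(1, bracket);  // A real run hands out pre-zeroed slots.
}

void* AllocSketch(size_t size) {
  if (size > kLargeThresholdSketch) {
    return AllocLargeSketch(size);
  }
  return AllocFromRunSketch(size);
}
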
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
new file mode 100644
index 0000000..3030fa7
--- /dev/null
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -0,0 +1,1615 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/mutex-inl.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "rosalloc.h"
+
+#include <map>
+#include <list>
+#include <vector>
+
+namespace art {
+namespace gc {
+namespace allocator {
+
+extern "C" void* art_heap_rosalloc_morecore(RosAlloc* rosalloc, intptr_t increment);
+
+size_t RosAlloc::bracketSizes[kNumOfSizeBrackets];
+size_t RosAlloc::numOfPages[kNumOfSizeBrackets];
+size_t RosAlloc::numOfSlots[kNumOfSizeBrackets];
+size_t RosAlloc::headerSizes[kNumOfSizeBrackets];
+size_t RosAlloc::bulkFreeBitMapOffsets[kNumOfSizeBrackets];
+size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
+bool RosAlloc::initialized_ = false;
+
+RosAlloc::RosAlloc(void* base, size_t capacity)
+    : base_(reinterpret_cast<byte*>(base)), footprint_(capacity),
+      capacity_(capacity),
+      lock_("rosalloc global lock", kRosAllocGlobalLock),
+      bulk_free_lock_("rosalloc bulk free lock", kRosAllocBulkFreeLock) {
+  DCHECK(RoundUp(capacity, kPageSize) == capacity);
+  if (!initialized_) {
+    Initialize();
+  }
+  VLOG(heap) << "RosAlloc base="
+             << std::hex << (intptr_t)base_ << ", end="
+             << std::hex << (intptr_t)(base_ + capacity_)
+             << ", capacity=" << std::dec << capacity_;
+  memset(current_runs_, 0, sizeof(current_runs_));
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    size_bracket_locks_[i] = new Mutex("an rosalloc size bracket lock",
+                                       kRosAllocBracketLock);
+  }
+  size_t num_of_pages = capacity_ / kPageSize;
+  page_map_.resize(num_of_pages);
+  free_page_run_size_map_.resize(num_of_pages);
+
+  FreePageRun* free_pages = reinterpret_cast<FreePageRun*>(base_);
+  if (kIsDebugBuild) {
+    free_pages->magic_num_ = kMagicNumFree;
+  }
+  free_pages->SetByteSize(this, capacity_);
+  DCHECK_EQ(capacity_ % kPageSize, static_cast<size_t>(0));
+  free_pages->ReleasePages(this);
+  free_page_runs_.insert(free_pages);
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::RosAlloc() : Inserted run 0x" << std::hex
+              << reinterpret_cast<intptr_t>(free_pages)
+              << " into free_page_runs_";
+  }
+}
+
+void* RosAlloc::AllocPages(Thread* self, size_t num_pages, byte page_map_type) {
+  lock_.AssertHeld(self);
+  DCHECK(page_map_type == kPageMapRun || page_map_type == kPageMapLargeObject);
+  FreePageRun* res = NULL;
+  size_t req_byte_size = num_pages * kPageSize;
+  // Find the lowest-address free page run that's large enough.
+  for (auto it = free_page_runs_.begin(); it != free_page_runs_.end(); ) {
+    FreePageRun* fpr = *it;
+    DCHECK(fpr->IsFree());
+    size_t fpr_byte_size = fpr->ByteSize(this);
+    DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
+    if (req_byte_size <= fpr_byte_size) {
+      // Found one.
+      free_page_runs_.erase(it++);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x"
+                  << std::hex << reinterpret_cast<intptr_t>(fpr)
+                  << " from free_page_runs_";
+      }
+      if (req_byte_size < fpr_byte_size) {
+        // Split.
+        FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
+        if (kIsDebugBuild) {
+          remainder->magic_num_ = kMagicNumFree;
+        }
+        remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
+        DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        // Don't need to call madvise on remainder here.
+        free_page_runs_.insert(remainder);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(remainder)
+                    << " into free_page_runs_";
+        }
+        fpr->SetByteSize(this, req_byte_size);
+        DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+      }
+      res = fpr;
+      break;
+    } else {
+      ++it;
+    }
+  }
+
+  // Failed to allocate pages. Grow the footprint, if possible.
+  if (UNLIKELY(res == NULL && capacity_ > footprint_)) {
+    FreePageRun* last_free_page_run = NULL;
+    size_t last_free_page_run_size;
+    auto it = free_page_runs_.rbegin();
+    if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
+      // There is a free page run at the end.
+      DCHECK(last_free_page_run->IsFree());
+      DCHECK(page_map_[ToPageMapIndex(last_free_page_run)] == kPageMapEmpty);
+      last_free_page_run_size = last_free_page_run->ByteSize(this);
+    } else {
+      // There is no free page run at the end.
+      last_free_page_run_size = 0;
+    }
+    DCHECK_LT(last_free_page_run_size, req_byte_size);
+    if (capacity_ - footprint_ + last_free_page_run_size >= req_byte_size) {
+      // If we grow the heap, we can allocate it.
+      size_t increment = std::min(std::max(2 * MB, req_byte_size - last_free_page_run_size),
+                                  capacity_ - footprint_);
+      DCHECK_EQ(increment % kPageSize, static_cast<size_t>(0));
+      size_t new_footprint = footprint_ + increment;
+      size_t new_num_of_pages = new_footprint / kPageSize;
+      DCHECK_LT(page_map_.size(), new_num_of_pages);
+      DCHECK_LT(free_page_run_size_map_.size(), new_num_of_pages);
+      page_map_.resize(new_num_of_pages);
+      free_page_run_size_map_.resize(new_num_of_pages);
+      art_heap_rosalloc_morecore(this, increment);
+      if (last_free_page_run_size > 0) {
+        // There was a free page run at the end. Expand its size.
+        DCHECK_EQ(last_free_page_run_size, last_free_page_run->ByteSize(this));
+        last_free_page_run->SetByteSize(this, last_free_page_run_size + increment);
+        DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        DCHECK(last_free_page_run->End(this) == base_ + new_footprint);
+      } else {
+        // Otherwise, insert a new free page run at the end.
+        FreePageRun* new_free_page_run = reinterpret_cast<FreePageRun*>(base_ + footprint_);
+        if (kIsDebugBuild) {
+          new_free_page_run->magic_num_ = kMagicNumFree;
+        }
+        new_free_page_run->SetByteSize(this, increment);
+        DCHECK_EQ(new_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        free_page_runs_.insert(new_free_page_run);
+        DCHECK(*free_page_runs_.rbegin() == new_free_page_run);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AlloPages() : Grew the heap by inserting run 0x"
+                    << std::hex << reinterpret_cast<intptr_t>(new_free_page_run)
+                    << " into free_page_runs_";
+        }
+      }
+      DCHECK_LE(footprint_ + increment, capacity_);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::AllocPages() : increased the footprint from "
+                  << footprint_ << " to " << new_footprint;
+      }
+      footprint_ = new_footprint;
+
+      // And retry the last free page run.
+      it = free_page_runs_.rbegin();
+      DCHECK(it != free_page_runs_.rend());
+      FreePageRun* fpr = *it;
+      if (kIsDebugBuild && last_free_page_run_size > 0) {
+        DCHECK(last_free_page_run != NULL);
+        DCHECK_EQ(last_free_page_run, fpr);
+      }
+      size_t fpr_byte_size = fpr->ByteSize(this);
+      DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
+      DCHECK_LE(req_byte_size, fpr_byte_size);
+      free_page_runs_.erase(fpr);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
+                  << " from free_page_runs_";
+      }
+      if (req_byte_size < fpr_byte_size) {
+        // Split if there's a remainder.
+        FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
+        if (kIsDebugBuild) {
+          remainder->magic_num_ = kMagicNumFree;
+        }
+        remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
+        DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        free_page_runs_.insert(remainder);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(remainder)
+                    << " into free_page_runs_";
+        }
+        fpr->SetByteSize(this, req_byte_size);
+        DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+      }
+      res = fpr;
+    }
+  }
+  if (LIKELY(res != NULL)) {
+    // Update the page map.
+    size_t page_map_idx = ToPageMapIndex(res);
+    for (size_t i = 0; i < num_pages; i++) {
+      DCHECK(page_map_[page_map_idx + i] == kPageMapEmpty);
+    }
+    switch (page_map_type) {
+    case kPageMapRun:
+      page_map_[page_map_idx] = kPageMapRun;
+      for (size_t i = 1; i < num_pages; i++) {
+        page_map_[page_map_idx + i] = kPageMapRunPart;
+      }
+      break;
+    case kPageMapLargeObject:
+      page_map_[page_map_idx] = kPageMapLargeObject;
+      for (size_t i = 1; i < num_pages; i++) {
+        page_map_[page_map_idx + i] = kPageMapLargeObjectPart;
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unreachable - page map type: " << page_map_type;
+      break;
+    }
+    if (kIsDebugBuild) {
+      // In the debug build, clear the first page, which isn't madvised
+      // away, to wipe the magic number.
+      memset(res, 0, kPageSize);
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::AllocPages() : 0x" << std::hex << reinterpret_cast<intptr_t>(res)
+                << "-0x" << (reinterpret_cast<intptr_t>(res) + num_pages * kPageSize)
+                << "(" << std::dec << (num_pages * kPageSize) << ")";
+    }
+    return res;
+  }
+
+  // Fail.
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::AllocPages() : NULL";
+  }
+  return nullptr;
+}
+
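+// Frees the pages starting at ptr (the start of a run or a large
+// object), marks them empty in the page map, coalesces the resulting
+// free page run with any adjacent free page runs, and reinserts it
+// into free_page_runs_. The caller must hold lock_.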
+void RosAlloc::FreePages(Thread* self, void* ptr) {
+  lock_.AssertHeld(self);
+  size_t pm_idx = ToPageMapIndex(ptr);
+  DCHECK(pm_idx < page_map_.size());
+  byte pm_type = page_map_[pm_idx];
+  DCHECK(pm_type == kPageMapRun || pm_type == kPageMapLargeObject);
+  byte pm_part_type;
+  switch (pm_type) {
+  case kPageMapRun:
+    pm_part_type = kPageMapRunPart;
+    break;
+  case kPageMapLargeObject:
+    pm_part_type = kPageMapLargeObjectPart;
+    break;
+  default:
+    pm_part_type = kPageMapEmpty;
+    LOG(FATAL) << "Unreachable - RosAlloc::FreePages() : " << "pm_idx=" << pm_idx << ", pm_type="
+               << static_cast<int>(pm_type) << ", ptr=" << std::hex
+               << reinterpret_cast<intptr_t>(ptr);
+    return;
+  }
+  // Update the page map and count the number of pages.
+  size_t num_pages = 1;
+  page_map_[pm_idx] = kPageMapEmpty;
+  size_t idx = pm_idx + 1;
+  size_t end = page_map_.size();
+  while (idx < end && page_map_[idx] == pm_part_type) {
+    page_map_[idx] = kPageMapEmpty;
+    num_pages++;
+    idx++;
+  }
+
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::FreePages() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
+              << "-0x" << (reinterpret_cast<intptr_t>(ptr) + num_pages * kPageSize)
+              << "(" << std::dec << (num_pages * kPageSize) << ")";
+  }
+
+  // Turn it into a free run.
+  FreePageRun* fpr = reinterpret_cast<FreePageRun*>(ptr);
+  if (kIsDebugBuild) {
+    fpr->magic_num_ = kMagicNumFree;
+  }
+  fpr->SetByteSize(this, num_pages * kPageSize);
+  DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+
+  DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
+  if (!free_page_runs_.empty()) {
+    // Try to coalesce in the higher address direction.
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce a free page run 0x"
+                << std::hex << reinterpret_cast<uintptr_t>(fpr) << " [" << std::dec << pm_idx << "] -0x"
+                << std::hex << reinterpret_cast<uintptr_t>(fpr->End(this)) << " [" << std::dec
+                << (fpr->End(this) == End() ? page_map_.size() : ToPageMapIndex(fpr->End(this))) << "]";
+    }
+    auto higher_it = free_page_runs_.upper_bound(fpr);
+    if (higher_it != free_page_runs_.end()) {
+      for (auto it = higher_it; it != free_page_runs_.end(); ) {
+        FreePageRun* h = *it;
+        DCHECK_EQ(h->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a higher free page run 0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(h) << " [" << std::dec << ToPageMapIndex(h) << "] -0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(h->End(this)) << " [" << std::dec
+                    << (h->End(this) == End() ? page_map_.size() : ToPageMapIndex(h->End(this))) << "]";
+        }
+        if (fpr->End(this) == h->Begin()) {
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Success";
+          }
+          free_page_runs_.erase(it++);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(h)
+                      << " from free_page_runs_";
+          }
+          fpr->SetByteSize(this, fpr->ByteSize(this) + h->ByteSize(this));
+          DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        } else {
+          // Not adjacent. Stop.
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Fail";
+          }
+          break;
+        }
+      }
+    }
+    // Try to coalesce in the lower address direction.
+    auto lower_it = free_page_runs_.upper_bound(fpr);
+    if (lower_it != free_page_runs_.begin()) {
+      --lower_it;
+      for (auto it = lower_it; ; ) {
+        // We want to try to coalesce with the first element but
+        // there's no "<=" operator for the iterator.
+        bool to_exit_loop = it == free_page_runs_.begin();
+
+        FreePageRun* l = *it;
+        DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a lower free page run 0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(l) << " [" << std::dec << ToPageMapIndex(l) << "] -0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(l->End(this)) << " [" << std::dec
+                    << (l->End(this) == End() ? page_map_.size() : ToPageMapIndex(l->End(this))) << "]";
+        }
+        if (l->End(this) == fpr->Begin()) {
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Success";
+          }
+          free_page_runs_.erase(it--);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(l)
+                      << " from free_page_runs_";
+          }
+          l->SetByteSize(this, l->ByteSize(this) + fpr->ByteSize(this));
+          DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+          fpr = l;
+        } else {
+          // Not adjacent. Stop.
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Fail";
+          }
+          break;
+        }
+        if (to_exit_loop) {
+          break;
+        }
+      }
+    }
+  }
+
+  // Insert it.
+  DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+  DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
+  fpr->ReleasePages(this);
+  free_page_runs_.insert(fpr);
+  DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::FreePages() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
+              << " into free_page_runs_";
+  }
+}
+
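+// Allocates a large object as a whole number of pages straight from
+// the page allocator, bypassing the runs. Used for requests above
+// kLargeSizeThreshold.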
+void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) {
+  DCHECK(size > kLargeSizeThreshold);
+  size_t num_pages = RoundUp(size, kPageSize) / kPageSize;
+  void* r;
+  {
+    MutexLock mu(self, lock_);
+    r = AllocPages(self, num_pages, kPageMapLargeObject);
+  }
+  if (bytes_allocated != NULL) {
+    *bytes_allocated = num_pages * kPageSize;
+  }
+  if (kTraceRosAlloc) {
+    if (r != NULL) {
+      LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r)
+                << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize)
+                << "(" << std::dec << (num_pages * kPageSize) << ")";
+    } else {
+      LOG(INFO) << "RosAlloc::AllocLargeObject() : NULL";
+    }
+  }
+  // Check if the returned memory is really all zero.
+  if (kCheckZeroMemory && r != NULL) {
+    byte* bytes = reinterpret_cast<byte*>(r);
+    for (size_t i = 0; i < size; ++i) {
+      DCHECK_EQ(bytes[i], 0);
+    }
+  }
+  return r;
+}
+
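+// Frees ptr based on its page map type: large objects are freed
+// page-wise under lock_, while run slots are handed to FreeFromRun()
+// after walking back to the start of the run.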
+void RosAlloc::FreeInternal(Thread* self, void* ptr) {
+  DCHECK(base_ <= ptr && ptr < base_ + footprint_);
+  size_t pm_idx = RoundDownToPageMapIndex(ptr);
+  bool free_from_run = false;
+  Run* run = NULL;
+  {
+    MutexLock mu(self, lock_);
+    DCHECK(pm_idx < page_map_.size());
+    byte page_map_entry = page_map_[pm_idx];
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::FreeInternal() : " << std::hex << ptr << ", pm_idx=" << std::dec << pm_idx
+                << ", page_map_entry=" << static_cast<int>(page_map_entry);
+    }
+    switch (page_map_[pm_idx]) {
+      case kPageMapEmpty:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return;
+      case kPageMapLargeObject:
+        FreePages(self, ptr);
+        return;
+      case kPageMapLargeObjectPart:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return;
+      case kPageMapRun:
+      case kPageMapRunPart: {
+        free_from_run = true;
+        size_t pi = pm_idx;
+        DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
+        // Find the beginning of the run.
+        while (page_map_[pi] != kPageMapRun) {
+          pi--;
+          DCHECK(pi < capacity_ / kPageSize);
+        }
+        DCHECK(page_map_[pi] == kPageMapRun);
+        run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
+        DCHECK(run->magic_num_ == kMagicNum);
+        break;
+      }
+      default:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return;
+    }
+  }
+  if (LIKELY(free_from_run)) {
+    DCHECK(run != NULL);
+    FreeFromRun(self, ptr, run);
+  }
+}
+
+void RosAlloc::Free(Thread* self, void* ptr) {
+  ReaderMutexLock rmu(self, bulk_free_lock_);
+  FreeInternal(self, ptr);
+}
+
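+// Returns a run to allocate from for the given size bracket: the
+// lowest-address non-full run if one exists, otherwise a freshly
+// allocated and initialized run.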
+RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) {
+  Run* new_run;
+  size_t num_pages = numOfPages[idx];
+  // Get the lowest address non-full run from the binary tree.
+  Run* temp = NULL;
+  std::set<Run*>* bt = &non_full_runs_[idx];
+  std::set<Run*>::iterator found = bt->lower_bound(temp);
+  if (found != bt->end()) {
+    // If there's one, use it as the current run.
+    Run* non_full_run = *found;
+    DCHECK(non_full_run != NULL);
+    new_run = non_full_run;
+    DCHECK_EQ(new_run->is_thread_local_, 0);
+    bt->erase(found);
+    DCHECK_EQ(non_full_run->is_thread_local_, 0);
+  } else {
+    // If there's none, allocate a new run and use it as the
+    // current run.
+    {
+      MutexLock mu(self, lock_);
+      new_run = reinterpret_cast<Run*>(AllocPages(self, num_pages, kPageMapRun));
+    }
+    if (new_run == NULL) {
+      return NULL;
+    }
+    if (kIsDebugBuild) {
+      new_run->magic_num_ = kMagicNum;
+    }
+    new_run->size_bracket_idx_ = idx;
+    new_run->top_slot_idx_ = 0;
+    new_run->ClearBitMaps();
+    new_run->to_be_bulk_freed_ = false;
+  }
+  return new_run;
+}
+
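+// Allocates a slot of the given (small) size. Brackets up to
+// kMaxThreadLocalSizeBracketIdx are served from the calling thread's
+// thread-local run, taking the bracket lock only to refill it; larger
+// brackets allocate from the shared current run under the bracket lock.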
+void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) {
+  DCHECK(size <= kLargeSizeThreshold);
+  size_t bracket_size;
+  size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
+  DCHECK_EQ(idx, SizeToIndex(size));
+  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+  DCHECK_EQ(bracket_size, bracketSizes[idx]);
+  DCHECK(size <= bracket_size);
+  DCHECK(size > 512 || bracket_size - size < 16);
+
+  void* slot_addr;
+
+  if (LIKELY(idx <= kMaxThreadLocalSizeBracketIdx)) {
+    // Use a thread-local run.
+    Run* thread_local_run = reinterpret_cast<Run*>(self->rosalloc_runs_[idx]);
+    if (UNLIKELY(thread_local_run == NULL)) {
+      MutexLock mu(self, *size_bracket_locks_[idx]);
+      thread_local_run = RefillRun(self, idx);
+      if (UNLIKELY(thread_local_run == NULL)) {
+        return NULL;
+      }
+      DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
+      thread_local_run->is_thread_local_ = 1;
+      self->rosalloc_runs_[idx] = thread_local_run;
+      DCHECK(!thread_local_run->IsFull());
+    }
+
+    DCHECK(thread_local_run != NULL);
+    DCHECK_NE(thread_local_run->is_thread_local_, 0);
+    slot_addr = thread_local_run->AllocSlot();
+
+    if (UNLIKELY(slot_addr == NULL)) {
+      // The run got full. Try to free slots.
+      DCHECK(thread_local_run->IsFull());
+      MutexLock mu(self, *size_bracket_locks_[idx]);
+      bool is_all_free_after_merge;
+      if (thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&is_all_free_after_merge)) {
+        // Some slot got freed. Keep it.
+        DCHECK(!thread_local_run->IsFull());
+        DCHECK_EQ(is_all_free_after_merge, thread_local_run->IsAllFree());
+        if (is_all_free_after_merge) {
+          // Reinstate the bump index mode if it's all free.
+          DCHECK_EQ(thread_local_run->top_slot_idx_, numOfSlots[idx]);
+          thread_local_run->top_slot_idx_ = 0;
+        }
+      } else {
+        // No slots got freed. Try to refill the thread-local run.
+        DCHECK(thread_local_run->IsFull());
+        self->rosalloc_runs_[idx] = NULL;
+        thread_local_run->is_thread_local_ = 0;
+        if (kIsDebugBuild) {
+          full_runs_[idx].insert(thread_local_run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(thread_local_run)
+                      << " into full_runs_[" << std::dec << idx << "]";
+          }
+        }
+        DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+        DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
+        thread_local_run = RefillRun(self, idx);
+        if (UNLIKELY(thread_local_run == NULL)) {
+          return NULL;
+        }
+        DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+        DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
+        thread_local_run->is_thread_local_ = 1;
+        self->rosalloc_runs_[idx] = thread_local_run;
+        DCHECK(!thread_local_run->IsFull());
+      }
+
+      DCHECK(thread_local_run != NULL);
+      DCHECK(!thread_local_run->IsFull());
+      DCHECK_NE(thread_local_run->is_thread_local_, 0);
+      slot_addr = thread_local_run->AllocSlot();
+      // Must succeed now with a new run.
+      DCHECK(slot_addr != NULL);
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
+                << "(" << std::dec << (bracket_size) << ")";
+    }
+  } else {
+    // Use the (shared) current run.
+    MutexLock mu(self, *size_bracket_locks_[idx]);
+    Run* current_run = current_runs_[idx];
+    if (UNLIKELY(current_run == NULL)) {
+      current_run = RefillRun(self, idx);
+      if (UNLIKELY(current_run == NULL)) {
+        return NULL;
+      }
+      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
+      current_run->is_thread_local_ = 0;
+      current_runs_[idx] = current_run;
+      DCHECK(!current_run->IsFull());
+    }
+    DCHECK(current_run != NULL);
+    slot_addr = current_run->AllocSlot();
+    if (UNLIKELY(slot_addr == NULL)) {
+      // The current run got full. Try to refill it.
+      DCHECK(current_run->IsFull());
+      current_runs_[idx] = NULL;
+      if (kIsDebugBuild) {
+        // Insert it into full_runs and set the current run to NULL.
+        full_runs_[idx].insert(current_run);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(current_run)
+                    << " into full_runs_[" << std::dec << idx << "]";
+        }
+      }
+      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(current_run) != full_runs_[idx].end());
+      current_run = RefillRun(self, idx);
+      if (UNLIKELY(current_run == NULL)) {
+        return NULL;
+      }
+      DCHECK(current_run != NULL);
+      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
+      current_run->is_thread_local_ = 0;
+      current_runs_[idx] = current_run;
+      DCHECK(!current_run->IsFull());
+      slot_addr = current_run->AllocSlot();
+      // Must succeed now with a new run.
+      DCHECK(slot_addr != NULL);
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
+                << "(" << std::dec << (bracket_size) << ")";
+    }
+  }
+  if (LIKELY(bytes_allocated != NULL)) {
+    *bytes_allocated = bracket_size;
+  }
+  memset(slot_addr, 0, size);
+  return slot_addr;
+}
+
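+// Frees a slot that belongs to the given run. For a thread-local run,
+// only the thread-local free bit map is marked; for a shared run, the
+// slot is freed directly and the run may move between the full and
+// non-full sets or have its pages freed if it becomes all free.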
+void RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) {
+  DCHECK(run->magic_num_ == kMagicNum);
+  DCHECK(run < ptr && ptr < run->End());
+  size_t idx = run->size_bracket_idx_;
+  MutexLock mu(self, *size_bracket_locks_[idx]);
+  bool run_was_full = false;
+  if (kIsDebugBuild) {
+    run_was_full = run->IsFull();
+  }
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::FreeFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr);
+  }
+  if (LIKELY(run->is_thread_local_ != 0)) {
+    // It's a thread-local run. Just mark the thread-local free bit map and return.
+    DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx);
+    DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
+    DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
+    run->MarkThreadLocalFreeBitMap(ptr);
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::FreeFromRun() : Freed a slot in a thread local run 0x" << std::hex
+                << reinterpret_cast<intptr_t>(run);
+    }
+    // A thread-local run is kept thread-local even if it becomes all free.
+    return;
+  }
+  // Free the slot in the run.
+  run->FreeSlot(ptr);
+  std::set<Run*>* non_full_runs = &non_full_runs_[idx];
+  if (run->IsAllFree()) {
+    // It has just become completely free. Free the pages of this run.
+    std::set<Run*>::iterator pos = non_full_runs->find(run);
+    if (pos != non_full_runs->end()) {
+      non_full_runs->erase(pos);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
+                  << reinterpret_cast<intptr_t>(run) << " from non_full_runs_";
+      }
+    }
+    if (run == current_runs_[idx]) {
+      current_runs_[idx] = NULL;
+    }
+    DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
+    DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
+    {
+      MutexLock mu(self, lock_);
+      FreePages(self, run);
+    }
+  } else {
+    // It is not completely free. If it was neither the current run nor
+    // already in the non-full run set (i.e., it was full), insert it
+    // into the non-full run set.
+    if (run != current_runs_[idx]) {
+      hash_set<Run*, hash_run, eq_run>* full_runs =
+          kIsDebugBuild ? &full_runs_[idx] : NULL;
+      std::set<Run*>::iterator pos = non_full_runs->find(run);
+      if (pos == non_full_runs->end()) {
+        DCHECK(run_was_full);
+        DCHECK(full_runs->find(run) != full_runs->end());
+        if (kIsDebugBuild) {
+          full_runs->erase(run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(run) << " from full_runs_";
+          }
+        }
+        non_full_runs->insert(run);
+        DCHECK(!run->IsFull());
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::FreeFromRun() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(run)
+                    << " into non_full_runs_[" << std::dec << idx << "]";
+        }
+      }
+    }
+  }
+}
+
+void RosAlloc::Run::Dump() {
+  size_t idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  std::string bit_map_str;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t vec = alloc_bit_map_[v];
+    if (v != num_vec - 1) {
+      bit_map_str.append(StringPrintf("%x-", vec));
+    } else {
+      bit_map_str.append(StringPrintf("%x", vec));
+    }
+  }
+  LOG(INFO) << "Run : " << std::hex << reinterpret_cast<intptr_t>(this)
+            << std::dec << ", idx=" << idx << ", bit_map=" << bit_map_str;
+}
+
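+// Allocates one slot from this run: in bump index mode the top slot is
+// taken and the index advanced; otherwise the alloc bit map is scanned
+// for the first clear bit. Returns NULL if the run is full.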
+void* RosAlloc::Run::AllocSlot() {
+  size_t idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  DCHECK_LE(top_slot_idx_, num_slots);
+  if (LIKELY(top_slot_idx_ < num_slots)) {
+    // If it's in bump index mode, grab the top slot and increment the top index.
+    size_t slot_idx = top_slot_idx_;
+    byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+    }
+    top_slot_idx_++;
+    size_t vec_idx = slot_idx / 32;
+    size_t vec_off = slot_idx % 32;
+    uint32_t* vec = &alloc_bit_map_[vec_idx];
+    DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+    *vec |= 1 << vec_off;
+    DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+    return slot_addr;
+  }
+  // Not in bump index mode. Search the alloc bit map for an empty slot.
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slot_idx = 0;
+  bool found_slot = false;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t *vecp = &alloc_bit_map_[v];
+    uint32_t ffz1 = __builtin_ffs(~*vecp);
+    uint32_t ffz;
+    // TODO: Use LIKELY or UNLIKELY here?
+    if (LIKELY(ffz1 > 0 && (ffz = ffz1 - 1) + v * 32 < num_slots)) {
+      // Found an empty slot. Set the bit.
+      DCHECK_EQ((*vecp & (1 << ffz)), static_cast<uint32_t>(0));
+      *vecp |= (1 << ffz);
+      DCHECK_NE((*vecp & (1 << ffz)), static_cast<uint32_t>(0));
+      slot_idx = ffz + v * 32;
+      found_slot = true;
+      break;
+    }
+  }
+  if (LIKELY(found_slot)) {
+    byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+    }
+    return slot_addr;
+  }
+  return NULL;
+}
+
+inline void RosAlloc::Run::FreeSlot(void* ptr) {
+  DCHECK_EQ(is_thread_local_, 0);
+  byte idx = size_bracket_idx_;
+  size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+      - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
+  DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+  size_t slot_idx = offset_from_slot_base / bracketSizes[idx];
+  DCHECK(slot_idx < numOfSlots[idx]);
+  size_t vec_idx = slot_idx / 32;
+  if (kIsDebugBuild) {
+    size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
+    DCHECK(vec_idx < num_vec);
+  }
+  size_t vec_off = slot_idx % 32;
+  uint32_t* vec = &alloc_bit_map_[vec_idx];
+  DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  *vec &= ~(1 << vec_off);
+  DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::Run::FreeSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
+              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+  }
+}
+
+inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) {
+  DCHECK_NE(is_thread_local_, 0);
+  // Free slots in the alloc bit map based on the thread local free bit map.
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  bool changed = false;
+  uint32_t* vecp = &alloc_bit_map_[0];
+  uint32_t* tl_free_vecp = &thread_local_free_bit_map()[0];
+  bool is_all_free_after = true;
+  for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) {
+    uint32_t tl_free_vec = *tl_free_vecp;
+    uint32_t vec_before = *vecp;
+    uint32_t vec_after;
+    if (tl_free_vec != 0) {
+      vec_after = vec_before & ~tl_free_vec;
+      *vecp = vec_after;
+      changed = true;
+      *tl_free_vecp = 0;  // clear the thread local free bit map.
+    } else {
+      vec_after = vec_before;
+    }
+    if (vec_after != 0) {
+      is_all_free_after = false;
+    }
+    DCHECK_EQ(*tl_free_vecp, static_cast<uint32_t>(0));
+  }
+  *is_all_free_after_out = is_all_free_after;
+  // Return true if at least one bit was set in the thread-local free
+  // bit map (and the alloc bit map was updated accordingly).
+  return changed;
+}
+
+inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() {
+  DCHECK_EQ(is_thread_local_, 0);
+  // Free slots in the alloc bit map based on the bulk free bit map.
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  uint32_t* vecp = &alloc_bit_map_[0];
+  uint32_t* free_vecp = &bulk_free_bit_map()[0];
+  for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) {
+    uint32_t free_vec = *free_vecp;
+    if (free_vec != 0) {
+      *vecp &= ~free_vec;
+      *free_vecp = 0;  // clear the bulk free bit map.
+    }
+    DCHECK_EQ(*free_vecp, static_cast<uint32_t>(0));
+  }
+}
+
+inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() {
+  DCHECK_NE(is_thread_local_, 0);
+  // Union the bulk free bit map into the thread-local free bit map.
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  uint32_t* to_vecp = &thread_local_free_bit_map()[0];
+  uint32_t* from_vecp = &bulk_free_bit_map()[0];
+  for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) {
+    uint32_t from_vec = *from_vecp;
+    if (from_vec != 0) {
+      *to_vecp |= from_vec;
+      *from_vecp = 0;  // clear the from free bit map.
+    }
+    DCHECK_EQ(*from_vecp, static_cast<uint32_t>(0));
+  }
+}
+
+inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) {
+  DCHECK_NE(is_thread_local_, 0);
+  MarkFreeBitMapShared(ptr, thread_local_free_bit_map(), "MarkThreadLocalFreeBitMap");
+}
+
+inline void RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
+  MarkFreeBitMapShared(ptr, bulk_free_bit_map(), "MarkBulkFreeBitMap");
+}
+
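+// Shared helper for MarkThreadLocalFreeBitMap and MarkBulkFreeBitMap:
+// sets the bit for the slot at ptr in the given free bit map.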
+inline void RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
+                                                const char* caller_name) {
+  byte idx = size_bracket_idx_;
+  size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+      - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
+  DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+  size_t slot_idx = offset_from_slot_base / bracketSizes[idx];
+  DCHECK(slot_idx < numOfSlots[idx]);
+  size_t vec_idx = slot_idx / 32;
+  if (kIsDebugBuild) {
+    size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
+    DCHECK(vec_idx < num_vec);
+  }
+  size_t vec_off = slot_idx % 32;
+  uint32_t* vec = &free_bit_map_base[vec_idx];
+  DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  *vec |= 1 << vec_off;
+  DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : 0x" << std::hex
+              << reinterpret_cast<intptr_t>(ptr)
+              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+  }
+}
+
+inline bool RosAlloc::Run::IsAllFree() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t vec = alloc_bit_map_[v];
+    if (vec != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline bool RosAlloc::Run::IsFull() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slots = 0;
+  for (size_t v = 0; v < num_vec; v++, slots += 32) {
+    DCHECK(num_slots >= slots);
+    uint32_t vec = alloc_bit_map_[v];
+    uint32_t mask = (num_slots - slots >= 32) ? static_cast<uint32_t>(-1)
+        : (1 << (num_slots - slots)) - 1;
+    DCHECK(num_slots - slots >= 32 ? mask == static_cast<uint32_t>(-1) : true);
+    if (vec != mask) {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline void RosAlloc::Run::ClearBitMaps() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  memset(alloc_bit_map_, 0, sizeof(uint32_t) * num_vec * 3);
+}
+
+void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
+                                    void* arg) {
+  size_t idx = size_bracket_idx_;
+  byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx];
+  size_t num_slots = numOfSlots[idx];
+  size_t bracket_size = IndexToBracketSize(idx);
+  DCHECK_EQ(slot_base + num_slots * bracket_size, reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize);
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slots = 0;
+  for (size_t v = 0; v < num_vec; v++, slots += 32) {
+    DCHECK(num_slots >= slots);
+    uint32_t vec = alloc_bit_map_[v];
+    size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
+    for (size_t i = 0; i < end; ++i) {
+      bool is_allocated = ((vec >> i) & 0x1) != 0;
+      byte* slot_addr = slot_base + (slots + i) * bracket_size;
+      if (is_allocated) {
+        handler(slot_addr, slot_addr + bracket_size, bracket_size, arg);
+      } else {
+        handler(slot_addr, slot_addr + bracket_size, 0, arg);
+      }
+    }
+  }
+}
+
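+// Frees a batch of pointers, as the GC does. Slots are first marked in
+// the per-run bulk free bit maps without holding the bracket locks;
+// each affected run is then updated once under its bracket lock.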
+void RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
+  if (false) {
+    // Used only to test Free() as GC uses only BulkFree().
+    for (size_t i = 0; i < num_ptrs; ++i) {
+      FreeInternal(self, ptrs[i]);
+    }
+    return;
+  }
+
+  WriterMutexLock wmu(self, bulk_free_lock_);
+
+  // First mark slots to free in the bulk free bit map without locking the
+  // size bracket locks. On host, hash_set is faster than vector + flag.
+#ifdef HAVE_ANDROID_OS
+  std::vector<Run*> runs;
+#else
+  hash_set<Run*, hash_run, eq_run> runs;
+#endif
+  {
+    for (size_t i = 0; i < num_ptrs; i++) {
+      void* ptr = ptrs[i];
+      ptrs[i] = NULL;
+      DCHECK(base_ <= ptr && ptr < base_ + footprint_);
+      size_t pm_idx = RoundDownToPageMapIndex(ptr);
+      bool free_from_run = false;
+      Run* run = NULL;
+      {
+        MutexLock mu(self, lock_);
+        DCHECK(pm_idx < page_map_.size());
+        byte page_map_entry = page_map_[pm_idx];
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
+                    << std::dec << pm_idx
+                    << ", page_map_entry=" << static_cast<int>(page_map_entry);
+        }
+        if (LIKELY(page_map_entry == kPageMapRun)) {
+          free_from_run = true;
+          run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
+          DCHECK(run->magic_num_ == kMagicNum);
+        } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
+          free_from_run = true;
+          size_t pi = pm_idx;
+          DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
+          // Find the beginning of the run.
+          while (page_map_[pi] != kPageMapRun) {
+            pi--;
+            DCHECK(pi < capacity_ / kPageSize);
+          }
+          DCHECK(page_map_[pi] == kPageMapRun);
+          run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
+          DCHECK(run->magic_num_ == kMagicNum);
+        } else if (page_map_entry == kPageMapLargeObject) {
+          FreePages(self, ptr);
+        } else {
+          LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
+        }
+      }
+      if (LIKELY(free_from_run)) {
+        DCHECK(run != NULL);
+        // Set the bit in the bulk free bit map.
+        run->MarkBulkFreeBitMap(ptr);
+#ifdef HAVE_ANDROID_OS
+        if (!run->to_be_bulk_freed_) {
+          run->to_be_bulk_freed_ = true;
+          runs.push_back(run);
+        }
+#else
+        runs.insert(run);
+#endif
+      }
+    }
+  }
+
+  // Now, iterate over the affected runs and update the alloc bit map
+  // based on the bulk free bit map (for non-thread-local runs) and
+  // union the bulk free bit map into the thread-local free bit map
+  // (for thread-local runs.)
+#ifdef HAVE_ANDROID_OS
+  typedef std::vector<Run*>::iterator It;
+#else
+  typedef hash_set<Run*, hash_run, eq_run>::iterator It;
+#endif
+  for (It it = runs.begin(); it != runs.end(); ++it) {
+    Run* run = *it;
+#ifdef HAVE_ANDROID_OS
+    DCHECK(run->to_be_bulk_freed_);
+    run->to_be_bulk_freed_ = false;
+#endif
+    size_t idx = run->size_bracket_idx_;
+    MutexLock mu(self, *size_bracket_locks_[idx]);
+    if (run->is_thread_local_ != 0) {
+      DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx);
+      DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
+      run->UnionBulkFreeBitMapToThreadLocalFreeBitMap();
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a thread local run 0x"
+                  << std::hex << reinterpret_cast<intptr_t>(run);
+      }
+      DCHECK_NE(run->is_thread_local_, 0);
+      // A thread-local run is kept thread-local even if it becomes
+      // all free.
+    } else {
+      bool run_was_full = run->IsFull();
+      run->MergeBulkFreeBitMapIntoAllocBitMap();
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a run 0x" << std::hex
+                  << reinterpret_cast<intptr_t>(run);
+      }
+      // Check if the run should be moved to non_full_runs_ or
+      // free_page_runs_.
+      std::set<Run*>* non_full_runs = &non_full_runs_[idx];
+      hash_set<Run*, hash_run, eq_run>* full_runs =
+          kIsDebugBuild ? &full_runs_[idx] : NULL;
+      if (run->IsAllFree()) {
+        // It has just become completely free. Free the pages of the
+        // run.
+        bool run_was_current = run == current_runs_[idx];
+        if (run_was_current) {
+          DCHECK(full_runs->find(run) == full_runs->end());
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+          // If it was a current run, reuse it.
+        } else if (run_was_full) {
+          // If it was full, remove it from the full run set (debug
+          // only.)
+          if (kIsDebugBuild) {
+            hash_set<Run*, hash_run, eq_run>::iterator pos = full_runs->find(run);
+            DCHECK(pos != full_runs->end());
+            full_runs->erase(pos);
+            if (kTraceRosAlloc) {
+              LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
+                        << reinterpret_cast<intptr_t>(run)
+                        << " from full_runs_";
+            }
+            DCHECK(full_runs->find(run) == full_runs->end());
+          }
+        } else {
+          // If it was in a non full run set, remove it from the set.
+          DCHECK(full_runs->find(run) == full_runs->end());
+          DCHECK(non_full_runs->find(run) != non_full_runs->end());
+          non_full_runs->erase(run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(run)
+                      << " from non_full_runs_";
+          }
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+        }
+        if (!run_was_current) {
+          MutexLock mu(self, lock_);
+          FreePages(self, run);
+        }
+      } else {
+        // It is not completely free. If it was neither the current run
+        // nor already in the non-full run set (i.e., it was full),
+        // insert it into the non-full run set.
+        if (run == current_runs_[idx]) {
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+          DCHECK(full_runs->find(run) == full_runs->end());
+          // If it was a current run, keep it.
+        } else if (run_was_full) {
+          // If it was full, remove it from the full run set (debug
+          // only) and insert into the non-full run set.
+          DCHECK(full_runs->find(run) != full_runs->end());
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+          if (kIsDebugBuild) {
+            full_runs->erase(run);
+            if (kTraceRosAlloc) {
+              LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
+                        << reinterpret_cast<intptr_t>(run)
+                        << " from full_runs_";
+            }
+          }
+          non_full_runs->insert(run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::BulkFree() : Inserted run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(run)
+                      << " into non_full_runs_[" << std::dec << idx;
+          }
+        } else {
+          // It was not full, so leave it in the non-full run set.
+          DCHECK(full_runs->find(run) == full_runs->end());
+          DCHECK(non_full_runs->find(run) != non_full_runs->end());
+        }
+      }
+    }
+  }
+}
+
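+// Logs a human-readable description of every page map entry, checking
+// the free page run bookkeeping as it goes.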
+void RosAlloc::DumpPageMap(Thread* self) {
+  MutexLock mu(self, lock_);
+  size_t end = page_map_.size();
+  FreePageRun* curr_fpr = NULL;
+  size_t curr_fpr_size = 0;
+  size_t remaining_curr_fpr_size = 0;
+  size_t num_running_empty_pages = 0;
+  for (size_t i = 0; i < end; ++i) {
+    byte pm = page_map_[i];
+    switch (pm) {
+      case kPageMapEmpty: {
+        FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+        if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
+          // Encountered a fresh free page run.
+          DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+          DCHECK(fpr->IsFree());
+          DCHECK(curr_fpr == NULL);
+          DCHECK_EQ(curr_fpr_size, static_cast<size_t>(0));
+          curr_fpr = fpr;
+          curr_fpr_size = fpr->ByteSize(this);
+          DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
+          remaining_curr_fpr_size = curr_fpr_size - kPageSize;
+          LOG(INFO) << "[" << i << "]=Empty (FPR start)"
+                    << " fpr_size=" << curr_fpr_size
+                    << " remaining_fpr_size=" << remaining_curr_fpr_size;
+          if (remaining_curr_fpr_size == 0) {
+            // Reset at the end of the current free page run.
+            curr_fpr = NULL;
+            curr_fpr_size = 0;
+          }
+          LOG(INFO) << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr);
+          DCHECK_EQ(num_running_empty_pages, static_cast<size_t>(0));
+        } else {
+          // Still part of the current free page run.
+          DCHECK_NE(num_running_empty_pages, static_cast<size_t>(0));
+          DCHECK(curr_fpr != NULL && curr_fpr_size > 0 && remaining_curr_fpr_size > 0);
+          DCHECK_EQ(remaining_curr_fpr_size % kPageSize, static_cast<size_t>(0));
+          DCHECK_GE(remaining_curr_fpr_size, static_cast<size_t>(kPageSize));
+          remaining_curr_fpr_size -= kPageSize;
+          LOG(INFO) << "[" << i << "]=Empty (FPR part)"
+                    << " remaining_fpr_size=" << remaining_curr_fpr_size;
+          if (remaining_curr_fpr_size == 0) {
+            // Reset at the end of the current free page run.
+            curr_fpr = NULL;
+            curr_fpr_size = 0;
+          }
+        }
+        num_running_empty_pages++;
+        break;
+      }
+      case kPageMapLargeObject: {
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        LOG(INFO) << "[" << i << "]=Large (start)";
+        break;
+      }
+      case kPageMapLargeObjectPart:
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        LOG(INFO) << "[" << i << "]=Large (part)";
+        break;
+      case kPageMapRun: {
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
+        size_t idx = run->size_bracket_idx_;
+        LOG(INFO) << "[" << i << "]=Run (start)"
+                  << " idx=" << idx
+                  << " numOfPages=" << numOfPages[idx]
+                  << " thread_local=" << static_cast<int>(run->is_thread_local_)
+                  << " is_all_free=" << (run->IsAllFree() ? 1 : 0);
+        break;
+      }
+      case kPageMapRunPart:
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        LOG(INFO) << "[" << i << "]=Run (part)";
+        break;
+      default:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+    }
+  }
+}
+
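+// Returns the usable size of the allocation at ptr: the page-rounded
+// size for a large object, or the bracket size for a run slot.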
+size_t RosAlloc::UsableSize(void* ptr) {
+  DCHECK(base_ <= ptr && ptr < base_ + footprint_);
+  size_t pm_idx = RoundDownToPageMapIndex(ptr);
+  MutexLock mu(Thread::Current(), lock_);
+  switch (page_map_[pm_idx]) {
+  case kPageMapEmpty:
+    LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
+               << reinterpret_cast<intptr_t>(ptr);
+    break;
+  case kPageMapLargeObject: {
+    size_t num_pages = 1;
+    size_t idx = pm_idx + 1;
+    size_t end = page_map_.size();
+    while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
+      num_pages++;
+      idx++;
+    }
+    return num_pages * kPageSize;
+  }
+  case kPageMapLargeObjectPart:
+    LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
+               << reinterpret_cast<intptr_t>(ptr);
+    break;
+  case kPageMapRun:
+  case kPageMapRunPart: {
+    // Find the beginning of the run.
+    while (page_map_[pm_idx] != kPageMapRun) {
+      pm_idx--;
+      DCHECK(pm_idx < capacity_ / kPageSize);
+    }
+    DCHECK(page_map_[pm_idx] == kPageMapRun);
+    Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
+    DCHECK(run->magic_num_ == kMagicNum);
+    size_t idx = run->size_bracket_idx_;
+    size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+        - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
+    DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+    return IndexToBracketSize(idx);
+  }
+  default:
+    LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+    break;
+  }
+  return 0;
+}
+
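+// Shrinks the footprint by releasing the free page run at the end of
+// the space, if there is one. Returns true if any memory was released.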
+bool RosAlloc::Trim() {
+  MutexLock mu(Thread::Current(), lock_);
+  FreePageRun* last_free_page_run;
+  DCHECK_EQ(footprint_ % kPageSize, static_cast<size_t>(0));
+  auto it = free_page_runs_.rbegin();
+  if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
+    // Remove the last free page run, if any.
+    DCHECK(last_free_page_run->IsFree());
+    DCHECK(page_map_[ToPageMapIndex(last_free_page_run)] == kPageMapEmpty);
+    DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+    DCHECK_EQ(last_free_page_run->End(this), base_ + footprint_);
+    free_page_runs_.erase(last_free_page_run);
+    size_t decrement = last_free_page_run->ByteSize(this);
+    size_t new_footprint = footprint_ - decrement;
+    DCHECK_EQ(new_footprint % kPageSize, static_cast<size_t>(0));
+    size_t new_num_of_pages = new_footprint / kPageSize;
+    DCHECK_GE(page_map_.size(), new_num_of_pages);
+    page_map_.resize(new_num_of_pages);
+    DCHECK_EQ(page_map_.size(), new_num_of_pages);
+    free_page_run_size_map_.resize(new_num_of_pages);
+    DCHECK_EQ(free_page_run_size_map_.size(), new_num_of_pages);
+    art_heap_rosalloc_morecore(this, -(static_cast<intptr_t>(decrement)));
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::Trim() : decreased the footprint from "
+                << footprint_ << " to " << new_footprint;
+    }
+    DCHECK_LT(new_footprint, footprint_);
+    DCHECK_LT(new_footprint, capacity_);
+    footprint_ = new_footprint;
+    return true;
+  }
+  return false;
+}
+
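+// Walks the whole space, invoking handler on every free page run,
+// large object, and run slot with its used byte count.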
+void RosAlloc::InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
+                          void* arg) {
+  // Note: no need to use this to release pages as we already do so in FreePages().
+  if (handler == NULL) {
+    return;
+  }
+  MutexLock mu(Thread::Current(), lock_);
+  size_t pm_end = page_map_.size();
+  size_t i = 0;
+  while (i < pm_end) {
+    byte pm = page_map_[i];
+    switch (pm) {
+      case kPageMapEmpty: {
+        // The start of a free page run.
+        FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+        DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
+        size_t fpr_size = fpr->ByteSize(this);
+        DCHECK(IsAligned<kPageSize>(fpr_size));
+        void* start = fpr;
+        void* end = reinterpret_cast<byte*>(start) + fpr_size;
+        handler(start, end, 0, arg);
+        size_t num_pages = fpr_size / kPageSize;
+        if (kIsDebugBuild) {
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            DCHECK_EQ(page_map_[j], kPageMapEmpty);
+          }
+        }
+        i += fpr_size / kPageSize;
+        DCHECK_LE(i, pm_end);
+        break;
+      }
+      case kPageMapLargeObject: {
+        // The start of a large object.
+        size_t num_pages = 1;
+        size_t idx = i + 1;
+        while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
+          num_pages++;
+          idx++;
+        }
+        void* start = base_ + i * kPageSize;
+        void* end = base_ + (i + num_pages) * kPageSize;
+        size_t used_bytes = num_pages * kPageSize;
+        handler(start, end, used_bytes, arg);
+        if (kIsDebugBuild) {
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            DCHECK_EQ(page_map_[j], kPageMapLargeObjectPart);
+          }
+        }
+        i += num_pages;
+        DCHECK_LE(i, pm_end);
+        break;
+      }
+      case kPageMapLargeObjectPart:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+      case kPageMapRun: {
+        // The start of a run.
+        Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
+        DCHECK(run->magic_num_ == kMagicNum);
+        run->InspectAllSlots(handler, arg);
+        size_t num_pages = numOfPages[run->size_bracket_idx_];
+        if (kIsDebugBuild) {
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            DCHECK_EQ(page_map_[j], kPageMapRunPart);
+          }
+        }
+        i += num_pages;
+        DCHECK_LE(i, pm_end);
+        break;
+      }
+      case kPageMapRunPart:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+      default:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+    }
+  }
+}
+
+size_t RosAlloc::Footprint() {
+  MutexLock mu(Thread::Current(), lock_);
+  return footprint_;
+}
+
+size_t RosAlloc::FootprintLimit() {
+  MutexLock mu(Thread::Current(), lock_);
+  return capacity_;
+}
+
+void RosAlloc::SetFootprintLimit(size_t new_capacity) {
+  MutexLock mu(Thread::Current(), lock_);
+  DCHECK_EQ(RoundUp(new_capacity, kPageSize), new_capacity);
+  // Only growing the capacity is supported here; shrinking is done
+  // separately via Trim().
+  if (capacity_ < new_capacity) {
+    capacity_ = new_capacity;
+    VLOG(heap) << "new capacity=" << capacity_;
+  }
+}
+
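+// Detaches the given thread's thread-local runs, merging their pending
+// thread-local frees and filing each run into the full set, the
+// non-full set, or freeing its pages if it has become all free.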
+void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
+  Thread* self = Thread::Current();
+  for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) {
+    MutexLock mu(self, *size_bracket_locks_[idx]);
+    Run* thread_local_run = reinterpret_cast<Run*>(thread->rosalloc_runs_[idx]);
+    if (thread_local_run != NULL) {
+      DCHECK_EQ(thread_local_run->magic_num_, kMagicNum);
+      DCHECK_NE(thread_local_run->is_thread_local_, 0);
+      thread->rosalloc_runs_[idx] = NULL;
+      // Note the thread local run may not be full here.
+      bool dont_care;
+      thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care);
+      thread_local_run->is_thread_local_ = 0;
+      thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap();
+      DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
+      if (thread_local_run->IsFull()) {
+        if (kIsDebugBuild) {
+          full_runs_[idx].insert(thread_local_run);
+          DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::RevokeThreadLocalRuns() : Inserted run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(thread_local_run)
+                      << " into full_runs_[" << std::dec << idx << "]";
+          }
+        }
+      } else if (thread_local_run->IsAllFree()) {
+        MutexLock mu(self, lock_);
+        FreePages(self, thread_local_run);
+      } else {
+        non_full_runs_[idx].insert(thread_local_run);
+        DCHECK(non_full_runs_[idx].find(thread_local_run) != non_full_runs_[idx].end());
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::RevokeThreadLocalRuns() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(thread_local_run)
+                    << " into non_full_runs_[" << std::dec << idx << "]";
+        }
+      }
+    }
+  }
+}
+
+void RosAlloc::RevokeAllThreadLocalRuns() {
+  // This is called when mutator threads won't allocate, such as at
+  // Zygote creation time or during a GC pause.
+  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+  std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
+  for (auto it = thread_list.begin(); it != thread_list.end(); ++it) {
+    Thread* t = *it;
+    RevokeThreadLocalRuns(t);
+  }
+}
+
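+// Computes the static size bracket tables: the bracket sizes, pages
+// per run, slots per run, and the header and free bit map offsets
+// within a run.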
+void RosAlloc::Initialize() {
+  // Check the consistency of the number of size brackets.
+  DCHECK_EQ(Thread::kRosAllocNumOfSizeBrackets, kNumOfSizeBrackets);
+  // bracketSizes.
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    if (i < kNumOfSizeBrackets - 2) {
+      bracketSizes[i] = 16 * (i + 1);
+    } else if (i == kNumOfSizeBrackets - 2) {
+      bracketSizes[i] = 1 * KB;
+    } else {
+      DCHECK(i == kNumOfSizeBrackets - 1);
+      bracketSizes[i] = 2 * KB;
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "bracketSizes[" << i << "]=" << bracketSizes[i];
+    }
+  }
+  // numOfPages.
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    if (i < 4) {
+      numOfPages[i] = 1;
+    } else if (i < 8) {
+      numOfPages[i] = 2;
+    } else if (i < 16) {
+      numOfPages[i] = 4;
+    } else if (i < 32) {
+      numOfPages[i] = 8;
+    } else if (i == 32) {
+      DCHECK(i == kNumOfSizeBrackets - 2);
+      numOfPages[i] = 16;
+    } else {
+      DCHECK(i == kNumOfSizeBrackets - 1);
+      numOfPages[i] = 32;
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "numOfPages[" << i << "]=" << numOfPages[i];
+    }
+  }
+  // Compute numOfSlots, headerSizes, and the free bit map offsets.
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    size_t bracket_size = bracketSizes[i];
+    size_t run_size = kPageSize * numOfPages[i];
+    size_t max_num_of_slots = run_size / bracket_size;
+    // Compute the actual number of slots by taking the header and
+    // alignment into account.
+    size_t fixed_header_size = RoundUp(Run::fixed_header_size(), sizeof(uint32_t));
+    DCHECK_EQ(fixed_header_size, static_cast<size_t>(8));
+    size_t header_size = 0;
+    size_t bulk_free_bit_map_offset = 0;
+    size_t thread_local_free_bit_map_offset = 0;
+    size_t num_of_slots = 0;
+    // Search for the maximum number of slots that allows enough space
+    // for the header (including the bit maps.)
+    for (int s = max_num_of_slots; s >= 0; s--) {
+      size_t tmp_slots_size = bracket_size * s;
+      size_t tmp_bit_map_size = RoundUp(s, sizeof(uint32_t) * kBitsPerByte) / kBitsPerByte;
+      size_t tmp_bulk_free_bit_map_size = tmp_bit_map_size;
+      size_t tmp_bulk_free_bit_map_off = fixed_header_size + tmp_bit_map_size;
+      size_t tmp_thread_local_free_bit_map_size = tmp_bit_map_size;
+      size_t tmp_thread_local_free_bit_map_off = tmp_bulk_free_bit_map_off + tmp_bulk_free_bit_map_size;
+      size_t tmp_unaligned_header_size = tmp_thread_local_free_bit_map_off + tmp_thread_local_free_bit_map_size;
+      // Align up the unaligned header size. bracket_size may not be a power of two.
+      size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ?
+          tmp_unaligned_header_size :
+          tmp_unaligned_header_size + (bracket_size - tmp_unaligned_header_size % bracket_size);
+      DCHECK_EQ(tmp_header_size % bracket_size, static_cast<size_t>(0));
+      DCHECK_EQ(tmp_header_size % 8, static_cast<size_t>(0));
+      if (tmp_slots_size + tmp_header_size <= run_size) {
+        // Found the right number of slots, that is, there was enough
+        // space for the header (including the bit maps.)
+        num_of_slots = s;
+        header_size = tmp_header_size;
+        bulk_free_bit_map_offset = tmp_bulk_free_bit_map_off;
+        thread_local_free_bit_map_offset = tmp_thread_local_free_bit_map_off;
+        break;
+      }
+    }
+    DCHECK(num_of_slots > 0 && header_size > 0 && bulk_free_bit_map_offset > 0);
+    // Add the padding for the alignment remainder.
+    header_size += run_size % bracket_size;
+    DCHECK(header_size + num_of_slots * bracket_size == run_size);
+    numOfSlots[i] = num_of_slots;
+    headerSizes[i] = header_size;
+    bulkFreeBitMapOffsets[i] = bulk_free_bit_map_offset;
+    threadLocalFreeBitMapOffsets[i] = thread_local_free_bit_map_offset;
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "numOfSlots[" << i << "]=" << numOfSlots[i]
+                << ", headerSizes[" << i << "]=" << headerSizes[i]
+                << ", bulkFreeBitMapOffsets[" << i << "]=" << bulkFreeBitMapOffsets[i]
+                << ", threadLocalFreeBitMapOffsets[" << i << "]=" << threadLocalFreeBitMapOffsets[i];;
+    }
+  }
+}
+
+void RosAlloc::BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
+  if (used_bytes == 0) {
+    return;
+  }
+  size_t* bytes_allocated = reinterpret_cast<size_t*>(arg);
+  *bytes_allocated += used_bytes;
+}
+
+void RosAlloc::ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
+  if (used_bytes == 0) {
+    return;
+  }
+  size_t* objects_allocated = reinterpret_cast<size_t*>(arg);
+  ++(*objects_allocated);
+}
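+
+// A minimal usage sketch for the two callbacks above (assuming an
+// initialized RosAlloc* named rosalloc; the names are illustrative only):
+//
+//   size_t bytes_allocated = 0;
+//   rosalloc->InspectAll(RosAlloc::BytesAllocatedCallback, &bytes_allocated);
+//   size_t objects_allocated = 0;
+//   rosalloc->InspectAll(RosAlloc::ObjectsAllocatedCallback, &objects_allocated);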
+
+}  // namespace allocator
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
new file mode 100644
index 0000000..d5b6de1
--- /dev/null
+++ b/runtime/gc/allocator/rosalloc.h
@@ -0,0 +1,489 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
+#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
+
+#include <set>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string>
+#include <sys/mman.h>
+#include <vector>
+
+#include "base/mutex.h"
+#include "base/logging.h"
+#include "globals.h"
+#include "utils.h"
+
+// Boilerplate to use hash_map/hash_set on both host and device.
+#ifdef HAVE_ANDROID_OS
+#include <hash_map>
+#include <hash_set>
+using std::hash_map;
+using std::hash_set;
+#else  // HAVE_ANDROID_OS
+#ifdef __DEPRECATED
+#define ROSALLOC_OLD__DEPRECATED __DEPRECATED
+#undef __DEPRECATED
+#endif
+#include <ext/hash_map>
+#include <ext/hash_set>
+#ifdef ROSALLOC_OLD__DEPRECATED
+#define __DEPRECATED ROSALLOC_OLD__DEPRECATED
+#undef ROSALLOC_OLD__DEPRECATED
+#endif
+using __gnu_cxx::hash_map;
+using __gnu_cxx::hash_set;
+#endif  // HAVE_ANDROID_OS
+
+namespace art {
+namespace gc {
+namespace allocator {
+
+// A Runs-of-slots memory allocator.
+class RosAlloc {
+ private:
+  // Represents a run of free pages.
+  class FreePageRun {
+   public:
+    byte magic_num_;  // The magic number used for debugging only.
+
+    bool IsFree() const {
+      if (kIsDebugBuild) {
+        return magic_num_ == kMagicNumFree;
+      }
+      return true;
+    }
+    size_t ByteSize(RosAlloc* rosalloc) const EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      const byte* fpr_base = reinterpret_cast<const byte*>(this);
+      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
+      size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
+      DCHECK_GE(byte_size, static_cast<size_t>(0));
+      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+      return byte_size;
+    }
+    void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
+        EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+      byte* fpr_base = reinterpret_cast<byte*>(this);
+      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
+      rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
+    }
+    void* Begin() {
+      return reinterpret_cast<void*>(this);
+    }
+    void* End(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      byte* fpr_base = reinterpret_cast<byte*>(this);
+      byte* end = fpr_base + ByteSize(rosalloc);
+      return end;
+    }
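+    // Releases the backing physical pages to the kernel with MADV_DONTNEED;
+    // the virtual mapping stays intact, so the run can be reused later.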
+    void ReleasePages(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      size_t byte_size = ByteSize(rosalloc);
+      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+      if (kIsDebugBuild) {
+        // Exclude the first page that stores the magic number.
+        DCHECK_GE(byte_size, static_cast<size_t>(kPageSize));
+        byte_size -= kPageSize;
+        if (byte_size > 0) {
+          madvise(reinterpret_cast<byte*>(this) + kPageSize, byte_size, MADV_DONTNEED);
+        }
+      } else {
+        madvise(this, byte_size, MADV_DONTNEED);
+      }
+    }
+  };
+
+  // Represents a run of memory slots of the same size.
+  //
+  // A run's memory layout:
+  //
+  // +-------------------+
+  // | magic_num         |
+  // +-------------------+
+  // | size_bracket_idx  |
+  // +-------------------+
+  // | is_thread_local   |
+  // +-------------------+
+  // | to_be_bulk_freed  |
+  // +-------------------+
+  // | top_slot_idx      |
+  // +-------------------+
+  // |                   |
+  // | alloc bit map     |
+  // |                   |
+  // +-------------------+
+  // |                   |
+  // | bulk free bit map |
+  // |                   |
+  // +-------------------+
+  // |                   |
+  // | thread-local free |
+  // | bit map           |
+  // |                   |
+  // +-------------------+
+  // | padding due to    |
+  // | alignment         |
+  // +-------------------+
+  // | slot 0            |
+  // +-------------------+
+  // | slot 1            |
+  // +-------------------+
+  // | slot 2            |
+  // +-------------------+
+  // ...
+  // +-------------------+
+  // | last slot         |
+  // +-------------------+
+  //
+  class Run {
+   public:
+    byte magic_num_;             // The magic number used for debugging.
+    byte size_bracket_idx_;      // The index of the size bracket of this run.
+    byte is_thread_local_;       // True if this run is used as a thread-local run.
+    byte to_be_bulk_freed_;      // Used within BulkFree() to flag a run that's involved with a bulk free.
+    uint32_t top_slot_idx_;      // The top slot index when this run is in bump index mode.
+    uint32_t alloc_bit_map_[0];  // The bit map that indicates whether each slot is in use.
+
+    // bulk_free_bit_map_[] : The bit map that is used for GC to
+    // temporarily mark the slots to free without using a lock. After
+    // all the slots to be freed in a run are marked, all those slots
+    // get freed in bulk with one locking per run, as opposed to one
+    // locking per slot to minimize the lock contention. This is used
+    // within BulkFree().
+
+    // thread_local_free_bit_map_[] : The bit map that is used for GC
+    // to temporarily mark the slots to free in a thread-local run
+    // without using a lock (without synchronizing the thread that
+    // owns the thread-local run.) When the thread-local run becomes
+    // full, the thread will check this bit map and update the
+    // allocation bit map of the run (that is, the slots get freed.)
+
+    // Returns the byte size of the header except for the bit maps.
+    static size_t fixed_header_size() {
+      Run temp;
+      size_t size = reinterpret_cast<byte*>(&temp.alloc_bit_map_) - reinterpret_cast<byte*>(&temp);
+      DCHECK_EQ(size, static_cast<size_t>(8));
+      return size;
+    }
+    // Returns the base address of the bulk free bit map.
+    uint32_t* bulk_free_bit_map() {
+      return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + bulkFreeBitMapOffsets[size_bracket_idx_]);
+    }
+    // Returns the base address of the thread local free bit map.
+    uint32_t* thread_local_free_bit_map() {
+      return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + threadLocalFreeBitMapOffsets[size_bracket_idx_]);
+    }
+    void* End() {
+      return reinterpret_cast<byte*>(this) + kPageSize * numOfPages[size_bracket_idx_];
+    }
+    // Frees slots in the allocation bit map with regard to the
+    // thread-local free bit map. Used when a thread-local run becomes
+    // full.
+    bool MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out);
+    // Frees slots in the allocation bit map with regard to the bulk
+    // free bit map. Used in a bulk free.
+    void MergeBulkFreeBitMapIntoAllocBitMap();
+    // Unions the slots to be freed in the bulk free bit map into the
+    // thread-local free bit map. In a bulk free, as a two-step
+    // process, GC will first record all the slots to free in a run in
+    // the bulk free bit map, where it can write without a lock, and
+    // later acquire a lock once per run to union the bits of the bulk
+    // free bit map into the thread-local free bit map.
+    void UnionBulkFreeBitMapToThreadLocalFreeBitMap();
+    // Allocates a slot in a run.
+    void* AllocSlot();
+    // Frees a slot in a run. This is used in a non-bulk free.
+    void FreeSlot(void* ptr);
+    // Marks the slots to free in the bulk free bit map.
+    void MarkBulkFreeBitMap(void* ptr);
+    // Marks the slots to free in the thread-local free bit map.
+    void MarkThreadLocalFreeBitMap(void* ptr);
+    // Returns true if none of the slots in the run are in use.
+    bool IsAllFree();
+    // Returns true if all the slots in the run are in use.
+    bool IsFull();
+    // Clear all the bit maps.
+    void ClearBitMaps();
+    // Iterate over all the slots and apply the given function.
+    void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
+    // Dump the run metadata for debugging.
+    void Dump();
+
+   private:
+    // The common part of MarkBulkFreeBitMap() and MarkThreadLocalFreeBitMap().
+    void MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name);
+  };
+
+  // The magic number for a run.
+  static const byte kMagicNum = 42;
+  // The magic number for free pages.
+  static const byte kMagicNumFree = 43;
+  // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_.
+  static const size_t kNumOfSizeBrackets = 34;
+  // The number of smaller size brackets that are 16 bytes apart.
+  static const size_t kNumOfQuantumSizeBrackets = 32;
+  // The sizes (the slot sizes, in bytes) of the size brackets.
+  static size_t bracketSizes[kNumOfSizeBrackets];
+  // The numbers of pages that are used for runs for each size bracket.
+  static size_t numOfPages[kNumOfSizeBrackets];
+  // The numbers of slots of the runs for each size bracket.
+  static size_t numOfSlots[kNumOfSizeBrackets];
+  // The header sizes in bytes of the runs for each size bracket.
+  static size_t headerSizes[kNumOfSizeBrackets];
+  // The byte offsets of the bulk free bit maps of the runs for each size bracket.
+  static size_t bulkFreeBitMapOffsets[kNumOfSizeBrackets];
+  // The byte offsets of the thread-local free bit maps of the runs for each size bracket.
+  static size_t threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
+
+  // Initialize the run specs (the above arrays).
+  static void Initialize();
+  static bool initialized_;
+
+  // Returns the bracket size in bytes for the given index.
+  static size_t IndexToBracketSize(size_t idx) {
+    DCHECK(idx < kNumOfSizeBrackets);
+    return bracketSizes[idx];
+  }
+  // Returns the index of the size bracket from the bracket size.
+  static size_t BracketSizeToIndex(size_t size) {
+    DCHECK(16 <= size && ((size < 1 * KB && size % 16 == 0) || size == 1 * KB || size == 2 * KB));
+    size_t idx;
+    if (UNLIKELY(size == 1 * KB)) {
+      idx = kNumOfSizeBrackets - 2;
+    } else if (UNLIKELY(size == 2 * KB)) {
+      idx = kNumOfSizeBrackets - 1;
+    } else {
+      DCHECK(size < 1 * KB);
+      DCHECK_EQ(size % 16, static_cast<size_t>(0));
+      idx = size / 16 - 1;
+    }
+    DCHECK(bracketSizes[idx] == size);
+    return idx;
+  }
+  // Rounds up the size to the nearest bracket size.
+  static size_t RoundToBracketSize(size_t size) {
+    DCHECK(size <= kLargeSizeThreshold);
+    if (LIKELY(size <= 512)) {
+      return RoundUp(size, 16);
+    } else if (512 < size && size <= 1 * KB) {
+      return 1 * KB;
+    } else {
+      DCHECK(1 * KB < size && size <= 2 * KB);
+      return 2 * KB;
+    }
+  }
+  // Returns the size bracket index from the byte size with rounding.
+  static size_t SizeToIndex(size_t size) {
+    DCHECK(size <= kLargeSizeThreshold);
+    if (LIKELY(size <= 512)) {
+      return RoundUp(size, 16) / 16 - 1;
+    } else if (512 < size && size <= 1 * KB) {
+      return kNumOfSizeBrackets - 2;
+    } else {
+      DCHECK(1 * KB < size && size <= 2 * KB);
+      return kNumOfSizeBrackets - 1;
+    }
+  }
+  // A combination of SizeToIndex() and RoundToBracketSize().
+  static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
+    DCHECK(size <= kLargeSizeThreshold);
+    if (LIKELY(size <= 512)) {
+      size_t bracket_size = RoundUp(size, 16);
+      *bracket_size_out = bracket_size;
+      size_t idx = bracket_size / 16 - 1;
+      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+      return idx;
+    } else if (512 < size && size <= 1 * KB) {
+      size_t bracket_size = 1024;
+      *bracket_size_out = bracket_size;
+      size_t idx = kNumOfSizeBrackets - 2;
+      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+      return idx;
+    } else {
+      DCHECK(1 * KB < size && size <= 2 * KB);
+      size_t bracket_size = 2048;
+      *bracket_size_out = bracket_size;
+      size_t idx = kNumOfSizeBrackets - 1;
+      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+      return idx;
+    }
+  }
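+  // For example (a sketch): SizeToIndexAndBracketSize(130, &bs) rounds 130 up
+  // to the 144-byte bracket and returns index 144 / 16 - 1 = 8, while a
+  // 1500-byte request falls into the 2KB bracket at index kNumOfSizeBrackets - 1.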
+  // Returns the page map index from an address. Requires that the
+  // address is page size aligned.
+  size_t ToPageMapIndex(const void* addr) const {
+    DCHECK(base_ <= addr && addr < base_ + capacity_);
+    size_t byte_offset = reinterpret_cast<const byte*>(addr) - base_;
+    DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
+    return byte_offset / kPageSize;
+  }
+  // Returns the page map index from an address with rounding.
+  size_t RoundDownToPageMapIndex(void* addr) {
+    DCHECK(base_ <= addr && addr < reinterpret_cast<byte*>(base_) + capacity_);
+    return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
+  }
+
+  // A memory allocation request larger than this size is treated as a large object and allocated
+  // at page granularity.
+  static const size_t kLargeSizeThreshold = 2048;
+
+  // We use thread-local runs for the size brackets whose indexes
+  // are less than or equal to this index. We use shared (current)
+  // runs for the rest.
+  static const size_t kMaxThreadLocalSizeBracketIdx = 10;
+
+  // If true, check that the returned memory is actually zero.
+  static constexpr bool kCheckZeroMemory = kIsDebugBuild;
+
+  // If true, log verbose details of operations.
+  static constexpr bool kTraceRosAlloc = false;
+
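+  // Hash and equality functors for Run*, used by the full_runs_ hash sets below.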
+  struct hash_run {
+    size_t operator()(const RosAlloc::Run* r) const {
+      return reinterpret_cast<size_t>(r);
+    }
+  };
+
+  struct eq_run {
+    bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
+      return r1 == r2;
+    }
+  };
+
+  // The base address of the memory region that's managed by this allocator.
+  byte* base_;
+
+  // The footprint in bytes of the currently allocated portion of the
+  // memory region.
+  size_t footprint_;
+
+  // The maximum footprint. The address, base_ + capacity_, indicates
+  // the end of the memory region that's managed by this allocator.
+  size_t capacity_;
+
+  // The run sets that hold the runs whose slots are not all
+  // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
+  std::set<Run*> non_full_runs_[kNumOfSizeBrackets];
+  // The run sets that hold the runs whose slots are all full. This is
+  // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
+  hash_set<Run*, hash_run, eq_run> full_runs_[kNumOfSizeBrackets];
+  // The set of free pages.
+  std::set<FreePageRun*> free_page_runs_ GUARDED_BY(lock_);
+  // The free page run whose end address is the end of the memory
+  // region that's managed by this allocator, if any.
+  FreePageRun* last_free_page_run_;
+  // The current runs where the allocations are first attempted for
+  // the size brackets that do not use thread-local
+  // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
+  Run* current_runs_[kNumOfSizeBrackets];
+  // The mutexes, one per size bracket.
+  Mutex* size_bracket_locks_[kNumOfSizeBrackets];
+  // The types of page map entries.
+  enum {
+    kPageMapEmpty           = 0,  // Not allocated.
+    kPageMapRun             = 1,  // The beginning of a run.
+    kPageMapRunPart         = 2,  // The non-beginning part of a run.
+    kPageMapLargeObject     = 3,  // The beginning of a large object.
+    kPageMapLargeObjectPart = 4,  // The non-beginning part of a large object.
+  };
+  // The table that indicates what pages are currently used for.
+  std::vector<byte> page_map_ GUARDED_BY(lock_);
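+  // For example, a large object spanning three pages appears in this table as
+  // { kPageMapLargeObject, kPageMapLargeObjectPart, kPageMapLargeObjectPart },
+  // and a single-page run simply as { kPageMapRun }.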
+  // The table that indicates the sizes of free page runs. The sizes are
+  // stored here rather than in the free page headers so that the backing
+  // pages can be released.
+  std::vector<size_t> free_page_run_size_map_ GUARDED_BY(lock_);
+  // The global lock. Used to guard the page map, the free page set,
+  // and the footprint.
+  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  // The reader-writer lock to allow one bulk free at a time while
+  // allowing multiple individual frees at the same time.
+  ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+  // The base address of the memory region that's managed by this allocator.
+  byte* Begin() { return base_; }
+  // The end address of the memory region that's managed by this allocator.
+  byte* End() { return base_ + capacity_; }
+
+  // Page-granularity alloc/free
+  void* AllocPages(Thread* self, size_t num_pages, byte page_map_type)
+      EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  void FreePages(Thread* self, void* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // Allocate/free a run slot.
+  void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(lock_);
+  void FreeFromRun(Thread* self, void* ptr, Run* run)
+      LOCKS_EXCLUDED(lock_);
+
+  // Used to acquire a new/reused run for a size bracket. Used when a
+  // thread-local or current run gets full.
+  Run* RefillRun(Thread* self, size_t idx) LOCKS_EXCLUDED(lock_);
+
+  // The internal implementation of the non-bulk Free().
+  void FreeInternal(Thread* self, void* ptr) LOCKS_EXCLUDED(lock_);
+
+  // Allocates large objects.
+  void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
+
+ public:
+  RosAlloc(void* base, size_t capacity);
+  void* Alloc(Thread* self, size_t size, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(lock_);
+  void Free(Thread* self, void* ptr)
+      LOCKS_EXCLUDED(bulk_free_lock_);
+  void BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
+      LOCKS_EXCLUDED(bulk_free_lock_);
+  // Returns the size of the allocated slot for a given allocated memory chunk.
+  size_t UsableSize(void* ptr);
+  // Returns the size of the allocated slot for a given size.
+  size_t UsableSize(size_t bytes) {
+    if (UNLIKELY(bytes > kLargeSizeThreshold)) {
+      return RoundUp(bytes, kPageSize);
+    } else {
+      return RoundToBracketSize(bytes);
+    }
+  }
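+  // For example (assuming 4KB pages): UsableSize(100) returns the 112-byte
+  // bracket size, while UsableSize(3000) exceeds kLargeSizeThreshold and
+  // rounds up to 4096.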
+  // Try to reduce the current footprint by releasing the free page
+  // run at the end of the memory region, if any.
+  bool Trim();
+  // Iterates over all the memory slots and applies the given function.
+  void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
+                  void* arg)
+      LOCKS_EXCLUDED(lock_);
+  // Returns the current footprint.
+  size_t Footprint() LOCKS_EXCLUDED(lock_);
+  // Returns the current capacity, i.e. the maximum footprint.
+  size_t FootprintLimit() LOCKS_EXCLUDED(lock_);
+  // Update the current capacity.
+  void SetFootprintLimit(size_t bytes) LOCKS_EXCLUDED(lock_);
+  // Releases the thread-local runs assigned to the given thread back to the common set of runs.
+  void RevokeThreadLocalRuns(Thread* thread);
+  // Releases the thread-local runs assigned to all the threads back to the common set of runs.
+  void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
+  // Dumps the page map for debugging.
+  void DumpPageMap(Thread* self);
+
+  // Callbacks for InspectAll that will count the number of bytes
+  // allocated and objects allocated, respectively.
+  static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
+  static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
+};
+
+}  // namespace allocator
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 6691cad..cf301fe 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -21,6 +21,7 @@
 
 #include "garbage_collector.h"
 
+#include "base/histogram-inl.h"
 #include "base/logging.h"
 #include "base/mutex-inl.h"
 #include "gc/accounting/heap_bitmap.h"
@@ -36,9 +37,11 @@
 GarbageCollector::GarbageCollector(Heap* heap, const std::string& name)
     : heap_(heap),
       name_(name),
+      clear_soft_references_(false),
       verbose_(VLOG_IS_ON(heap)),
       duration_ns_(0),
       timings_(name_.c_str(), true, verbose_),
+      pause_histogram_((name_ + " paused").c_str(), kPauseBucketSize, kPauseBucketCount),
       cumulative_timings_(name) {
   ResetCumulativeStatistics();
 }
@@ -54,17 +57,24 @@
 
 void GarbageCollector::ResetCumulativeStatistics() {
   cumulative_timings_.Reset();
+  pause_histogram_.Reset();
   total_time_ns_ = 0;
-  total_paused_time_ns_ = 0;
   total_freed_objects_ = 0;
   total_freed_bytes_ = 0;
 }
 
-void GarbageCollector::Run() {
+void GarbageCollector::Run(bool clear_soft_references) {
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
   uint64_t start_time = NanoTime();
   pause_times_.clear();
   duration_ns_ = 0;
+  clear_soft_references_ = clear_soft_references;
+
+  // Reset stats.
+  freed_bytes_ = 0;
+  freed_large_object_bytes_ = 0;
+  freed_objects_ = 0;
+  freed_large_objects_ = 0;
 
   InitializePhase();
 
@@ -75,10 +85,10 @@
     thread_list->SuspendAll();
     MarkingPhase();
     ReclaimPhase();
+    GetHeap()->RevokeAllThreadLocalBuffers();
     thread_list->ResumeAll();
     ATRACE_END();
-    uint64_t pause_end = NanoTime();
-    pause_times_.push_back(pause_end - pause_start);
+    RegisterPause(NanoTime() - pause_start);
   } else {
     Thread* self = Thread::Current();
     {
@@ -93,23 +103,28 @@
       ATRACE_END();
       ATRACE_BEGIN("All mutator threads suspended");
       done = HandleDirtyObjectsPhase();
+      if (done) {
+        GetHeap()->RevokeAllThreadLocalBuffers();
+      }
       ATRACE_END();
       uint64_t pause_end = NanoTime();
       ATRACE_BEGIN("Resuming mutator threads");
       thread_list->ResumeAll();
       ATRACE_END();
-      pause_times_.push_back(pause_end - pause_start);
+      RegisterPause(pause_end - pause_start);
     }
     {
       ReaderMutexLock mu(self, *Locks::mutator_lock_);
       ReclaimPhase();
     }
   }
-
+  FinishPhase();
   uint64_t end_time = NanoTime();
   duration_ns_ = end_time - start_time;
-
-  FinishPhase();
+  total_time_ns_ += GetDurationNs();
+  for (uint64_t pause_time : pause_times_) {
+    pause_histogram_.AddValue(pause_time / 1000);
+  }
 }
 
 void GarbageCollector::SwapBitmaps() {
@@ -127,7 +142,7 @@
       if (live_bitmap != mark_bitmap) {
         heap_->GetLiveBitmap()->ReplaceBitmap(live_bitmap, mark_bitmap);
         heap_->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
-        space->AsDlMallocSpace()->SwapBitmaps();
+        space->AsMallocSpace()->SwapBitmaps();
       }
     }
   }
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 0f566c9..1779339 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -17,10 +17,10 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_GARBAGE_COLLECTOR_H_
 #define ART_RUNTIME_GC_COLLECTOR_GARBAGE_COLLECTOR_H_
 
+#include "base/histogram.h"
+#include "base/timing_logger.h"
 #include "gc_type.h"
 #include "locks.h"
-#include "base/timing_logger.h"
-
 #include <stdint.h>
 #include <vector>
 
@@ -46,7 +46,7 @@
   virtual GcType GetGcType() const = 0;
 
   // Run the garbage collector.
-  void Run();
+  void Run(bool clear_soft_references);
 
   Heap* GetHeap() const {
     return heap_;
@@ -64,7 +64,7 @@
 
   void RegisterPause(uint64_t nano_length);
 
-  base::TimingLogger& GetTimings() {
+  TimingLogger& GetTimings() {
     return timings_;
   }
 
@@ -78,6 +78,38 @@
   // this is the allocation space, for full GC then we swap the zygote bitmaps too.
   void SwapBitmaps() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  size_t GetFreedBytes() const {
+    return freed_bytes_;
+  }
+
+  size_t GetFreedLargeObjectBytes() const {
+    return freed_large_object_bytes_;
+  }
+
+  size_t GetFreedObjects() const {
+    return freed_objects_;
+  }
+
+  size_t GetFreedLargeObjects() const {
+    return freed_large_objects_;
+  }
+
+  uint64_t GetTotalPausedTimeNs() const {
+    return pause_histogram_.Sum();
+  }
+
+  uint64_t GetTotalFreedBytes() const {
+    return total_freed_bytes_;
+  }
+
+  uint64_t GetTotalFreedObjects() const {
+    return total_freed_objects_;
+  }
+
+  const Histogram<uint64_t>& GetPauseHistogram() const {
+    return pause_histogram_;
+  }
+
  protected:
   // The initial phase. Done without mutators paused.
   virtual void InitializePhase() = 0;
@@ -94,21 +126,32 @@
   // Called after the GC is finished. Done without mutators paused.
   virtual void FinishPhase() = 0;
 
+  static constexpr size_t kPauseBucketSize = 500;
+  static constexpr size_t kPauseBucketCount = 32;
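+  // Run() adds pause times to the histogram in microseconds (ns / 1000), so
+  // a bucket size of 500 with 32 buckets covers pauses up to roughly 16ms.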
+
   Heap* const heap_;
 
   std::string name_;
 
+  bool clear_soft_references_;
+
   const bool verbose_;
 
   uint64_t duration_ns_;
-  base::TimingLogger timings_;
+  TimingLogger timings_;
 
   // Cumulative statistics.
+  Histogram<uint64_t> pause_histogram_;
   uint64_t total_time_ns_;
-  uint64_t total_paused_time_ns_;
   uint64_t total_freed_objects_;
   uint64_t total_freed_bytes_;
 
+  // Single GC statistics.
+  AtomicInteger freed_bytes_;
+  AtomicInteger freed_large_object_bytes_;
+  AtomicInteger freed_objects_;
+  AtomicInteger freed_large_objects_;
+
   CumulativeLogger cumulative_timings_;
 
   std::vector<uint64_t> pause_times_;
diff --git a/runtime/gc/collector/mark_sweep-inl.h b/runtime/gc/collector/mark_sweep-inl.h
index 270c9ef..7a51553 100644
--- a/runtime/gc/collector/mark_sweep-inl.h
+++ b/runtime/gc/collector/mark_sweep-inl.h
@@ -44,8 +44,7 @@
     if (klass->IsObjectArrayClass()) {
       VisitObjectArrayReferences(obj->AsObjectArray<mirror::Object>(), visitor);
     }
-  } else if (UNLIKELY(klass == java_lang_Class_)) {
-    DCHECK_EQ(klass->GetClass(), java_lang_Class_);
+  } else if (UNLIKELY(klass == mirror::Class::GetJavaLangClass())) {
     if (kCountScannedTypes) {
       ++class_count_;
     }
@@ -56,7 +55,7 @@
     }
     VisitOtherReferences(klass, obj, visitor);
     if (UNLIKELY(klass->IsReferenceClass())) {
-      DelayReferenceReferent(klass, const_cast<mirror::Object*>(obj));
+      DelayReferenceReferent(klass, obj);
     }
   }
 }
@@ -68,11 +67,10 @@
                           Locks::mutator_lock_) {
   DCHECK(obj != NULL);
   DCHECK(obj->GetClass() != NULL);
-
   mirror::Class* klass = obj->GetClass();
   DCHECK(klass != NULL);
   if (visit_class) {
-    visitor(obj, klass, MemberOffset(0), false);
+    visitor(obj, klass, mirror::Object::ClassOffset(), false);
   }
   if (klass == mirror::Class::GetJavaLangClass()) {
     DCHECK_EQ(klass->GetClass(), mirror::Class::GetJavaLangClass());
@@ -90,8 +88,7 @@
 }
 
 template <typename Visitor>
-inline void MarkSweep::VisitInstanceFieldsReferences(mirror::Class* klass,
-                                                     mirror::Object* obj,
+inline void MarkSweep::VisitInstanceFieldsReferences(mirror::Class* klass, mirror::Object* obj,
                                                      const Visitor& visitor)
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
   DCHECK(obj != NULL);
@@ -119,11 +116,6 @@
                                              bool is_static, const Visitor& visitor) {
   if (LIKELY(ref_offsets != CLASS_WALK_SUPER)) {
     // Found a reference offset bitmap.  Mark the specified offsets.
-#ifndef MOVING_COLLECTOR
-    // Clear the class bit since we mark the class as part of marking the classlinker roots.
-    DCHECK_EQ(mirror::Object::ClassOffset().Uint32Value(), 0U);
-    ref_offsets &= (1U << (sizeof(ref_offsets) * 8 - 1)) - 1;
-#endif
     while (ref_offsets != 0) {
       size_t right_shift = CLZ(ref_offsets);
       MemberOffset field_offset = CLASS_OFFSET_FROM_CLZ(right_shift);
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 2c69c77..0697a65 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -93,6 +93,8 @@
   }
 
   // Add the space to the immune region.
+  // TODO: Use space limits instead of current end_ since the end_ can be changed by dlmalloc
+  // callbacks.
   if (immune_begin_ == NULL) {
     DCHECK(immune_end_ == NULL);
     SetImmuneRange(reinterpret_cast<Object*>(space->Begin()),
@@ -108,14 +110,14 @@
     }
     // If previous space was immune, then extend the immune region. Relies on continuous spaces
     // being sorted by Heap::AddContinuousSpace.
-    if (prev_space != NULL && IsImmuneSpace(prev_space)) {
+    if (prev_space != nullptr && IsImmuneSpace(prev_space)) {
       immune_begin_ = std::min(reinterpret_cast<Object*>(space->Begin()), immune_begin_);
       immune_end_ = std::max(reinterpret_cast<Object*>(space->End()), immune_end_);
     }
   }
 }
 
-bool MarkSweep::IsImmuneSpace(const space::ContinuousSpace* space) {
+bool MarkSweep::IsImmuneSpace(const space::ContinuousSpace* space) const {
   return
       immune_begin_ <= reinterpret_cast<Object*>(space->Begin()) &&
       immune_end_ >= reinterpret_cast<Object*>(space->End());
@@ -135,10 +137,9 @@
 
 MarkSweep::MarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix)
     : GarbageCollector(heap,
-                       name_prefix + (name_prefix.empty() ? "" : " ") +
+                       name_prefix +
                        (is_concurrent ? "concurrent mark sweep": "mark sweep")),
       current_mark_bitmap_(NULL),
-      java_lang_Class_(NULL),
       mark_stack_(NULL),
       immune_begin_(NULL),
       immune_end_(NULL),
@@ -147,16 +148,16 @@
       finalizer_reference_list_(NULL),
       phantom_reference_list_(NULL),
       cleared_reference_list_(NULL),
+      live_stack_freeze_size_(0),
       gc_barrier_(new Barrier(0)),
       large_object_lock_("mark sweep large object lock", kMarkSweepLargeObjectLock),
       mark_stack_lock_("mark sweep mark stack lock", kMarkSweepMarkStackLock),
-      is_concurrent_(is_concurrent),
-      clear_soft_references_(false) {
+      is_concurrent_(is_concurrent) {
 }
 
 void MarkSweep::InitializePhase() {
   timings_.Reset();
-  base::TimingLogger::ScopedSplit split("InitializePhase", &timings_);
+  TimingLogger::ScopedSplit split("InitializePhase", &timings_);
   mark_stack_ = heap_->mark_stack_.get();
   DCHECK(mark_stack_ != nullptr);
   SetImmuneRange(nullptr, nullptr);
@@ -165,10 +166,6 @@
   finalizer_reference_list_ = nullptr;
   phantom_reference_list_ = nullptr;
   cleared_reference_list_ = nullptr;
-  freed_bytes_ = 0;
-  freed_large_object_bytes_ = 0;
-  freed_objects_ = 0;
-  freed_large_objects_ = 0;
   class_count_ = 0;
   array_count_ = 0;
   other_count_ = 0;
@@ -179,8 +176,6 @@
   work_chunks_created_ = 0;
   work_chunks_deleted_ = 0;
   reference_count_ = 0;
-  java_lang_Class_ = Class::GetJavaLangClass();
-  CHECK(java_lang_Class_ != nullptr);
 
   FindDefaultMarkBitmap();
 
@@ -190,14 +185,14 @@
 }
 
 void MarkSweep::ProcessReferences(Thread* self) {
-  base::TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
+  TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  ProcessReferences(&soft_reference_list_, clear_soft_references_, &weak_reference_list_,
-                    &finalizer_reference_list_, &phantom_reference_list_);
+  GetHeap()->ProcessReferences(timings_, clear_soft_references_, &IsMarkedCallback,
+                               &RecursiveMarkObjectCallback, this);
 }
 
 bool MarkSweep::HandleDirtyObjectsPhase() {
-  base::TimingLogger::ScopedSplit split("HandleDirtyObjectsPhase", &timings_);
+  TimingLogger::ScopedSplit split("HandleDirtyObjectsPhase", &timings_);
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertExclusiveHeld(self);
 
@@ -243,7 +238,7 @@
 }
 
 void MarkSweep::MarkingPhase() {
-  base::TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
+  TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
   Thread* self = Thread::Current();
 
   BindBitmaps();
@@ -277,7 +272,7 @@
     if (IsImmuneSpace(space)) {
       const char* name = space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
           "UpdateAndMarkImageModUnionTable";
-      base::TimingLogger::ScopedSplit split(name, &timings_);
+      TimingLogger::ScopedSplit split(name, &timings_);
       accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
       CHECK(mod_union_table != nullptr);
       mod_union_table->UpdateAndMarkReferences(MarkRootCallback, this);
@@ -294,8 +289,7 @@
   // knowing that new allocations won't be marked as live.
   timings_.StartSplit("MarkStackAsLive");
   accounting::ObjectStack* live_stack = heap_->GetLiveStack();
-  heap_->MarkAllocStack(heap_->alloc_space_->GetLiveBitmap(),
-                        heap_->large_object_space_->GetLiveObjects(), live_stack);
+  heap_->MarkAllocStackAsLive(live_stack);
   live_stack->Reset();
   timings_.EndSplit();
   // Recursively mark all the non-image bits set in the mark bitmap.
@@ -303,7 +297,7 @@
 }
 
 void MarkSweep::ReclaimPhase() {
-  base::TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
+  TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
   Thread* self = Thread::Current();
 
   if (!IsConcurrent()) {
@@ -318,7 +312,7 @@
   if (IsConcurrent()) {
     Runtime::Current()->AllowNewSystemWeaks();
 
-    base::TimingLogger::ScopedSplit split("UnMarkAllocStack", &timings_);
+    TimingLogger::ScopedSplit split("UnMarkAllocStack", &timings_);
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     accounting::ObjectStack* allocation_stack = GetHeap()->allocation_stack_.get();
     // The allocation stack contains things allocated since the start of the GC. These may have been
@@ -369,10 +363,12 @@
 }
 
 void MarkSweep::FindDefaultMarkBitmap() {
-  base::TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", &timings_);
+  TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
-      current_mark_bitmap_ = space->GetMarkBitmap();
+    accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
+    if (bitmap != nullptr &&
+        space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
+      current_mark_bitmap_ = bitmap;
       CHECK(current_mark_bitmap_ != NULL);
       return;
     }
@@ -411,6 +407,13 @@
   }
 }
 
+mirror::Object* MarkSweep::RecursiveMarkObjectCallback(mirror::Object* obj, void* arg) {
+  MarkSweep* mark_sweep = reinterpret_cast<MarkSweep*>(arg);
+  mark_sweep->MarkObject(obj);
+  mark_sweep->ProcessMarkStack(true);
+  return obj;
+}
+
 inline void MarkSweep::UnMarkObjectNonNull(const Object* obj) {
   DCHECK(!IsImmune(obj));
   // Try to take advantage of locality of references within a space, failing this find the space
@@ -610,13 +613,11 @@
 }
 
 void MarkSweep::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
-  CHECK(space->IsDlMallocSpace());
-  space::DlMallocSpace* alloc_space = space->AsDlMallocSpace();
+  CHECK(space->IsMallocSpace());
+  space::MallocSpace* alloc_space = space->AsMallocSpace();
   accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = alloc_space->mark_bitmap_.release();
+  accounting::SpaceBitmap* mark_bitmap = alloc_space->BindLiveToMarkBitmap();
   GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
-  alloc_space->temp_bitmap_.reset(mark_bitmap);
-  alloc_space->mark_bitmap_.reset(live_bitmap);
 }
 
 class ScanObjectVisitor {
@@ -625,7 +626,7 @@
       : mark_sweep_(mark_sweep) {}
 
   // TODO: Fix this when annotalysis works with visitors.
-  void operator()(const Object* obj) const ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS {
+  void operator()(Object* obj) const ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS {
     if (kCheckLocks) {
       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
@@ -814,6 +815,9 @@
     const size_t mark_stack_delta = std::min(CardScanTask::kMaxSize / 2,
                                              mark_stack_size / mark_stack_tasks + 1);
     for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+      if (space->GetMarkBitmap() == nullptr) {
+        continue;
+      }
       byte* card_begin = space->Begin();
       byte* card_end = space->End();
       // Align up the end address. For example, the image space's end
@@ -856,24 +860,26 @@
     timings_.EndSplit();
   } else {
     for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-      // Image spaces are handled properly since live == marked for them.
-      switch (space->GetGcRetentionPolicy()) {
-        case space::kGcRetentionPolicyNeverCollect:
-          timings_.StartSplit(paused ? "(Paused)ScanGrayImageSpaceObjects" :
-              "ScanGrayImageSpaceObjects");
-          break;
-        case space::kGcRetentionPolicyFullCollect:
-          timings_.StartSplit(paused ? "(Paused)ScanGrayZygoteSpaceObjects" :
-              "ScanGrayZygoteSpaceObjects");
-          break;
-        case space::kGcRetentionPolicyAlwaysCollect:
-          timings_.StartSplit(paused ? "(Paused)ScanGrayAllocSpaceObjects" :
-              "ScanGrayAllocSpaceObjects");
-          break;
-        }
-      ScanObjectVisitor visitor(this);
-      card_table->Scan(space->GetMarkBitmap(), space->Begin(), space->End(), visitor, minimum_age);
-      timings_.EndSplit();
+      if (space->GetMarkBitmap() != nullptr) {
+        // Image spaces are handled properly since live == marked for them.
+        switch (space->GetGcRetentionPolicy()) {
+          case space::kGcRetentionPolicyNeverCollect:
+            timings_.StartSplit(paused ? "(Paused)ScanGrayImageSpaceObjects" :
+                "ScanGrayImageSpaceObjects");
+            break;
+          case space::kGcRetentionPolicyFullCollect:
+            timings_.StartSplit(paused ? "(Paused)ScanGrayZygoteSpaceObjects" :
+                "ScanGrayZygoteSpaceObjects");
+            break;
+          case space::kGcRetentionPolicyAlwaysCollect:
+            timings_.StartSplit(paused ? "(Paused)ScanGrayAllocSpaceObjects" :
+                "ScanGrayAllocSpaceObjects");
+            break;
+          }
+        ScanObjectVisitor visitor(this);
+        card_table->Scan(space->GetMarkBitmap(), space->Begin(), space->End(), visitor, minimum_age);
+        timings_.EndSplit();
+      }
     }
   }
 }
@@ -933,7 +939,7 @@
 // Populates the mark stack based on the set of marked objects and
 // recursively marks until the mark stack is emptied.
 void MarkSweep::RecursiveMark() {
-  base::TimingLogger::ScopedSplit split("RecursiveMark", &timings_);
+  TimingLogger::ScopedSplit split("RecursiveMark", &timings_);
   // RecursiveMark will build the lists of known instances of the Reference classes.
   // See DelayReferenceReferent for details.
   CHECK(soft_reference_list_ == NULL);
@@ -954,9 +960,8 @@
       if ((space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) ||
           (!partial && space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect)) {
         current_mark_bitmap_ = space->GetMarkBitmap();
-        if (current_mark_bitmap_ == NULL) {
-          GetHeap()->DumpSpaces();
-          LOG(FATAL) << "invalid bitmap";
+        if (current_mark_bitmap_ == nullptr) {
+          continue;
         }
         if (parallel) {
          // We will use the mark stack in the future.
@@ -994,7 +999,7 @@
   ProcessMarkStack(false);
 }
 
-mirror::Object* MarkSweep::SystemWeakIsMarkedCallback(Object* object, void* arg) {
+mirror::Object* MarkSweep::IsMarkedCallback(Object* object, void* arg) {
   if (reinterpret_cast<MarkSweep*>(arg)->IsMarked(object)) {
     return object;
   }
@@ -1015,7 +1020,7 @@
 void MarkSweep::SweepSystemWeaks() {
   Runtime* runtime = Runtime::Current();
   timings_.StartSplit("SweepSystemWeaks");
-  runtime->SweepSystemWeaks(SystemWeakIsMarkedCallback, this);
+  runtime->SweepSystemWeaks(IsMarkedCallback, this);
   timings_.EndSplit();
 }
 
@@ -1121,7 +1126,7 @@
 }
 
 void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitmaps) {
-  space::DlMallocSpace* space = heap_->GetAllocSpace();
+  space::MallocSpace* space = heap_->GetNonMovingSpace();
   timings_.StartSplit("SweepArray");
   // Newly allocated objects MUST be in the alloc space and those are the only objects which we are
   // going to free.
@@ -1200,15 +1205,18 @@
 
 void MarkSweep::Sweep(bool swap_bitmaps) {
   DCHECK(mark_stack_->IsEmpty());
-  base::TimingLogger::ScopedSplit("Sweep", &timings_);
+  TimingLogger::ScopedSplit("Sweep", &timings_);
 
   const bool partial = (GetGcType() == kGcTypePartial);
   SweepCallbackContext scc;
   scc.mark_sweep = this;
   scc.self = Thread::Current();
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    if (!space->IsMallocSpace()) {
+      continue;
+    }
     // We always sweep always-collect spaces.
-    bool sweep_space = (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect);
+    bool sweep_space = space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect;
     if (!partial && !sweep_space) {
       // We sweep full-collect spaces when the GC isn't a partial GC (i.e. it's a full GC).
       sweep_space = (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect);
@@ -1216,19 +1224,19 @@
     if (sweep_space) {
       uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
       uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
-      scc.space = space->AsDlMallocSpace();
+      scc.space = space->AsMallocSpace();
       accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
       accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
       if (swap_bitmaps) {
         std::swap(live_bitmap, mark_bitmap);
       }
       if (!space->IsZygoteSpace()) {
-        base::TimingLogger::ScopedSplit split("SweepAllocSpace", &timings_);
+        TimingLogger::ScopedSplit split("SweepAllocSpace", &timings_);
         // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
         accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
                                            &SweepCallback, reinterpret_cast<void*>(&scc));
       } else {
-        base::TimingLogger::ScopedSplit split("SweepZygote", &timings_);
+        TimingLogger::ScopedSplit split("SweepZygote", &timings_);
         // Zygote sweep takes care of dirtying cards and clearing live bits, does not free actual
         // memory.
         accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
@@ -1241,7 +1249,7 @@
 }
 
 void MarkSweep::SweepLargeObjects(bool swap_bitmaps) {
-  base::TimingLogger::ScopedSplit("SweepLargeObjects", &timings_);
+  TimingLogger::ScopedSplit("SweepLargeObjects", &timings_);
   // Sweep large objects
   space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
   accounting::SpaceSetMap* large_live_objects = large_object_space->GetLiveObjects();
@@ -1266,7 +1274,7 @@
 
 void MarkSweep::CheckReference(const Object* obj, const Object* ref, MemberOffset offset, bool is_static) {
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace() && space->Contains(ref)) {
+    if (space->IsMallocSpace() && space->Contains(ref)) {
       DCHECK(IsMarked(obj));
 
       bool is_marked = IsMarked(ref);
@@ -1313,40 +1321,7 @@
   DCHECK(klass != nullptr);
   DCHECK(klass->IsReferenceClass());
   DCHECK(obj != NULL);
-  Object* referent = heap_->GetReferenceReferent(obj);
-  if (referent != NULL && !IsMarked(referent)) {
-    if (kCountJavaLangRefs) {
-      ++reference_count_;
-    }
-    Thread* self = Thread::Current();
-    // TODO: Remove these locks, and use atomic stacks for storing references?
-    // We need to check that the references haven't already been enqueued since we can end up
-    // scanning the same reference multiple times due to dirty cards.
-    if (klass->IsSoftReferenceClass()) {
-      MutexLock mu(self, *heap_->GetSoftRefQueueLock());
-      if (!heap_->IsEnqueued(obj)) {
-        heap_->EnqueuePendingReference(obj, &soft_reference_list_);
-      }
-    } else if (klass->IsWeakReferenceClass()) {
-      MutexLock mu(self, *heap_->GetWeakRefQueueLock());
-      if (!heap_->IsEnqueued(obj)) {
-        heap_->EnqueuePendingReference(obj, &weak_reference_list_);
-      }
-    } else if (klass->IsFinalizerReferenceClass()) {
-      MutexLock mu(self, *heap_->GetFinalizerRefQueueLock());
-      if (!heap_->IsEnqueued(obj)) {
-        heap_->EnqueuePendingReference(obj, &finalizer_reference_list_);
-      }
-    } else if (klass->IsPhantomReferenceClass()) {
-      MutexLock mu(self, *heap_->GetPhantomRefQueueLock());
-      if (!heap_->IsEnqueued(obj)) {
-        heap_->EnqueuePendingReference(obj, &phantom_reference_list_);
-      }
-    } else {
-      LOG(FATAL) << "Invalid reference type " << PrettyClass(klass)
-                 << " " << std::hex << klass->GetAccessFlags();
-    }
-  }
+  heap_->DelayReferenceReferent(klass, obj, IsMarkedCallback, this);
 }
 
 class MarkObjectVisitor {
@@ -1370,9 +1345,9 @@
 
 // Scans an object reference.  Determines the type of the reference
 // and dispatches to a specialized scanning routine.
-void MarkSweep::ScanObject(const Object* obj) {
+void MarkSweep::ScanObject(Object* obj) {
   MarkObjectVisitor visitor(this);
-  ScanObjectVisit(const_cast<Object*>(obj), visitor);
+  ScanObjectVisit(obj, visitor);
 }
 
 void MarkSweep::ProcessMarkStackParallel(size_t thread_count) {
@@ -1406,12 +1381,12 @@
   } else {
     // TODO: Tune this.
     static const size_t kFifoSize = 4;
-    BoundedFifoPowerOfTwo<const Object*, kFifoSize> prefetch_fifo;
+    BoundedFifoPowerOfTwo<Object*, kFifoSize> prefetch_fifo;
     for (;;) {
-      const Object* obj = NULL;
+      Object* obj = NULL;
       if (kUseMarkStackPrefetch) {
         while (!mark_stack_->IsEmpty() && prefetch_fifo.size() < kFifoSize) {
-          const Object* obj = mark_stack_->PopBack();
+          Object* obj = mark_stack_->PopBack();
           DCHECK(obj != NULL);
           __builtin_prefetch(obj);
           prefetch_fifo.push_back(obj);
@@ -1434,43 +1409,6 @@
   timings_.EndSplit();
 }
 
-// Walks the reference list marking any references subject to the
-// reference clearing policy.  References with a black referent are
-// removed from the list.  References with white referents biased
-// toward saving are blackened and also removed from the list.
-void MarkSweep::PreserveSomeSoftReferences(Object** list) {
-  DCHECK(list != NULL);
-  Object* clear = NULL;
-  size_t counter = 0;
-
-  DCHECK(mark_stack_->IsEmpty());
-
-  timings_.StartSplit("PreserveSomeSoftReferences");
-  while (*list != NULL) {
-    Object* ref = heap_->DequeuePendingReference(list);
-    Object* referent = heap_->GetReferenceReferent(ref);
-    if (referent == NULL) {
-      // Referent was cleared by the user during marking.
-      continue;
-    }
-    bool is_marked = IsMarked(referent);
-    if (!is_marked && ((++counter) & 1)) {
-      // Referent is white and biased toward saving, mark it.
-      MarkObject(referent);
-      is_marked = true;
-    }
-    if (!is_marked) {
-      // Referent is white, queue it for clearing.
-      heap_->EnqueuePendingReference(ref, &clear);
-    }
-  }
-  *list = clear;
-  timings_.EndSplit();
-
-  // Restart the mark with the newly black references added to the root set.
-  ProcessMarkStack(true);
-}
-
 inline bool MarkSweep::IsMarked(const Object* object) const
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
   if (IsImmune(object)) {
@@ -1483,103 +1421,11 @@
   return heap_->GetMarkBitmap()->Test(object);
 }
 
-// Unlink the reference list clearing references objects with white
-// referents.  Cleared references registered to a reference queue are
-// scheduled for appending by the heap worker thread.
-void MarkSweep::ClearWhiteReferences(Object** list) {
-  DCHECK(list != NULL);
-  while (*list != NULL) {
-    Object* ref = heap_->DequeuePendingReference(list);
-    Object* referent = heap_->GetReferenceReferent(ref);
-    if (referent != NULL && !IsMarked(referent)) {
-      // Referent is white, clear it.
-      heap_->ClearReferenceReferent(ref);
-      if (heap_->IsEnqueuable(ref)) {
-        heap_->EnqueueReference(ref, &cleared_reference_list_);
-      }
-    }
-  }
-  DCHECK(*list == NULL);
-}
-
-// Enqueues finalizer references with white referents.  White
-// referents are blackened, moved to the zombie field, and the
-// referent field is cleared.
-void MarkSweep::EnqueueFinalizerReferences(Object** list) {
-  DCHECK(list != NULL);
-  timings_.StartSplit("EnqueueFinalizerReferences");
-  MemberOffset zombie_offset = heap_->GetFinalizerReferenceZombieOffset();
-  bool has_enqueued = false;
-  while (*list != NULL) {
-    Object* ref = heap_->DequeuePendingReference(list);
-    Object* referent = heap_->GetReferenceReferent(ref);
-    if (referent != NULL && !IsMarked(referent)) {
-      MarkObject(referent);
-      // If the referent is non-null the reference must queuable.
-      DCHECK(heap_->IsEnqueuable(ref));
-      ref->SetFieldObject(zombie_offset, referent, false);
-      heap_->ClearReferenceReferent(ref);
-      heap_->EnqueueReference(ref, &cleared_reference_list_);
-      has_enqueued = true;
-    }
-  }
-  timings_.EndSplit();
-  if (has_enqueued) {
-    ProcessMarkStack(true);
-  }
-  DCHECK(*list == NULL);
-}
-
-// Process reference class instances and schedule finalizations.
-void MarkSweep::ProcessReferences(Object** soft_references, bool clear_soft,
-                                  Object** weak_references,
-                                  Object** finalizer_references,
-                                  Object** phantom_references) {
-  CHECK(soft_references != NULL);
-  CHECK(weak_references != NULL);
-  CHECK(finalizer_references != NULL);
-  CHECK(phantom_references != NULL);
-  CHECK(mark_stack_->IsEmpty());
-
-  // Unless we are in the zygote or required to clear soft references
-  // with white references, preserve some white referents.
-  if (!clear_soft && !Runtime::Current()->IsZygote()) {
-    PreserveSomeSoftReferences(soft_references);
-  }
-
-  timings_.StartSplit("ProcessReferences");
-  // Clear all remaining soft and weak references with white
-  // referents.
-  ClearWhiteReferences(soft_references);
-  ClearWhiteReferences(weak_references);
-  timings_.EndSplit();
-
-  // Preserve all white objects with finalize methods and schedule
-  // them for finalization.
-  EnqueueFinalizerReferences(finalizer_references);
-
-  timings_.StartSplit("ProcessReferences");
-  // Clear all f-reachable soft and weak references with white
-  // referents.
-  ClearWhiteReferences(soft_references);
-  ClearWhiteReferences(weak_references);
-
-  // Clear all phantom references with white referents.
-  ClearWhiteReferences(phantom_references);
-
-  // At this point all reference lists should be empty.
-  DCHECK(*soft_references == NULL);
-  DCHECK(*weak_references == NULL);
-  DCHECK(*finalizer_references == NULL);
-  DCHECK(*phantom_references == NULL);
-  timings_.EndSplit();
-}
-
 void MarkSweep::UnBindBitmaps() {
-  base::TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
+  TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace()) {
-      space::DlMallocSpace* alloc_space = space->AsDlMallocSpace();
+    if (space->IsMallocSpace()) {
+      space::MallocSpace* alloc_space = space->AsMallocSpace();
       if (alloc_space->temp_bitmap_.get() != NULL) {
         // At this point, the temp_bitmap holds our old mark bitmap.
         accounting::SpaceBitmap* new_bitmap = alloc_space->temp_bitmap_.release();
@@ -1593,26 +1439,16 @@
 }
 
 void MarkSweep::FinishPhase() {
-  base::TimingLogger::ScopedSplit split("FinishPhase", &timings_);
+  TimingLogger::ScopedSplit split("FinishPhase", &timings_);
   // Can't enqueue references if we hold the mutator lock.
-  Object* cleared_references = GetClearedReferences();
   Heap* heap = GetHeap();
-  timings_.NewSplit("EnqueueClearedReferences");
-  heap->EnqueueClearedReferences(&cleared_references);
-
   timings_.NewSplit("PostGcVerification");
   heap->PostGcVerification(this);
 
-  timings_.NewSplit("GrowForUtilization");
-  heap->GrowForUtilization(GetGcType(), GetDurationNs());
-
   timings_.NewSplit("RequestHeapTrim");
   heap->RequestHeapTrim();
 
   // Update the cumulative statistics
-  total_time_ns_ += GetDurationNs();
-  total_paused_time_ns_ += std::accumulate(GetPauseTimes().begin(), GetPauseTimes().end(), 0,
-                                           std::plus<uint64_t>());
   total_freed_objects_ += GetFreedObjects() + GetFreedLargeObjects();
   total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes();
 
@@ -1651,8 +1487,10 @@
 
   // Clear all of the spaces' mark bitmaps.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->GetGcRetentionPolicy() != space::kGcRetentionPolicyNeverCollect) {
-      space->GetMarkBitmap()->Clear();
+    accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
+    if (bitmap != nullptr &&
+        space->GetGcRetentionPolicy() != space::kGcRetentionPolicyNeverCollect) {
+      bitmap->Clear();
     }
   }
   mark_stack_->Reset();
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 3bc014a..53d85b0 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -114,7 +114,7 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsImmuneSpace(const space::ContinuousSpace* space)
+  bool IsImmuneSpace(const space::ContinuousSpace* space) const;
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Bind the live bits to the mark bits of bitmaps for spaces that are never collected, ie
@@ -140,6 +140,7 @@
   void ProcessReferences(Thread* self)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Update and mark references from immune spaces.
   virtual void UpdateAndMarkModUnion()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -158,7 +159,7 @@
   }
 
   // Blackens an object.
-  void ScanObject(const mirror::Object* obj)
+  void ScanObject(mirror::Object* obj)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -167,38 +168,6 @@
   void ScanObjectVisit(mirror::Object* obj, const MarkVisitor& visitor)
       NO_THREAD_SAFETY_ANALYSIS;
 
-  size_t GetFreedBytes() const {
-    return freed_bytes_;
-  }
-
-  size_t GetFreedLargeObjectBytes() const {
-    return freed_large_object_bytes_;
-  }
-
-  size_t GetFreedObjects() const {
-    return freed_objects_;
-  }
-
-  size_t GetFreedLargeObjects() const {
-    return freed_large_objects_;
-  }
-
-  uint64_t GetTotalTimeNs() const {
-    return total_time_ns_;
-  }
-
-  uint64_t GetTotalPausedTimeNs() const {
-    return total_paused_time_ns_;
-  }
-
-  uint64_t GetTotalFreedObjects() const {
-    return total_freed_objects_;
-  }
-
-  uint64_t GetTotalFreedBytes() const {
-    return total_freed_bytes_;
-  }
-
   // Everything inside the immune range is assumed to be marked.
   void SetImmuneRange(mirror::Object* begin, mirror::Object* end);
 
@@ -216,11 +185,14 @@
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   template <typename Visitor>
-  static void VisitObjectReferences(mirror::Object* obj, const Visitor& visitor,
-                                    bool visit_class = false)
+  static void VisitObjectReferences(mirror::Object* obj, const Visitor& visitor, bool visit_class)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
                             Locks::mutator_lock_);
 
+  static mirror::Object* RecursiveMarkObjectCallback(mirror::Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
@@ -244,10 +216,7 @@
   // Returns true if the object has its bit set in the mark bitmap.
   bool IsMarked(const mirror::Object* object) const;
 
-  static mirror::Object* SystemWeakIsMarkedCallback(mirror::Object* object, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  static mirror::Object* SystemWeakIsMarkedArrayCallback(mirror::Object* object, void* arg)
+  static mirror::Object* IsMarkedCallback(mirror::Object* object, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   static void VerifyImageRootVisitor(mirror::Object* root, void* arg)
@@ -381,13 +350,6 @@
   void ClearWhiteReferences(mirror::Object** list)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  void ProcessReferences(mirror::Object** soft_references, bool clear_soft_references,
-                         mirror::Object** weak_references,
-                         mirror::Object** finalizer_references,
-                         mirror::Object** phantom_references)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Whether or not we count how many of each type of object were scanned.
   static const bool kCountScannedTypes = false;
 
@@ -395,9 +357,6 @@
   // object.
   accounting::SpaceBitmap* current_mark_bitmap_;
 
-  // Cache java.lang.Class for optimization.
-  mirror::Class* java_lang_Class_;
-
   accounting::ObjectStack* mark_stack_;
 
   // Immune range, every object inside the immune range is assumed to be marked.
@@ -412,14 +371,6 @@
 
   // Parallel finger.
   AtomicInteger atomic_finger_;
-  // Number of non large object bytes freed in this collection.
-  AtomicInteger freed_bytes_;
-  // Number of large object bytes freed.
-  AtomicInteger freed_large_object_bytes_;
-  // Number of objects freed in this collection.
-  AtomicInteger freed_objects_;
-  // Number of freed large objects.
-  AtomicInteger freed_large_objects_;
   // Number of classes scanned, if kCountScannedTypes.
   AtomicInteger class_count_;
   // Number of arrays scanned, if kCountScannedTypes.
@@ -443,8 +394,6 @@
 
   const bool is_concurrent_;
 
-  bool clear_soft_references_;
-
  private:
   friend class AddIfReachesAllocSpaceVisitor;  // Used by mod-union table.
   friend class CardScanTask;
diff --git a/runtime/gc/collector/partial_mark_sweep.cc b/runtime/gc/collector/partial_mark_sweep.cc
index 29367ce..8ec28f3 100644
--- a/runtime/gc/collector/partial_mark_sweep.cc
+++ b/runtime/gc/collector/partial_mark_sweep.cc
@@ -26,7 +26,7 @@
 namespace collector {
 
 PartialMarkSweep::PartialMarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix)
-    : MarkSweep(heap, is_concurrent, name_prefix + (name_prefix.empty() ? "" : " ") + "partial") {
+    : MarkSweep(heap, is_concurrent, name_prefix.empty() ? "partial " : name_prefix) {
   cumulative_timings_.SetName(GetName());
 }
 
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
new file mode 100644
index 0000000..3b8f7c3
--- /dev/null
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_INL_H_
+#define ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_INL_H_
+
+namespace art {
+namespace gc {
+namespace collector {
+
+inline mirror::Object* SemiSpace::GetForwardingAddressInFromSpace(mirror::Object* obj) const {
+  DCHECK(from_space_->HasAddress(obj));
+  LockWord lock_word = obj->GetLockWord();
+  if (lock_word.GetState() != LockWord::kForwardingAddress) {
+    return nullptr;
+  }
+  return reinterpret_cast<mirror::Object*>(lock_word.ForwardingAddress());
+}
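+
+// A minimal illustration of the lock word round trip relied on above (using
+// only LockWord calls that appear elsewhere in this change): once an object
+// has been copied, its stale from-space copy carries the destination address
+// in its lock word.
+//
+//   obj->SetLockWord(LockWord::FromForwardingAddress(reinterpret_cast<size_t>(dest)));
+//   DCHECK_EQ(obj->GetLockWord().GetState(), LockWord::kForwardingAddress);
+//   DCHECK_EQ(GetForwardingAddressInFromSpace(obj), dest);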
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_INL_H_
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
new file mode 100644
index 0000000..3939354
--- /dev/null
+++ b/runtime/gc/collector/semi_space.cc
@@ -0,0 +1,615 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "semi_space.h"
+
+#include <functional>
+#include <numeric>
+#include <climits>
+#include <vector>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/mutex-inl.h"
+#include "base/timing_logger.h"
+#include "gc/accounting/heap_bitmap.h"
+#include "gc/accounting/mod_union_table.h"
+#include "gc/accounting/space_bitmap-inl.h"
+#include "gc/heap.h"
+#include "gc/space/bump_pointer_space.h"
+#include "gc/space/bump_pointer_space-inl.h"
+#include "gc/space/image_space.h"
+#include "gc/space/large_object_space.h"
+#include "gc/space/space-inl.h"
+#include "indirect_reference_table.h"
+#include "intern_table.h"
+#include "jni_internal.h"
+#include "mark_sweep-inl.h"
+#include "monitor.h"
+#include "mirror/art_field.h"
+#include "mirror/art_field-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/class_loader.h"
+#include "mirror/dex_cache.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array.h"
+#include "mirror/object_array-inl.h"
+#include "runtime.h"
+#include "semi_space-inl.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+#include "verifier/method_verifier.h"
+
+using ::art::mirror::Class;
+using ::art::mirror::Object;
+
+namespace art {
+namespace gc {
+namespace collector {
+
+static constexpr bool kProtectFromSpace = true;
+static constexpr bool kResetFromSpace = true;
+
+// TODO: Unduplicate logic.
+void SemiSpace::ImmuneSpace(space::ContinuousSpace* space) {
+  // Bind live to mark bitmap if necessary.
+  if (space->GetLiveBitmap() != space->GetMarkBitmap()) {
+    BindLiveToMarkBitmap(space);
+  }
+  // Add the space to the immune region.
+  if (immune_begin_ == nullptr) {
+    DCHECK(immune_end_ == nullptr);
+    immune_begin_ = reinterpret_cast<Object*>(space->Begin());
+    immune_end_ = reinterpret_cast<Object*>(space->End());
+  } else {
+    const space::ContinuousSpace* prev_space = nullptr;
+    // Find out if the previous space is immune.
+    for (space::ContinuousSpace* cur_space : GetHeap()->GetContinuousSpaces()) {
+      if (cur_space == space) {
+        break;
+      }
+      prev_space = cur_space;
+    }
+    // If previous space was immune, then extend the immune region. Relies on continuous spaces
+    // being sorted by Heap::AddContinuousSpace.
+    if (prev_space != nullptr && IsImmuneSpace(prev_space)) {
+      immune_begin_ = std::min(reinterpret_cast<Object*>(space->Begin()), immune_begin_);
+      immune_end_ = std::max(reinterpret_cast<Object*>(space->End()), immune_end_);
+    }
+  }
+}
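+
+// Worked example with hypothetical addresses: if the image space
+// [0x1000, 0x2000) is already immune and the adjacent zygote space
+// [0x2000, 0x3000) is passed in, the immune region grows to [0x1000, 0x3000).
+// If the previous space is not immune, the bounds are left unchanged.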
+
+void SemiSpace::BindBitmaps() {
+  timings_.StartSplit("BindBitmaps");
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  // Mark all of the spaces we never collect as immune.
+  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
+        || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
+      ImmuneSpace(space);
+    }
+  }
+  timings_.EndSplit();
+}
+
+SemiSpace::SemiSpace(Heap* heap, const std::string& name_prefix)
+    : GarbageCollector(heap,
+                       name_prefix + (name_prefix.empty() ? "" : " ") + "marksweep + semispace"),
+      mark_stack_(nullptr),
+      immune_begin_(nullptr),
+      immune_end_(nullptr),
+      to_space_(nullptr),
+      from_space_(nullptr),
+      soft_reference_list_(nullptr),
+      weak_reference_list_(nullptr),
+      finalizer_reference_list_(nullptr),
+      phantom_reference_list_(nullptr),
+      cleared_reference_list_(nullptr),
+      self_(nullptr) {
+}
+
+void SemiSpace::InitializePhase() {
+  timings_.Reset();
+  TimingLogger::ScopedSplit split("InitializePhase", &timings_);
+  mark_stack_ = heap_->mark_stack_.get();
+  DCHECK(mark_stack_ != nullptr);
+  immune_begin_ = nullptr;
+  immune_end_ = nullptr;
+  soft_reference_list_ = nullptr;
+  weak_reference_list_ = nullptr;
+  finalizer_reference_list_ = nullptr;
+  phantom_reference_list_ = nullptr;
+  cleared_reference_list_ = nullptr;
+  self_ = Thread::Current();
+  // Do any pre GC verification.
+  timings_.NewSplit("PreGcVerification");
+  heap_->PreGcVerification(this);
+}
+
+void SemiSpace::ProcessReferences(Thread* self) {
+  TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  GetHeap()->ProcessReferences(timings_, clear_soft_references_, &MarkedForwardingAddressCallback,
+                               &RecursiveMarkObjectCallback, this);
+}
+
+void SemiSpace::MarkingPhase() {
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertExclusiveHeld(self);
+  TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
+  // Need to do this with mutators paused so that somebody doesn't accidentally allocate into the
+  // wrong space.
+  heap_->SwapSemiSpaces();
+  // Assume the cleared space is already empty.
+  BindBitmaps();
+  // Process dirty cards and add dirty cards to mod-union tables.
+  heap_->ProcessCards(timings_);
+  // Need to do this before the checkpoint since we don't want any threads to add references to
+  // the live stack during the recursive mark.
+  timings_.NewSplit("SwapStacks");
+  heap_->SwapStacks();
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  MarkRoots();
+  // Mark roots of immune spaces.
+  UpdateAndMarkModUnion();
+  // Recursively mark remaining objects.
+  MarkReachableObjects();
+}
+
+bool SemiSpace::IsImmuneSpace(const space::ContinuousSpace* space) const {
+  return immune_begin_ <= reinterpret_cast<Object*>(space->Begin()) &&
+         immune_end_ >= reinterpret_cast<Object*>(space->End());
+}
+
+void SemiSpace::UpdateAndMarkModUnion() {
+  for (auto& space : heap_->GetContinuousSpaces()) {
+    // If the space is immune then we need to mark the references to other spaces.
+    if (IsImmuneSpace(space)) {
+      accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
+      CHECK(table != nullptr);
+      // TODO: Improve naming.
+      TimingLogger::ScopedSplit split(
+          space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
+                                   "UpdateAndMarkImageModUnionTable",
+                                   &timings_);
+      table->UpdateAndMarkReferences(MarkRootCallback, this);
+    }
+  }
+}
+
+void SemiSpace::MarkReachableObjects() {
+  timings_.StartSplit("MarkStackAsLive");
+  accounting::ObjectStack* live_stack = heap_->GetLiveStack();
+  heap_->MarkAllocStackAsLive(live_stack);
+  live_stack->Reset();
+  timings_.EndSplit();
+  // Recursively process the mark stack.
+  ProcessMarkStack(true);
+}
+
+void SemiSpace::ReclaimPhase() {
+  TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
+  Thread* self = Thread::Current();
+  ProcessReferences(self);
+  {
+    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    SweepSystemWeaks();
+  }
+  // Record freed memory.
+  int from_bytes = from_space_->GetBytesAllocated();
+  int to_bytes = to_space_->GetBytesAllocated();
+  int from_objects = from_space_->GetObjectsAllocated();
+  int to_objects = to_space_->GetObjectsAllocated();
+  int freed_bytes = from_bytes - to_bytes;
+  int freed_objects = from_objects - to_objects;
+  CHECK_GE(freed_bytes, 0);
+  freed_bytes_.fetch_add(freed_bytes);
+  freed_objects_.fetch_add(freed_objects);
+  heap_->RecordFree(static_cast<size_t>(freed_objects), static_cast<size_t>(freed_bytes));
+
+  timings_.StartSplit("PreSweepingGcVerification");
+  heap_->PreSweepingGcVerification(this);
+  timings_.EndSplit();
+
+  {
+    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    // Reclaim unmarked objects.
+    Sweep(false);
+    // Swap the live and mark bitmaps for each space we modified. This is an optimization that
+    // lets us avoid clearing live bits inside of the sweep. Only unbound bitmaps are swapped.
+    timings_.StartSplit("SwapBitmaps");
+    SwapBitmaps();
+    timings_.EndSplit();
+    // Unbind the live and mark bitmaps.
+    UnBindBitmaps();
+  }
+  // Release the memory used by the from space.
+  if (kResetFromSpace) {
+    // Clearing from space.
+    from_space_->Clear();
+  }
+  // Protect the from space.
+  VLOG(heap)
+      << "mprotect region " << reinterpret_cast<void*>(from_space_->Begin()) << " - "
+      << reinterpret_cast<void*>(from_space_->Limit());
+  if (kProtectFromSpace) {
+    mprotect(from_space_->Begin(), from_space_->Capacity(), PROT_NONE);
+  } else {
+    mprotect(from_space_->Begin(), from_space_->Capacity(), PROT_READ);
+  }
+}
+
+void SemiSpace::ResizeMarkStack(size_t new_size) {
+  std::vector<Object*> temp(mark_stack_->Begin(), mark_stack_->End());
+  CHECK_LE(mark_stack_->Size(), new_size);
+  mark_stack_->Resize(new_size);
+  for (const auto& obj : temp) {
+    mark_stack_->PushBack(obj);
+  }
+}
+
+inline void SemiSpace::MarkStackPush(Object* obj) {
+  if (UNLIKELY(mark_stack_->Size() >= mark_stack_->Capacity())) {
+    ResizeMarkStack(mark_stack_->Capacity() * 2);
+  }
+  // The object must be pushed onto the mark stack.
+  mark_stack_->PushBack(obj);
+}
+
+// Rare case, probably not worth inlining since it will increase instruction cache miss rate.
+bool SemiSpace::MarkLargeObject(const Object* obj) {
+  // TODO: support >1 discontinuous space.
+  space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
+  accounting::SpaceSetMap* large_objects = large_object_space->GetMarkObjects();
+  if (UNLIKELY(!large_objects->Test(obj))) {
+    large_objects->Set(obj);
+    return true;
+  }
+  return false;
+}
+
+// Used to mark and copy objects. Any newly-marked objects that are in the from-space get moved to
+// the to-space and have their forwarding address updated. Objects which have been newly marked are
+// pushed on the mark stack.
+Object* SemiSpace::MarkObject(Object* obj) {
+  Object* ret = obj;
+  if (obj != nullptr && !IsImmune(obj)) {
+    if (from_space_->HasAddress(obj)) {
+      mirror::Object* forward_address = GetForwardingAddressInFromSpace(obj);
+      // If the object has already been moved, return the new forward address.
+      if (!to_space_->HasAddress(forward_address)) {
+        // Otherwise, we need to move the object and add it to the mark stack for processing.
+        size_t object_size = obj->SizeOf();
+        size_t dummy = 0;
+        forward_address = to_space_->Alloc(self_, object_size, &dummy);
+        // Copy over the object and add it to the mark stack since we still need to update its
+        // references.
+        memcpy(reinterpret_cast<void*>(forward_address), obj, object_size);
+        // Make sure to only update the forwarding address AFTER you copy the object so that the
+        // monitor word doesn't get stomped over.
+        COMPILE_ASSERT(sizeof(uint32_t) == sizeof(mirror::Object*),
+                       monitor_size_must_be_same_as_object);
+        obj->SetLockWord(LockWord::FromForwardingAddress(reinterpret_cast<size_t>(forward_address)));
+        MarkStackPush(forward_address);
+      }
+      ret = forward_address;
+      // TODO: Do we need this "if" in the else branch below?
+    } else {
+      accounting::SpaceBitmap* object_bitmap = heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj);
+      if (LIKELY(object_bitmap != nullptr)) {
+        // This object was not previously marked.
+        if (!object_bitmap->Test(obj)) {
+          object_bitmap->Set(obj);
+          MarkStackPush(obj);
+        }
+      } else {
+        DCHECK(!to_space_->HasAddress(obj)) << "Marking object in to_space_";
+        if (MarkLargeObject(obj)) {
+          MarkStackPush(obj);
+        }
+      }
+    }
+  }
+  return ret;
+}
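+
+// Sketch of the resulting contract (object names here are hypothetical):
+//
+//   Object* moved = MarkObject(from_space_obj);  // Copied into to_space_ and forwarded.
+//   DCHECK(to_space_->HasAddress(moved));
+//   DCHECK_EQ(MarkObject(non_moving_obj), non_moving_obj);  // Bitmap mark only, no move.
+//   DCHECK_EQ(MarkObject(nullptr), nullptr);  // Null and immune objects pass through.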
+
+Object* SemiSpace::RecursiveMarkObjectCallback(Object* root, void* arg) {
+  DCHECK(root != nullptr);
+  DCHECK(arg != nullptr);
+  SemiSpace* semi_space = reinterpret_cast<SemiSpace*>(arg);
+  mirror::Object* ret = semi_space->MarkObject(root);
+  semi_space->ProcessMarkStack(true);
+  return ret;
+}
+
+Object* SemiSpace::MarkRootCallback(Object* root, void* arg) {
+  DCHECK(root != nullptr);
+  DCHECK(arg != nullptr);
+  return reinterpret_cast<SemiSpace*>(arg)->MarkObject(root);
+}
+
+// Marks all objects in the root set.
+void SemiSpace::MarkRoots() {
+  timings_.StartSplit("MarkRoots");
+  // TODO: Visit up image roots as well?
+  Runtime::Current()->VisitRoots(MarkRootCallback, this, false, true);
+  timings_.EndSplit();
+}
+
+void SemiSpace::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
+  CHECK(space->IsMallocSpace());
+  space::MallocSpace* alloc_space = space->AsMallocSpace();
+  accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
+  accounting::SpaceBitmap* mark_bitmap = alloc_space->BindLiveToMarkBitmap();
+  GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
+}
+
+mirror::Object* SemiSpace::GetForwardingAddress(mirror::Object* obj) {
+  if (from_space_->HasAddress(obj)) {
+    LOG(FATAL) << "Shouldn't happen!";
+    return GetForwardingAddressInFromSpace(obj);
+  }
+  return obj;
+}
+
+mirror::Object* SemiSpace::MarkedForwardingAddressCallback(Object* object, void* arg) {
+  return reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(object);
+}
+
+void SemiSpace::SweepSystemWeaks() {
+  timings_.StartSplit("SweepSystemWeaks");
+  Runtime::Current()->SweepSystemWeaks(MarkedForwardingAddressCallback, this);
+  timings_.EndSplit();
+}
+
+struct SweepCallbackContext {
+  SemiSpace* mark_sweep;
+  space::AllocSpace* space;
+  Thread* self;
+};
+
+void SemiSpace::SweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
+  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
+  SemiSpace* gc = context->mark_sweep;
+  Heap* heap = gc->GetHeap();
+  space::AllocSpace* space = context->space;
+  Thread* self = context->self;
+  Locks::heap_bitmap_lock_->AssertExclusiveHeld(self);
+  size_t freed_bytes = space->FreeList(self, num_ptrs, ptrs);
+  heap->RecordFree(num_ptrs, freed_bytes);
+  gc->freed_objects_.fetch_add(num_ptrs);
+  gc->freed_bytes_.fetch_add(freed_bytes);
+}
+
+void SemiSpace::ZygoteSweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
+  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
+  Locks::heap_bitmap_lock_->AssertExclusiveHeld(context->self);
+  Heap* heap = context->mark_sweep->GetHeap();
+  // We don't free any actual memory to avoid dirtying the shared zygote pages.
+  for (size_t i = 0; i < num_ptrs; ++i) {
+    Object* obj = static_cast<Object*>(ptrs[i]);
+    heap->GetLiveBitmap()->Clear(obj);
+    heap->GetCardTable()->MarkCard(obj);
+  }
+}
+
+void SemiSpace::Sweep(bool swap_bitmaps) {
+  DCHECK(mark_stack_->IsEmpty());
+  TimingLogger::ScopedSplit("Sweep", &timings_);
+
+  const bool partial = (GetGcType() == kGcTypePartial);
+  SweepCallbackContext scc;
+  scc.mark_sweep = this;
+  scc.self = Thread::Current();
+  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    if (!space->IsMallocSpace()) {
+      continue;
+    }
+    // We always sweep always-collect spaces.
+    bool sweep_space = (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect);
+    if (!partial && !sweep_space) {
+      // We sweep full-collect spaces when the GC isn't a partial GC (i.e. it's a full GC).
+      sweep_space = (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect);
+    }
+    if (sweep_space && space->IsMallocSpace()) {
+      uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
+      uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
+      scc.space = space->AsMallocSpace();
+      accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
+      accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+      if (swap_bitmaps) {
+        std::swap(live_bitmap, mark_bitmap);
+      }
+      if (!space->IsZygoteSpace()) {
+        TimingLogger::ScopedSplit split("SweepAllocSpace", &timings_);
+        // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
+        accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
+                                           &SweepCallback, reinterpret_cast<void*>(&scc));
+      } else {
+        TimingLogger::ScopedSplit split("SweepZygote", &timings_);
+        // The zygote sweep takes care of dirtying cards and clearing live bits; it does not free
+        // actual memory.
+        accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
+                                           &ZygoteSweepCallback, reinterpret_cast<void*>(&scc));
+      }
+    }
+  }
+
+  SweepLargeObjects(swap_bitmaps);
+}
+
+void SemiSpace::SweepLargeObjects(bool swap_bitmaps) {
+  TimingLogger::ScopedSplit("SweepLargeObjects", &timings_);
+  // Sweep large objects
+  space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
+  accounting::SpaceSetMap* large_live_objects = large_object_space->GetLiveObjects();
+  accounting::SpaceSetMap* large_mark_objects = large_object_space->GetMarkObjects();
+  if (swap_bitmaps) {
+    std::swap(large_live_objects, large_mark_objects);
+  }
+  // O(n*log(n)) but hopefully there are not too many large objects.
+  size_t freed_objects = 0;
+  size_t freed_bytes = 0;
+  Thread* self = Thread::Current();
+  for (const Object* obj : large_live_objects->GetObjects()) {
+    if (!large_mark_objects->Test(obj)) {
+      freed_bytes += large_object_space->Free(self, const_cast<Object*>(obj));
+      ++freed_objects;
+    }
+  }
+  freed_large_objects_.fetch_add(freed_objects);
+  freed_large_object_bytes_.fetch_add(freed_bytes);
+  GetHeap()->RecordFree(freed_objects, freed_bytes);
+}
+
+// Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
+// marked, put it on the appropriate list in the heap for later processing.
+void SemiSpace::DelayReferenceReferent(mirror::Class* klass, Object* obj) {
+  heap_->DelayReferenceReferent(klass, obj, MarkedForwardingAddressCallback, this);
+}
+
+// Visit all of the references of an object and update them.
+void SemiSpace::ScanObject(Object* obj) {
+  DCHECK(obj != NULL);
+  DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space";
+  MarkSweep::VisitObjectReferences(obj, [this](Object* obj, Object* ref, const MemberOffset& offset,
+     bool /* is_static */) ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS {
+    mirror::Object* new_address = MarkObject(ref);
+    if (new_address != ref) {
+      DCHECK(new_address != nullptr);
+      obj->SetFieldObject(offset, new_address, false);
+    }
+  }, kMovingClasses);
+  mirror::Class* klass = obj->GetClass();
+  if (UNLIKELY(klass->IsReferenceClass())) {
+    DelayReferenceReferent(klass, obj);
+  }
+}
+
+// Scan anything that's on the mark stack.
+void SemiSpace::ProcessMarkStack(bool paused) {
+  timings_.StartSplit(paused ? "(paused)ProcessMarkStack" : "ProcessMarkStack");
+  while (!mark_stack_->IsEmpty()) {
+    ScanObject(mark_stack_->PopBack());
+  }
+  timings_.EndSplit();
+}
+
+inline Object* SemiSpace::GetMarkedForwardAddress(mirror::Object* obj) const
+    SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+  // All immune objects are assumed marked.
+  if (IsImmune(obj)) {
+    return obj;
+  }
+  if (from_space_->HasAddress(obj)) {
+    mirror::Object* forwarding_address = GetForwardingAddressInFromSpace(const_cast<Object*>(obj));
+    // If the object is forwarded then it MUST be marked.
+    if (to_space_->HasAddress(forwarding_address)) {
+      return forwarding_address;
+    }
+    // Must not be marked; return nullptr.
+    return nullptr;
+  } else if (to_space_->HasAddress(obj)) {
+    // Already forwarded, must be marked.
+    return obj;
+  }
+  return heap_->GetMarkBitmap()->Test(obj) ? obj : nullptr;
+}
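+
+// GetMarkedForwardAddress doubles as the liveness predicate for system weak
+// sweeping (via MarkedForwardingAddressCallback above): a nullptr result means
+// the weakly held object is dead, e.g.
+//
+//   Object* alive = GetMarkedForwardAddress(referent);  // "referent" is a hypothetical name.
+//   // If alive == nullptr, clear the weak reference; otherwise update it to
+//   // alive, which may now be a to-space address.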
+
+void SemiSpace::UnBindBitmaps() {
+  TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
+  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    if (space->IsMallocSpace()) {
+      space::MallocSpace* alloc_space = space->AsMallocSpace();
+      if (alloc_space->HasBoundBitmaps()) {
+        alloc_space->UnBindBitmaps();
+        heap_->GetMarkBitmap()->ReplaceBitmap(alloc_space->GetLiveBitmap(),
+                                              alloc_space->GetMarkBitmap());
+      }
+    }
+  }
+}
+
+void SemiSpace::SetToSpace(space::ContinuousMemMapAllocSpace* to_space) {
+  DCHECK(to_space != nullptr);
+  to_space_ = to_space;
+}
+
+void SemiSpace::SetFromSpace(space::ContinuousMemMapAllocSpace* from_space) {
+  DCHECK(from_space != nullptr);
+  from_space_ = from_space;
+}
+
+void SemiSpace::FinishPhase() {
+  TimingLogger::ScopedSplit split("FinishPhase", &timings_);
+  // Can't enqueue references if we hold the mutator lock.
+  Heap* heap = GetHeap();
+  timings_.NewSplit("PostGcVerification");
+  heap->PostGcVerification(this);
+
+  // Null the "to" and "from" spaces since compacting from one to the other isn't valid until
+  // further action is done by the heap.
+  to_space_ = nullptr;
+  from_space_ = nullptr;
+
+  // Update the cumulative statistics
+  total_freed_objects_ += GetFreedObjects() + GetFreedLargeObjects();
+  total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes();
+
+  // Ensure that the mark stack is empty.
+  CHECK(mark_stack_->IsEmpty());
+
+  // Update the cumulative loggers.
+  cumulative_timings_.Start();
+  cumulative_timings_.AddLogger(timings_);
+  cumulative_timings_.End();
+
+  // Clear all of the spaces' mark bitmaps.
+  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
+    if (bitmap != nullptr &&
+        space->GetGcRetentionPolicy() != space::kGcRetentionPolicyNeverCollect) {
+      bitmap->Clear();
+    }
+  }
+  mark_stack_->Reset();
+
+  // Reset the marked large objects.
+  space::LargeObjectSpace* large_objects = GetHeap()->GetLargeObjectsSpace();
+  large_objects->GetMarkObjects()->Clear();
+}
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
new file mode 100644
index 0000000..0f0cae1
--- /dev/null
+++ b/runtime/gc/collector/semi_space.h
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_H_
+#define ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_H_
+
+#include "atomic_integer.h"
+#include "barrier.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "garbage_collector.h"
+#include "offsets.h"
+#include "root_visitor.h"
+#include "UniquePtr.h"
+
+namespace art {
+
+namespace mirror {
+  class Class;
+  class Object;
+  template<class T> class ObjectArray;
+}  // namespace mirror
+
+class StackVisitor;
+class Thread;
+
+namespace gc {
+
+namespace accounting {
+  template <typename T> class AtomicStack;
+  class MarkIfReachesAllocspaceVisitor;
+  class ModUnionClearCardVisitor;
+  class ModUnionVisitor;
+  class ModUnionTableBitmap;
+  class MarkStackChunk;
+  typedef AtomicStack<mirror::Object*> ObjectStack;
+  class SpaceBitmap;
+}  // namespace accounting
+
+namespace space {
+  class BumpPointerSpace;
+  class ContinuousMemMapAllocSpace;
+  class ContinuousSpace;
+}  // namespace space
+
+class Heap;
+
+namespace collector {
+
+class SemiSpace : public GarbageCollector {
+ public:
+  explicit SemiSpace(Heap* heap, const std::string& name_prefix = "");
+
+  ~SemiSpace() {}
+
+  virtual void InitializePhase();
+  virtual bool IsConcurrent() const {
+    return false;
+  }
+  virtual void MarkingPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void ReclaimPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void FinishPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void MarkReachableObjects()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+  virtual GcType GetGcType() const {
+    return kGcTypePartial;
+  }
+
+  // Sets which space we will be copying objects to.
+  void SetToSpace(space::ContinuousMemMapAllocSpace* to_space);
+
+  // Set the space where we copy objects from.
+  void SetFromSpace(space::ContinuousMemMapAllocSpace* from_space);
+
+  // Initializes internal structures.
+  void Init();
+
+  // Find the default mark bitmap.
+  void FindDefaultMarkBitmap();
+
+  // Returns the new address of the object.
+  mirror::Object* MarkObject(mirror::Object* object)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  void ScanObject(mirror::Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Marks the root set at the start of a garbage collection.
+  void MarkRoots()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Make a space immune; immune spaces have all of their live objects marked, that is, the mark
+  // and live bitmaps are bound together.
+  void ImmuneSpace(space::ContinuousSpace* space)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Bind the live bits to the mark bits of bitmaps for spaces that are never collected, i.e.
+  // the image. Mark that portion of the heap as immune.
+  virtual void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void BindLiveToMarkBitmap(space::ContinuousSpace* space)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  void UnBindBitmaps()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  void ProcessReferences(Thread* self)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Sweeps unmarked objects to complete the garbage collection.
+  virtual void Sweep(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  // Sweeps unmarked large objects to complete the garbage collection.
+  void SweepLargeObjects(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  // Sweep only pointers within an array. WARNING: Trashes objects.
+  void SweepArray(accounting::ObjectStack* allocation_stack_, bool swap_bitmaps)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  mirror::Object* GetClearedReferences() {
+    return cleared_reference_list_;
+  }
+
+  // TODO: enable thread safety analysis when in use by multiple worker threads.
+  template <typename MarkVisitor>
+  void ScanObjectVisit(const mirror::Object* obj, const MarkVisitor& visitor)
+      NO_THREAD_SAFETY_ANALYSIS;
+
+  void SweepSystemWeaks()
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  template <typename Visitor>
+  static void VisitObjectReferencesAndClass(mirror::Object* obj, const Visitor& visitor)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  static mirror::Object* RecursiveMarkObjectCallback(mirror::Object* root, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+ protected:
+  // Returns null if the object is not marked, otherwise returns the forwarding address (same as
+  // the object itself for non-movable things).
+  mirror::Object* GetMarkedForwardAddress(mirror::Object* object) const;
+
+  static mirror::Object* MarkedForwardingAddressCallback(mirror::Object* object, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  // Marks a large object; returns true if the object was not already marked.
+  bool MarkLargeObject(const mirror::Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  // Special sweep for zygote that just marks objects / dirties cards.
+  static void ZygoteSweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  // Expand the mark stack to the given new size (callers double the current capacity).
+  void ResizeMarkStack(size_t new_size);
+
+  // Returns how many threads we should use for the current GC phase, based on whether we are
+  // paused and whether or not we care about pause times.
+  size_t GetThreadCount(bool paused) const;
+
+  // Returns true if an object is inside of the immune region (assumed to be marked).
+  bool IsImmune(const mirror::Object* obj) const ALWAYS_INLINE {
+    return obj >= immune_begin_ && obj < immune_end_;
+  }
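+  // Note the interval is half-open: with immune region [begin, end),
+  // IsImmune(begin) is true and IsImmune(end) is false.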
+
+  bool IsImmuneSpace(const space::ContinuousSpace* space) const;
+
+  static void VerifyRootCallback(const mirror::Object* root, void* arg, size_t vreg,
+                                 const StackVisitor *visitor);
+
+  void VerifyRoot(const mirror::Object* root, size_t vreg, const StackVisitor* visitor)
+      NO_THREAD_SAFETY_ANALYSIS;
+
+  template <typename Visitor>
+  static void VisitInstanceFieldsReferences(const mirror::Class* klass, const mirror::Object* obj,
+                                            const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Visit the header, static field references, and interface pointers of a class object.
+  template <typename Visitor>
+  static void VisitClassReferences(const mirror::Class* klass, const mirror::Object* obj,
+                                   const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  template <typename Visitor>
+  static void VisitStaticFieldsReferences(const mirror::Class* klass, const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  template <typename Visitor>
+  static void VisitFieldsReferences(const mirror::Object* obj, uint32_t ref_offsets, bool is_static,
+                                    const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Visit all of the references in an object array.
+  template <typename Visitor>
+  static void VisitObjectArrayReferences(const mirror::ObjectArray<mirror::Object>* array,
+                                         const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Visits the header and field references of a data object.
+  template <typename Visitor>
+  static void VisitOtherReferences(const mirror::Class* klass, const mirror::Object* obj,
+                                   const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
+    return VisitInstanceFieldsReferences(klass, obj, visitor);
+  }
+
+  // Push an object onto the mark stack.
+  inline void MarkStackPush(mirror::Object* obj);
+
+  void UpdateAndMarkModUnion()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Schedules an unmarked object for reference processing.
+  void DelayReferenceReferent(mirror::Class* klass, mirror::Object* reference)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Recursively blackens objects on the mark stack.
+  void ProcessMarkStack(bool paused)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  void EnqueueFinalizerReferences(mirror::Object** ref)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  void PreserveSomeSoftReferences(mirror::Object** ref)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  void ClearWhiteReferences(mirror::Object** list)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  void ProcessReferences(mirror::Object** soft_references, bool clear_soft_references,
+                         mirror::Object** weak_references,
+                         mirror::Object** finalizer_references,
+                         mirror::Object** phantom_references)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  inline mirror::Object* GetForwardingAddressInFromSpace(mirror::Object* obj) const;
+
+  mirror::Object* GetForwardingAddress(mirror::Object* obj);
+
+  // Stack of objects that remain to be scanned.
+  accounting::ObjectStack* mark_stack_;
+
+  // Immune range, every object inside the immune range is assumed to be marked.
+  mirror::Object* immune_begin_;
+  mirror::Object* immune_end_;
+
+  // Destination and source spaces.
+  space::ContinuousMemMapAllocSpace* to_space_;
+  space::ContinuousMemMapAllocSpace* from_space_;
+
+  mirror::Object* soft_reference_list_;
+  mirror::Object* weak_reference_list_;
+  mirror::Object* finalizer_reference_list_;
+  mirror::Object* phantom_reference_list_;
+  mirror::Object* cleared_reference_list_;
+
+  Thread* self_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(SemiSpace);
+};
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_H_
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index 9f0bf33..ee6077a 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -26,7 +26,7 @@
 
 StickyMarkSweep::StickyMarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix)
     : PartialMarkSweep(heap, is_concurrent,
-                       name_prefix + (name_prefix.empty() ? "" : " ") + "sticky") {
+                       name_prefix.empty() ? "sticky " : name_prefix) {
   cumulative_timings_.SetName(GetName());
 }
 
@@ -38,7 +38,8 @@
   // know what was allocated since the last GC. A side-effect of binding the allocation space mark
   // and live bitmap is that marking the objects will place them in the live bitmap.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
+    if (space->IsMallocSpace() &&
+        space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
       BindLiveToMarkBitmap(space);
     }
   }
diff --git a/runtime/gc/collector/sticky_mark_sweep.h b/runtime/gc/collector/sticky_mark_sweep.h
index 8bee00f..b675877 100644
--- a/runtime/gc/collector/sticky_mark_sweep.h
+++ b/runtime/gc/collector/sticky_mark_sweep.h
@@ -31,10 +31,6 @@
     return kGcTypeSticky;
   }
 
-  // Don't need to do anything special here since we scan all the cards which may have references
-  // to the newly allocated objects.
-  virtual void UpdateAndMarkModUnion() { }
-
   explicit StickyMarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix = "");
   ~StickyMarkSweep() {}
 
@@ -53,6 +49,10 @@
 
   void Sweep(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  // Don't need to do anything special here since we scan all the cards which may have references
+  // to the newly allocated objects.
+  virtual void UpdateAndMarkModUnion() { }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(StickyMarkSweep);
 };
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
new file mode 100644
index 0000000..ba3cad6
--- /dev/null
+++ b/runtime/gc/collector_type.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_TYPE_H_
+#define ART_RUNTIME_GC_COLLECTOR_TYPE_H_
+
+#include <ostream>
+
+namespace art {
+namespace gc {
+
+// The types of collectors that can be used.
+enum CollectorType {
+  // Non concurrent mark-sweep.
+  kCollectorTypeMS,
+  // Concurrent mark-sweep.
+  kCollectorTypeCMS,
+  // Semi-space / mark-sweep hybrid, enables compaction.
+  kCollectorTypeSS,
+};
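+
+// Hypothetical usage sketch; the real -Xgc option parsing lives elsewhere in
+// the runtime. A parser mapping the documented option strings onto this enum
+// might look like:
+//
+//   CollectorType ParseCollectorType(const std::string& s) {
+//     if (s == "MS") return kCollectorTypeMS;
+//     if (s == "CMS") return kCollectorTypeCMS;
+//     if (s == "SS") return kCollectorTypeSS;
+//     LOG(FATAL) << "Unknown -Xgc value: " << s;
+//     return kCollectorTypeCMS;  // Unreachable.
+//   }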
+std::ostream& operator<<(std::ostream& os, const CollectorType& collector_type);
+
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_TYPE_H_
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 873eadc..5eda0b9 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -20,8 +20,10 @@
 #include "heap.h"
 
 #include "debugger.h"
+#include "gc/space/bump_pointer_space-inl.h"
 #include "gc/space/dlmalloc_space-inl.h"
 #include "gc/space/large_object_space.h"
+#include "gc/space/rosalloc_space-inl.h"
 #include "object_utils.h"
 #include "runtime.h"
 #include "thread.h"
@@ -30,101 +32,129 @@
 namespace art {
 namespace gc {
 
-inline mirror::Object* Heap::AllocObjectUninstrumented(Thread* self, mirror::Class* c, size_t byte_count) {
-  DebugCheckPreconditionsForAllobObject(c, byte_count);
-  mirror::Object* obj;
-  size_t bytes_allocated;
-  AllocationTimer alloc_timer(this, &obj);
-  bool large_object_allocation = TryAllocLargeObjectUninstrumented(self, c, byte_count,
-                                                                   &obj, &bytes_allocated);
-  if (LIKELY(!large_object_allocation)) {
-    // Non-large object allocation.
-    obj = AllocateUninstrumented(self, alloc_space_, byte_count, &bytes_allocated);
-    // Ensure that we did not allocate into a zygote space.
-    DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace());
-  }
-  if (LIKELY(obj != NULL)) {
-    obj->SetClass(c);
-    // Record allocation after since we want to use the atomic add for the atomic fence to guard
-    // the SetClass since we do not want the class to appear NULL in another thread.
-    size_t new_num_bytes_allocated = RecordAllocationUninstrumented(bytes_allocated, obj);
-    DCHECK(!Dbg::IsAllocTrackingEnabled());
-    CheckConcurrentGC(self, new_num_bytes_allocated, obj);
-    if (kDesiredHeapVerification > kNoHeapVerification) {
-      VerifyObject(obj);
-    }
-    return obj;
-  }
-  ThrowOutOfMemoryError(self, byte_count, large_object_allocation);
-  return NULL;
-}
-
-inline size_t Heap::RecordAllocationUninstrumented(size_t size, mirror::Object* obj) {
-  DCHECK(obj != NULL);
-  DCHECK_GT(size, 0u);
-  size_t old_num_bytes_allocated = static_cast<size_t>(num_bytes_allocated_.fetch_add(size));
-
-  DCHECK(!Runtime::Current()->HasStatsEnabled());
-
-  // This is safe to do since the GC will never free objects which are neither in the allocation
-  // stack or the live bitmap.
-  while (!allocation_stack_->AtomicPushBack(obj)) {
-    CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
-  }
-
-  return old_num_bytes_allocated + size;
-}
-
-inline mirror::Object* Heap::TryToAllocateUninstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
-                                                         bool grow, size_t* bytes_allocated) {
-  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
-    return NULL;
-  }
-  DCHECK(!running_on_valgrind_);
-  return space->Alloc(self, alloc_size, bytes_allocated);
-}
-
-// DlMallocSpace-specific version.
-inline mirror::Object* Heap::TryToAllocateUninstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
-                                                         bool grow, size_t* bytes_allocated) {
-  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
-    return NULL;
-  }
-  DCHECK(!running_on_valgrind_);
-  return space->AllocNonvirtual(self, alloc_size, bytes_allocated);
-}
-
-template <class T>
-inline mirror::Object* Heap::AllocateUninstrumented(Thread* self, T* space, size_t alloc_size,
-                                                    size_t* bytes_allocated) {
+template <bool kInstrumented, typename PreFenceVisitor>
+inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Class* klass,
+                                                      size_t byte_count, AllocatorType allocator,
+                                                      const PreFenceVisitor& pre_fence_visitor) {
+  DebugCheckPreconditionsForAllocObject(klass, byte_count);
   // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
   // done in the runnable state where suspension is expected.
   DCHECK_EQ(self->GetState(), kRunnable);
   self->AssertThreadSuspensionIsAllowable();
-
-  mirror::Object* ptr = TryToAllocateUninstrumented(self, space, alloc_size, false, bytes_allocated);
-  if (LIKELY(ptr != NULL)) {
-    return ptr;
+  mirror::Object* obj;
+  size_t bytes_allocated;
+  AllocationTimer alloc_timer(this, &obj);
+  if (UNLIKELY(ShouldAllocLargeObject(klass, byte_count))) {
+    obj = TryToAllocate<kInstrumented>(self, kAllocatorTypeLOS, byte_count, false,
+                                       &bytes_allocated);
+    allocator = kAllocatorTypeLOS;
+  } else {
+    obj = TryToAllocate<kInstrumented>(self, allocator, byte_count, false, &bytes_allocated);
   }
-  return AllocateInternalWithGc(self, space, alloc_size, bytes_allocated);
+
+  if (UNLIKELY(obj == nullptr)) {
+    SirtRef<mirror::Class> sirt_c(self, klass);
+    obj = AllocateInternalWithGc(self, allocator, byte_count, &bytes_allocated);
+    if (obj == nullptr) {
+      return nullptr;
+    } else {
+      klass = sirt_c.get();
+    }
+  }
+  obj->SetClass(klass);
+  pre_fence_visitor(obj);
+  DCHECK_GT(bytes_allocated, 0u);
+  const size_t new_num_bytes_allocated =
+      static_cast<size_t>(num_bytes_allocated_.fetch_add(bytes_allocated)) + bytes_allocated;
+  // TODO: Deprecate.
+  if (kInstrumented) {
+    if (Runtime::Current()->HasStatsEnabled()) {
+      RuntimeStats* thread_stats = self->GetStats();
+      ++thread_stats->allocated_objects;
+      thread_stats->allocated_bytes += bytes_allocated;
+      RuntimeStats* global_stats = Runtime::Current()->GetStats();
+      ++global_stats->allocated_objects;
+      global_stats->allocated_bytes += bytes_allocated;
+    }
+  } else {
+    DCHECK(!Runtime::Current()->HasStatsEnabled());
+  }
+  if (AllocatorHasAllocationStack(allocator)) {
+    // This is safe to do since the GC will never free objects which are in neither the allocation
+    // stack nor the live bitmap.
+    while (!allocation_stack_->AtomicPushBack(obj)) {
+      CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
+    }
+  }
+  if (kInstrumented) {
+    if (Dbg::IsAllocTrackingEnabled()) {
+      Dbg::RecordAllocation(klass, bytes_allocated);
+    }
+  } else {
+    DCHECK(!Dbg::IsAllocTrackingEnabled());
+  }
+  if (AllocatorHasConcurrentGC(allocator)) {
+    CheckConcurrentGC(self, new_num_bytes_allocated, obj);
+  }
+  if (kIsDebugBuild) {
+    if (kDesiredHeapVerification > kNoHeapVerification) {
+      VerifyObject(obj);
+    }
+    self->VerifyStack();
+  }
+  return obj;
 }
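+// Illustrative call shape for the allocator above (kInstrumented is a compile
+// time switch; variable names here are hypothetical):
+//
+//   mirror::Object* obj = heap->AllocObjectWithAllocator<true>(
+//       self, klass, byte_count, allocator, pre_fence_visitor);
+//
+// The pre-fence visitor runs on the new object before the atomic counter
+// update that publishes it, mirroring the old "record allocation after
+// SetClass" ordering.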
 
-inline bool Heap::TryAllocLargeObjectUninstrumented(Thread* self, mirror::Class* c, size_t byte_count,
-                                                    mirror::Object** obj_ptr, size_t* bytes_allocated) {
-  bool large_object_allocation = ShouldAllocLargeObject(c, byte_count);
-  if (UNLIKELY(large_object_allocation)) {
-    mirror::Object* obj = AllocateUninstrumented(self, large_object_space_, byte_count, bytes_allocated);
-    // Make sure that our large object didn't get placed anywhere within the space interval or else
-    // it breaks the immune range.
-    DCHECK(obj == NULL ||
-           reinterpret_cast<byte*>(obj) < continuous_spaces_.front()->Begin() ||
-           reinterpret_cast<byte*>(obj) >= continuous_spaces_.back()->End());
-    *obj_ptr = obj;
+template <const bool kInstrumented>
+inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator_type,
+                                           size_t alloc_size, bool grow,
+                                           size_t* bytes_allocated) {
+  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
+    return nullptr;
   }
-  return large_object_allocation;
+  if (kInstrumented) {
+    if (UNLIKELY(running_on_valgrind_ && allocator_type == kAllocatorTypeFreeList)) {
+      return non_moving_space_->Alloc(self, alloc_size, bytes_allocated);
+    }
+  }
+  mirror::Object* ret;
+  switch (allocator_type) {
+    case kAllocatorTypeBumpPointer: {
+      DCHECK(bump_pointer_space_ != nullptr);
+      alloc_size = RoundUp(alloc_size, space::BumpPointerSpace::kAlignment);
+      ret = bump_pointer_space_->AllocNonvirtual(alloc_size);
+      if (LIKELY(ret != nullptr)) {
+        *bytes_allocated = alloc_size;
+      }
+      break;
+    }
+    case kAllocatorTypeFreeList: {
+      if (kUseRosAlloc) {
+        ret = reinterpret_cast<space::RosAllocSpace*>(non_moving_space_)->AllocNonvirtual(
+            self, alloc_size, bytes_allocated);
+      } else {
+        ret = reinterpret_cast<space::DlMallocSpace*>(non_moving_space_)->AllocNonvirtual(
+            self, alloc_size, bytes_allocated);
+      }
+      break;
+    }
+    case kAllocatorTypeLOS: {
+      ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated);
+      // Note that the bump pointer spaces aren't necessarily next to
+      // the other continuous spaces like the non-moving alloc space or
+      // the zygote space.
+      DCHECK(ret == nullptr || large_object_space_->Contains(ret));
+      break;
+    }
+    default: {
+      LOG(FATAL) << "Invalid allocator type";
+      ret = nullptr;
+    }
+  }
+  return ret;
 }
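+
+// Note on the bump-pointer case above: the request is rounded up to the
+// space's alignment first, so *bytes_allocated can exceed the requested size.
+// For example, assuming kAlignment were 8:
+//   RoundUp(12, space::BumpPointerSpace::kAlignment) == 16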
 
-inline void Heap::DebugCheckPreconditionsForAllobObject(mirror::Class* c, size_t byte_count) {
+inline void Heap::DebugCheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count) {
   DCHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) ||
          (c->IsVariableSize() || c->GetObjectSize() == byte_count) ||
          strlen(ClassHelper(c).GetDescriptor()) == 0);
@@ -142,14 +172,14 @@
   if (kMeasureAllocationTime) {
     mirror::Object* allocated_obj = *allocated_obj_ptr_;
     // Only if the allocation succeeded, record the time.
-    if (allocated_obj != NULL) {
+    if (allocated_obj != nullptr) {
       uint64_t allocation_end_time = NanoTime() / kTimeAdjust;
       heap_->total_allocation_time_.fetch_add(allocation_end_time - allocation_start_time_);
     }
   }
 };
 
-inline bool Heap::ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) {
+inline bool Heap::ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const {
   // We need to have a zygote space or else our newly allocated large object can end up in the
   // Zygote resulting in it being prematurely freed.
   // We can only do this for primitive objects since large objects will not be within the card table
@@ -174,7 +204,8 @@
   return false;
 }
 
-inline void Heap::CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated, mirror::Object* obj) {
+inline void Heap::CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated,
+                                    mirror::Object* obj) {
   if (UNLIKELY(new_num_bytes_allocated >= concurrent_start_bytes_)) {
     // The SirtRef is necessary since the calls in RequestConcurrentGC are a safepoint.
     SirtRef<mirror::Object> ref(self, obj);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 804c669..5e62729 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -23,6 +23,7 @@
 #include <vector>
 #include <valgrind.h>
 
+#include "base/histogram-inl.h"
 #include "base/stl_util.h"
 #include "common_throws.h"
 #include "cutils/sched_policy.h"
@@ -30,14 +31,18 @@
 #include "gc/accounting/atomic_stack.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap-inl.h"
+#include "gc/accounting/mod_union_table.h"
 #include "gc/accounting/mod_union_table-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/collector/mark_sweep-inl.h"
 #include "gc/collector/partial_mark_sweep.h"
+#include "gc/collector/semi_space.h"
 #include "gc/collector/sticky_mark_sweep.h"
+#include "gc/space/bump_pointer_space.h"
 #include "gc/space/dlmalloc_space-inl.h"
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
+#include "gc/space/rosalloc_space-inl.h"
 #include "gc/space/space-inl.h"
 #include "heap-inl.h"
 #include "image.h"
@@ -49,6 +54,7 @@
 #include "mirror/object_array-inl.h"
 #include "object_utils.h"
 #include "os.h"
+#include "runtime.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
 #include "sirt_ref.h"
@@ -57,22 +63,24 @@
 #include "well_known_classes.h"
 
 namespace art {
+
+extern void SetQuickAllocEntryPointsAllocator(gc::AllocatorType allocator);
+
 namespace gc {
 
 static constexpr bool kGCALotMode = false;
 static constexpr size_t kGcAlotInterval = KB;
-static constexpr bool kDumpGcPerformanceOnShutdown = false;
 // Minimum amount of remaining bytes before a concurrent GC is triggered.
 static constexpr size_t kMinConcurrentRemainingBytes = 128 * KB;
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, size_t capacity, const std::string& image_file_name,
-           bool concurrent_gc, size_t parallel_gc_threads, size_t conc_gc_threads,
+           CollectorType collector_type, size_t parallel_gc_threads, size_t conc_gc_threads,
            bool low_memory_mode, size_t long_pause_log_threshold, size_t long_gc_log_threshold,
            bool ignore_max_footprint)
-    : alloc_space_(NULL),
-      card_table_(NULL),
-      concurrent_gc_(concurrent_gc),
+    : non_moving_space_(nullptr),
+      concurrent_gc_(collector_type == gc::kCollectorTypeCMS),
+      collector_type_(collector_type),
       parallel_gc_threads_(parallel_gc_threads),
       conc_gc_threads_(conc_gc_threads),
       low_memory_mode_(low_memory_mode),
@@ -80,10 +88,11 @@
       long_gc_log_threshold_(long_gc_log_threshold),
       ignore_max_footprint_(ignore_max_footprint),
       have_zygote_space_(false),
-      soft_ref_queue_lock_(NULL),
-      weak_ref_queue_lock_(NULL),
-      finalizer_ref_queue_lock_(NULL),
-      phantom_ref_queue_lock_(NULL),
+      soft_reference_queue_(this),
+      weak_reference_queue_(this),
+      finalizer_reference_queue_(this),
+      phantom_reference_queue_(this),
+      cleared_references_(this),
       is_gc_running_(false),
       last_gc_type_(collector::kGcTypeNone),
       next_gc_type_(collector::kGcTypePartial),
@@ -92,6 +101,7 @@
       max_allowed_footprint_(initial_size),
       native_footprint_gc_watermark_(initial_size),
       native_footprint_limit_(2 * initial_size),
+      native_need_to_run_finalization_(false),
       activity_thread_class_(NULL),
       application_thread_class_(NULL),
       activity_thread_(NULL),
@@ -122,7 +132,11 @@
        * searching.
        */
       max_allocation_stack_size_(kGCALotMode ? kGcAlotInterval
-          : (kDesiredHeapVerification > kNoHeapVerification) ? KB : MB),
+          : (kDesiredHeapVerification > kVerifyAllFast) ? KB : MB),
+      current_allocator_(kMovingCollector ? kAllocatorTypeBumpPointer : kAllocatorTypeFreeList),
+      current_non_moving_allocator_(kAllocatorTypeFreeList),
+      bump_pointer_space_(nullptr),
+      temp_space_(nullptr),
       reference_referent_offset_(0),
       reference_queue_offset_(0),
       reference_queueNext_offset_(0),
@@ -134,38 +148,59 @@
       total_wait_time_(0),
       total_allocation_time_(0),
       verify_object_mode_(kHeapVerificationNotPermitted),
+      gc_disable_count_(0),
       running_on_valgrind_(RUNNING_ON_VALGRIND) {
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
-
+  // If we aren't the zygote, switch to the default non zygote allocator. This may update the
+  // entrypoints.
+  if (!Runtime::Current()->IsZygote()) {
+    ChangeCollector(collector_type_);
+  }
   live_bitmap_.reset(new accounting::HeapBitmap(this));
   mark_bitmap_.reset(new accounting::HeapBitmap(this));
-
   // Requested begin for the alloc space, to follow the mapped image and oat files
-  byte* requested_alloc_space_begin = NULL;
+  byte* requested_alloc_space_begin = nullptr;
   if (!image_file_name.empty()) {
     space::ImageSpace* image_space = space::ImageSpace::Create(image_file_name.c_str());
-    CHECK(image_space != NULL) << "Failed to create space for " << image_file_name;
-    AddContinuousSpace(image_space);
+    CHECK(image_space != nullptr) << "Failed to create space for " << image_file_name;
+    AddSpace(image_space);
     // Oat files referenced by image files immediately follow them in memory, ensure alloc space
     // isn't going to get in the middle
     byte* oat_file_end_addr = image_space->GetImageHeader().GetOatFileEnd();
     CHECK_GT(oat_file_end_addr, image_space->End());
     if (oat_file_end_addr > requested_alloc_space_begin) {
-      requested_alloc_space_begin =
-          reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(oat_file_end_addr),
-                                          kPageSize));
+      requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
     }
   }
 
-  alloc_space_ = space::DlMallocSpace::Create(Runtime::Current()->IsZygote() ? "zygote space" : "alloc space",
-                                              initial_size,
-                                              growth_limit, capacity,
-                                              requested_alloc_space_begin);
-  CHECK(alloc_space_ != NULL) << "Failed to create alloc space";
-  alloc_space_->SetFootprintLimit(alloc_space_->Capacity());
-  AddContinuousSpace(alloc_space_);
+  const char* name = Runtime::Current()->IsZygote() ? "zygote space" : "alloc space";
+  if (!kUseRosAlloc) {
+    non_moving_space_ = space::DlMallocSpace::Create(name, initial_size, growth_limit, capacity,
+                                                     requested_alloc_space_begin);
+  } else {
+    non_moving_space_ = space::RosAllocSpace::Create(name, initial_size, growth_limit, capacity,
+                                                     requested_alloc_space_begin);
+  }
+  if (kMovingCollector) {
+    // TODO: Place bump-pointer spaces somewhere to minimize size of card table.
+    // TODO: Having 3+ spaces as big as the large heap size can cause virtual memory fragmentation
+    // issues.
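+    // Two same-sized bump pointer spaces: the semi-space collector copies live
+    // objects from one into the other, after which the spaces are swapped.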
+    const size_t bump_pointer_space_size = std::min(non_moving_space_->Capacity(), 128 * MB);
+    bump_pointer_space_ = space::BumpPointerSpace::Create("Bump pointer space",
+                                                          bump_pointer_space_size, nullptr);
+    CHECK(bump_pointer_space_ != nullptr) << "Failed to create bump pointer space";
+    AddSpace(bump_pointer_space_);
+    temp_space_ = space::BumpPointerSpace::Create("Bump pointer space 2", bump_pointer_space_size,
+                                                  nullptr);
+  CHECK(temp_space_ != nullptr) << "Failed to create bump pointer space 2";
+    AddSpace(temp_space_);
+  }
+
+  CHECK(non_moving_space_ != nullptr) << "Failed to create non-moving space";
+  non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
+  AddSpace(non_moving_space_);
 
   // Allocate the large object space.
   const bool kUseFreeListSpaceForLOS = false;
@@ -175,22 +210,23 @@
     large_object_space_ = space::LargeObjectMapSpace::Create("large object space");
   }
   CHECK(large_object_space_ != NULL) << "Failed to create large object space";
-  AddDiscontinuousSpace(large_object_space_);
+  AddSpace(large_object_space_);
 
   // Compute heap capacity. Continuous spaces are sorted in order of Begin().
+  CHECK(!continuous_spaces_.empty());
+  // Relies on the spaces being sorted.
   byte* heap_begin = continuous_spaces_.front()->Begin();
-  size_t heap_capacity = continuous_spaces_.back()->End() - continuous_spaces_.front()->Begin();
-  if (continuous_spaces_.back()->IsDlMallocSpace()) {
-    heap_capacity += continuous_spaces_.back()->AsDlMallocSpace()->NonGrowthLimitCapacity();
-  }
+  byte* heap_end = continuous_spaces_.back()->Limit();
+  size_t heap_capacity = heap_end - heap_begin;
 
   // Allocate the card table.
   card_table_.reset(accounting::CardTable::Create(heap_begin, heap_capacity));
   CHECK(card_table_.get() != NULL) << "Failed to create card table";
 
+  // Card cache for now since it makes it easier for us to update the references to the copying
+  // spaces.
   accounting::ModUnionTable* mod_union_table =
-      new accounting::ModUnionTableToZygoteAllocspace("Image mod-union table", this,
-                                                      GetImageSpace());
+      new accounting::ModUnionTableCardCache("Image mod-union table", this, GetImageSpace());
   CHECK(mod_union_table != nullptr) << "Failed to create image mod-union table";
   AddModUnionTable(mod_union_table);
 
@@ -211,33 +247,33 @@
   gc_complete_lock_ = new Mutex("GC complete lock");
   gc_complete_cond_.reset(new ConditionVariable("GC complete condition variable",
                                                 *gc_complete_lock_));
-
-  // Create the reference queue locks, this is required so for parallel object scanning in the GC.
-  soft_ref_queue_lock_ = new Mutex("Soft reference queue lock");
-  weak_ref_queue_lock_ = new Mutex("Weak reference queue lock");
-  finalizer_ref_queue_lock_ = new Mutex("Finalizer reference queue lock");
-  phantom_ref_queue_lock_ = new Mutex("Phantom reference queue lock");
-
   last_gc_time_ns_ = NanoTime();
   last_gc_size_ = GetBytesAllocated();
 
   if (ignore_max_footprint_) {
     SetIdealFootprint(std::numeric_limits<size_t>::max());
-    concurrent_start_bytes_ = max_allowed_footprint_;
+    concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
   }
+  CHECK_NE(max_allowed_footprint_, 0U);
 
   // Create our garbage collectors.
   for (size_t i = 0; i < 2; ++i) {
     const bool concurrent = i != 0;
-    mark_sweep_collectors_.push_back(new collector::MarkSweep(this, concurrent));
-    mark_sweep_collectors_.push_back(new collector::PartialMarkSweep(this, concurrent));
-    mark_sweep_collectors_.push_back(new collector::StickyMarkSweep(this, concurrent));
+    garbage_collectors_.push_back(new collector::MarkSweep(this, concurrent));
+    garbage_collectors_.push_back(new collector::PartialMarkSweep(this, concurrent));
+    garbage_collectors_.push_back(new collector::StickyMarkSweep(this, concurrent));
+  }
+  gc_plan_.push_back(collector::kGcTypeSticky);
+  gc_plan_.push_back(collector::kGcTypePartial);
+  gc_plan_.push_back(collector::kGcTypeFull);
+  if (kMovingCollector) {
+    // TODO: Clean this up.
+    semi_space_collector_ = new collector::SemiSpace(this);
+    garbage_collectors_.push_back(semi_space_collector_);
   }
 
-  CHECK_NE(max_allowed_footprint_, 0U);
-
   if (running_on_valgrind_) {
-    Runtime::Current()->InstrumentQuickAllocEntryPoints();
+    Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints();
   }
 
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
@@ -245,19 +281,100 @@
   }
 }
 
+void Heap::ChangeAllocator(AllocatorType allocator) {
+  DCHECK_NE(allocator, kAllocatorTypeLOS);
+  if (current_allocator_ != allocator) {
+    current_allocator_ = allocator;
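+    // Re-point the quick allocation entrypoints and force threads to pick up
+    // the new ones; otherwise fast paths would keep allocating with the old
+    // allocator.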
+    SetQuickAllocEntryPointsAllocator(current_allocator_);
+    Runtime::Current()->GetInstrumentation()->ResetQuickAllocEntryPoints();
+  }
+}
+
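+// When compiling the boot image there is no image space to map and no zygote
+// space has been created, so the absence of both implies boot compilation.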
+bool Heap::IsCompilingBoot() const {
+  for (const auto& space : continuous_spaces_) {
+    if (space->IsImageSpace()) {
+      return false;
+    } else if (space->IsZygoteSpace()) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool Heap::HasImageSpace() const {
+  for (const auto& space : continuous_spaces_) {
+    if (space->IsImageSpace()) {
+      return true;
+    }
+  }
+  return false;
+}
+
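+// A moving GC cannot run while a thread holds raw pointers into the heap
+// (e.g. after GetPrimitiveArrayCritical), so such callers bump this count to
+// keep the GC from starting until the matching decrement.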
+void Heap::IncrementDisableGC(Thread* self) {
+  // Need to do this holding the lock to prevent races where the GC is about to run / running when
+  // we attempt to disable it.
+  ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
+  MutexLock mu(self, *gc_complete_lock_);
+  WaitForGcToCompleteLocked(self);
+  ++gc_disable_count_;
+}
+
+void Heap::DecrementDisableGC(Thread* self) {
+  MutexLock mu(self, *gc_complete_lock_);
+  CHECK_GE(gc_disable_count_, 0U);
+  --gc_disable_count_;
+}
+
 void Heap::CreateThreadPool() {
   const size_t num_threads = std::max(parallel_gc_threads_, conc_gc_threads_);
   if (num_threads != 0) {
-    thread_pool_.reset(new ThreadPool(num_threads));
+    thread_pool_.reset(new ThreadPool("Heap thread pool", num_threads));
   }
 }
 
+void Heap::VisitObjects(ObjectVisitorCallback callback, void* arg) {
+  // Visit objects in bump pointer space.
+  Thread* self = Thread::Current();
+  // TODO: Use reference block.
+  std::vector<SirtRef<mirror::Object>*> saved_refs;
+  if (bump_pointer_space_ != nullptr) {
+    // Need to put all these in sirts since the callback may trigger a GC. TODO: Use a better data
+    // structure.
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(bump_pointer_space_->Begin());
+    const mirror::Object* end = reinterpret_cast<const mirror::Object*>(
+        bump_pointer_space_->End());
+    while (obj < end) {
+      saved_refs.push_back(new SirtRef<mirror::Object>(self, obj));
+      obj = space::BumpPointerSpace::GetNextObject(obj);
+    }
+  }
+  // TODO: Switch to standard begin and end to use a range-based loop.
+  for (mirror::Object** it = allocation_stack_->Begin(), **end = allocation_stack_->End();
+      it < end; ++it) {
+    mirror::Object* obj = *it;
+    // Objects in the allocation stack might be in a movable space.
+    saved_refs.push_back(new SirtRef<mirror::Object>(self, obj));
+  }
+  GetLiveBitmap()->Walk(callback, arg);
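+  // The bitmap walk only covers spaces with live bitmaps; the SIRTed objects
+  // gathered above (bump pointer space and allocation stack) are visited
+  // explicitly afterwards.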
+  for (const auto& ref : saved_refs) {
+    callback(ref->get(), arg);
+  }
+  // Need to free the sirts in the reverse order they were allocated.
+  for (size_t i = saved_refs.size(); i != 0; --i) {
+    delete saved_refs[i - 1];
+  }
+}
+
+void Heap::MarkAllocStackAsLive(accounting::ObjectStack* stack) {
+  MarkAllocStack(non_moving_space_->GetLiveBitmap(), large_object_space_->GetLiveObjects(), stack);
+}
+
 void Heap::DeleteThreadPool() {
   thread_pool_.reset(nullptr);
 }
 
 static bool ReadStaticInt(JNIEnvExt* env, jclass clz, const char* name, int* out_value) {
-  CHECK(out_value != NULL);
+  DCHECK(out_value != nullptr);
   jfieldID field = env->GetStaticFieldID(clz, name, "I");
   if (field == NULL) {
     env->ExceptionClear();
@@ -374,62 +491,71 @@
   }
 }
 
-void Heap::AddContinuousSpace(space::ContinuousSpace* space) {
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+void Heap::AddSpace(space::Space* space) {
   DCHECK(space != NULL);
-  DCHECK(space->GetLiveBitmap() != NULL);
-  live_bitmap_->AddContinuousSpaceBitmap(space->GetLiveBitmap());
-  DCHECK(space->GetMarkBitmap() != NULL);
-  mark_bitmap_->AddContinuousSpaceBitmap(space->GetMarkBitmap());
-  continuous_spaces_.push_back(space);
-  if (space->IsDlMallocSpace() && !space->IsLargeObjectSpace()) {
-    alloc_space_ = space->AsDlMallocSpace();
-  }
-
-  // Ensure that spaces remain sorted in increasing order of start address (required for CMS finger)
-  std::sort(continuous_spaces_.begin(), continuous_spaces_.end(),
-            [](const space::ContinuousSpace* a, const space::ContinuousSpace* b) {
-              return a->Begin() < b->Begin();
-            });
-
-  // Ensure that ImageSpaces < ZygoteSpaces < AllocSpaces so that we can do address based checks to
-  // avoid redundant marking.
-  bool seen_zygote = false, seen_alloc = false;
-  for (const auto& space : continuous_spaces_) {
-    if (space->IsImageSpace()) {
-      DCHECK(!seen_zygote);
-      DCHECK(!seen_alloc);
-    } else if (space->IsZygoteSpace()) {
-      DCHECK(!seen_alloc);
-      seen_zygote = true;
-    } else if (space->IsDlMallocSpace()) {
-      seen_alloc = true;
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  if (space->IsContinuousSpace()) {
+    DCHECK(!space->IsDiscontinuousSpace());
+    space::ContinuousSpace* continuous_space = space->AsContinuousSpace();
+    // Continuous spaces don't necessarily have bitmaps.
+    accounting::SpaceBitmap* live_bitmap = continuous_space->GetLiveBitmap();
+    accounting::SpaceBitmap* mark_bitmap = continuous_space->GetMarkBitmap();
+    if (live_bitmap != nullptr) {
+      DCHECK(mark_bitmap != nullptr);
+      live_bitmap_->AddContinuousSpaceBitmap(live_bitmap);
+      mark_bitmap_->AddContinuousSpaceBitmap(mark_bitmap);
     }
+
+    continuous_spaces_.push_back(continuous_space);
+    if (continuous_space->IsMallocSpace()) {
+      non_moving_space_ = continuous_space->AsMallocSpace();
+    }
+
+    // Ensure that spaces remain sorted in increasing order of start address.
+    std::sort(continuous_spaces_.begin(), continuous_spaces_.end(),
+              [](const space::ContinuousSpace* a, const space::ContinuousSpace* b) {
+      return a->Begin() < b->Begin();
+    });
+    // Ensure that ImageSpaces < ZygoteSpaces < AllocSpaces so that we can do address based checks to
+    // avoid redundant marking.
+    bool seen_zygote = false, seen_alloc = false;
+    for (const auto& space : continuous_spaces_) {
+      if (space->IsImageSpace()) {
+        CHECK(!seen_zygote);
+        CHECK(!seen_alloc);
+      } else if (space->IsZygoteSpace()) {
+        CHECK(!seen_alloc);
+        seen_zygote = true;
+      } else if (space->IsMallocSpace()) {
+        seen_alloc = true;
+      }
+    }
+  } else {
+    DCHECK(space->IsDiscontinuousSpace());
+    space::DiscontinuousSpace* discontinuous_space = space->AsDiscontinuousSpace();
+    DCHECK(discontinuous_space->GetLiveObjects() != nullptr);
+    live_bitmap_->AddDiscontinuousObjectSet(discontinuous_space->GetLiveObjects());
+    DCHECK(discontinuous_space->GetMarkObjects() != nullptr);
+    mark_bitmap_->AddDiscontinuousObjectSet(discontinuous_space->GetMarkObjects());
+    discontinuous_spaces_.push_back(discontinuous_space);
+  }
+  if (space->IsAllocSpace()) {
+    alloc_spaces_.push_back(space->AsAllocSpace());
   }
 }
 
 void Heap::RegisterGCAllocation(size_t bytes) {
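+  // Note: the null 'this' check below is technically undefined behavior; it
+  // presumably tolerates callers that run before a Heap exists.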
-  if (this != NULL) {
+  if (this != nullptr) {
     gc_memory_overhead_.fetch_add(bytes);
   }
 }
 
 void Heap::RegisterGCDeAllocation(size_t bytes) {
-  if (this != NULL) {
+  if (this != nullptr) {
     gc_memory_overhead_.fetch_sub(bytes);
   }
 }
 
-void Heap::AddDiscontinuousSpace(space::DiscontinuousSpace* space) {
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
-  DCHECK(space != NULL);
-  DCHECK(space->GetLiveObjects() != NULL);
-  live_bitmap_->AddDiscontinuousObjectSet(space->GetLiveObjects());
-  DCHECK(space->GetMarkObjects() != NULL);
-  mark_bitmap_->AddDiscontinuousObjectSet(space->GetMarkObjects());
-  discontinuous_spaces_.push_back(space);
-}
-
 void Heap::DumpGcPerformanceInfo(std::ostream& os) {
   // Dump cumulative timings.
   os << "Dumping cumulative Gc timings\n";
@@ -437,7 +563,7 @@
 
   // Dump cumulative loggers for each GC type.
   uint64_t total_paused_time = 0;
-  for (const auto& collector : mark_sweep_collectors_) {
+  for (const auto& collector : garbage_collectors_) {
     CumulativeLogger& logger = collector->GetCumulativeTimings();
     if (logger.GetTotalNs() != 0) {
       os << Dumpable<CumulativeLogger>(logger);
@@ -446,8 +572,10 @@
       double seconds = NsToMs(logger.GetTotalNs()) / 1000.0;
       const uint64_t freed_bytes = collector->GetTotalFreedBytes();
       const uint64_t freed_objects = collector->GetTotalFreedObjects();
+      Histogram<uint64_t>::CumulativeData cumulative_data;
+      collector->GetPauseHistogram().CreateHistogram(&cumulative_data);
+      collector->GetPauseHistogram().PrintConfidenceIntervals(os, 0.99, cumulative_data);
       os << collector->GetName() << " total time: " << PrettyDuration(total_ns) << "\n"
-         << collector->GetName() << " paused time: " << PrettyDuration(total_pause_ns) << "\n"
          << collector->GetName() << " freed: " << freed_objects
          << " objects with total size " << PrettySize(freed_bytes) << "\n"
          << collector->GetName() << " throughput: " << freed_objects / seconds << "/s / "
@@ -480,25 +608,16 @@
 }
 
 Heap::~Heap() {
-  if (kDumpGcPerformanceOnShutdown) {
-    DumpGcPerformanceInfo(LOG(INFO));
-  }
-
-  STLDeleteElements(&mark_sweep_collectors_);
-
-  // If we don't reset then the mark stack complains in it's destructor.
+  VLOG(heap) << "Starting ~Heap()";
+  STLDeleteElements(&garbage_collectors_);
+  // If we don't reset then the mark stack complains in its destructor.
   allocation_stack_->Reset();
   live_stack_->Reset();
-
-  VLOG(heap) << "~Heap()";
   STLDeleteValues(&mod_union_tables_);
   STLDeleteElements(&continuous_spaces_);
   STLDeleteElements(&discontinuous_spaces_);
   delete gc_complete_lock_;
-  delete soft_ref_queue_lock_;
-  delete weak_ref_queue_lock_;
-  delete finalizer_ref_queue_lock_;
-  delete phantom_ref_queue_lock_;
+  VLOG(heap) << "Finished ~Heap()";
 }
 
 space::ContinuousSpace* Heap::FindContinuousSpaceFromObject(const mirror::Object* obj,
@@ -535,6 +654,106 @@
   return FindDiscontinuousSpaceFromObject(obj, true);
 }
 
+struct SoftReferenceArgs {
+  RootVisitor* is_marked_callback_;
+  RootVisitor* recursive_mark_callback_;
+  void* arg_;
+};
+
+mirror::Object* Heap::PreserveSoftReferenceCallback(mirror::Object* obj, void* arg) {
+  SoftReferenceArgs* args = reinterpret_cast<SoftReferenceArgs*>(arg);
+  // TODO: Don't preserve all soft references.
+  return args->recursive_mark_callback_(obj, args->arg_);
+}
+
+// Process reference class instances and schedule finalizations.
+void Heap::ProcessReferences(TimingLogger& timings, bool clear_soft,
+                             RootVisitor* is_marked_callback,
+                             RootVisitor* recursive_mark_object_callback, void* arg) {
+  // Unless we are in the zygote or required to clear soft references with white references,
+  // preserve some white referents.
+  if (!clear_soft && !Runtime::Current()->IsZygote()) {
+    SoftReferenceArgs soft_reference_args;
+    soft_reference_args.is_marked_callback_ = is_marked_callback;
+    soft_reference_args.recursive_mark_callback_ = recursive_mark_object_callback;
+    soft_reference_args.arg_ = arg;
+    soft_reference_queue_.PreserveSomeSoftReferences(&PreserveSoftReferenceCallback,
+                                                     &soft_reference_args);
+  }
+  timings.StartSplit("ProcessReferences");
+  // Clear all remaining soft and weak references with white referents.
+  soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  timings.EndSplit();
+  // Preserve all white objects with finalize methods and schedule them for finalization.
+  timings.StartSplit("EnqueueFinalizerReferences");
+  finalizer_reference_queue_.EnqueueFinalizerReferences(cleared_references_, is_marked_callback,
+                                                        recursive_mark_object_callback, arg);
+  timings.EndSplit();
+  timings.StartSplit("ProcessReferences");
+  // Clear all f-reachable soft and weak references with white referents.
+  soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  // Clear all phantom references with white referents.
+  phantom_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  // At this point all reference queues other than the cleared references should be empty.
+  DCHECK(soft_reference_queue_.IsEmpty());
+  DCHECK(weak_reference_queue_.IsEmpty());
+  DCHECK(finalizer_reference_queue_.IsEmpty());
+  DCHECK(phantom_reference_queue_.IsEmpty());
+  timings.EndSplit();
+}
+
+bool Heap::IsEnqueued(mirror::Object* ref) const {
+  // Since the references are stored as cyclic lists, once a reference has been enqueued its
+  // pending next will always be non-null.
+  return ref->GetFieldObject<mirror::Object*>(GetReferencePendingNextOffset(), false) != nullptr;
+}
+
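+// A reference is enqueuable if it was constructed with a queue and has not
+// yet been enqueued; queueNext is only set once the reference joins a queue.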
+bool Heap::IsEnqueuable(const mirror::Object* ref) const {
+  DCHECK(ref != nullptr);
+  const mirror::Object* queue =
+      ref->GetFieldObject<mirror::Object*>(GetReferenceQueueOffset(), false);
+  const mirror::Object* queue_next =
+      ref->GetFieldObject<mirror::Object*>(GetReferenceQueueNextOffset(), false);
+  return queue != nullptr && queue_next == nullptr;
+}
+
+// Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
+// marked, put it on the appropriate list in the heap for later processing.
+void Heap::DelayReferenceReferent(mirror::Class* klass, mirror::Object* obj,
+                                  RootVisitor mark_visitor, void* arg) {
+  DCHECK(klass != nullptr);
+  DCHECK(klass->IsReferenceClass());
+  DCHECK(obj != nullptr);
+  mirror::Object* referent = GetReferenceReferent(obj);
+  if (referent != nullptr) {
+    mirror::Object* forward_address = mark_visitor(referent, arg);
+    // Null means that the object is not currently marked.
+    if (forward_address == nullptr) {
+      Thread* self = Thread::Current();
+      // TODO: Remove these locks, and use atomic stacks for storing references?
+      // We need to check that the references haven't already been enqueued since we can end up
+      // scanning the same reference multiple times due to dirty cards.
+      if (klass->IsSoftReferenceClass()) {
+        soft_reference_queue_.AtomicEnqueueIfNotEnqueued(self, obj);
+      } else if (klass->IsWeakReferenceClass()) {
+        weak_reference_queue_.AtomicEnqueueIfNotEnqueued(self, obj);
+      } else if (klass->IsFinalizerReferenceClass()) {
+        finalizer_reference_queue_.AtomicEnqueueIfNotEnqueued(self, obj);
+      } else if (klass->IsPhantomReferenceClass()) {
+        phantom_reference_queue_.AtomicEnqueueIfNotEnqueued(self, obj);
+      } else {
+        LOG(FATAL) << "Invalid reference type " << PrettyClass(klass) << " " << std::hex
+                   << klass->GetAccessFlags();
+      }
+    } else if (referent != forward_address) {
+      // Referent is already marked and we need to update it.
+      SetReferenceReferent(obj, forward_address);
+    }
+  }
+}
+
 space::ImageSpace* Heap::GetImageSpace() const {
   for (const auto& space : continuous_spaces_) {
     if (space->IsImageSpace()) {
@@ -562,8 +781,15 @@
   if (!large_object_allocation && total_bytes_free >= byte_count) {
     size_t max_contiguous_allocation = 0;
     for (const auto& space : continuous_spaces_) {
-      if (space->IsDlMallocSpace()) {
-        space->AsDlMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
+      if (space->IsMallocSpace()) {
+        // To allow the Walk/InspectAll() to exclusively-lock the mutator
+        // lock, temporarily release the shared access to the mutator
+        // lock here by transitioning to the suspended state.
+        Locks::mutator_lock_->AssertSharedHeld(self);
+        self->TransitionFromRunnableToSuspended(kSuspended);
+        space->AsMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
+        self->TransitionFromSuspendedToRunnable();
+        Locks::mutator_lock_->AssertSharedHeld(self);
       }
     }
     oss << "; failed due to fragmentation (largest possible contiguous allocation "
@@ -572,68 +798,57 @@
   self->ThrowOutOfMemoryError(oss.str().c_str());
 }
 
-inline bool Heap::TryAllocLargeObjectInstrumented(Thread* self, mirror::Class* c, size_t byte_count,
-                                                  mirror::Object** obj_ptr, size_t* bytes_allocated) {
-  bool large_object_allocation = ShouldAllocLargeObject(c, byte_count);
-  if (UNLIKELY(large_object_allocation)) {
-    mirror::Object* obj = AllocateInstrumented(self, large_object_space_, byte_count, bytes_allocated);
-    // Make sure that our large object didn't get placed anywhere within the space interval or else
-    // it breaks the immune range.
-    DCHECK(obj == NULL ||
-           reinterpret_cast<byte*>(obj) < continuous_spaces_.front()->Begin() ||
-           reinterpret_cast<byte*>(obj) >= continuous_spaces_.back()->End());
-    *obj_ptr = obj;
+void Heap::Trim() {
+  uint64_t start_ns = NanoTime();
+  // Trim the managed spaces.
+  uint64_t total_alloc_space_allocated = 0;
+  uint64_t total_alloc_space_size = 0;
+  uint64_t managed_reclaimed = 0;
+  for (const auto& space : continuous_spaces_) {
+    if (space->IsMallocSpace() && !space->IsZygoteSpace()) {
+      gc::space::MallocSpace* alloc_space = space->AsMallocSpace();
+      total_alloc_space_size += alloc_space->Size();
+      managed_reclaimed += alloc_space->Trim();
+    }
   }
-  return large_object_allocation;
+  total_alloc_space_allocated = GetBytesAllocated() - large_object_space_->GetBytesAllocated();
+  if (bump_pointer_space_ != nullptr) {
+    total_alloc_space_allocated -= bump_pointer_space_->GetBytesAllocated();
+  }
+  const float managed_utilization = static_cast<float>(total_alloc_space_allocated) /
+      static_cast<float>(total_alloc_space_size);
+  uint64_t gc_heap_end_ns = NanoTime();
+  // Trim the native heap.
+  dlmalloc_trim(0);
+  size_t native_reclaimed = 0;
+  dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
+  uint64_t end_ns = NanoTime();
+  VLOG(heap) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
+      << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
+      << PrettyDuration(end_ns - gc_heap_end_ns) << ", advised=" << PrettySize(native_reclaimed)
+      << ") heaps. Managed heap utilization of " << static_cast<int>(100 * managed_utilization)
+      << "%.";
 }
 
-mirror::Object* Heap::AllocObjectInstrumented(Thread* self, mirror::Class* c, size_t byte_count) {
-  DebugCheckPreconditionsForAllobObject(c, byte_count);
-  mirror::Object* obj;
-  size_t bytes_allocated;
-  AllocationTimer alloc_timer(this, &obj);
-  bool large_object_allocation = TryAllocLargeObjectInstrumented(self, c, byte_count,
-                                                                 &obj, &bytes_allocated);
-  if (LIKELY(!large_object_allocation)) {
-    // Non-large object allocation.
-    obj = AllocateInstrumented(self, alloc_space_, byte_count, &bytes_allocated);
-    // Ensure that we did not allocate into a zygote space.
-    DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace());
-  }
-  if (LIKELY(obj != NULL)) {
-    obj->SetClass(c);
-    // Record allocation after since we want to use the atomic add for the atomic fence to guard
-    // the SetClass since we do not want the class to appear NULL in another thread.
-    size_t new_num_bytes_allocated = RecordAllocationInstrumented(bytes_allocated, obj);
-    if (Dbg::IsAllocTrackingEnabled()) {
-      Dbg::RecordAllocation(c, byte_count);
-    }
-    CheckConcurrentGC(self, new_num_bytes_allocated, obj);
-    if (kDesiredHeapVerification > kNoHeapVerification) {
-      VerifyObject(obj);
-    }
-    return obj;
-  }
-  ThrowOutOfMemoryError(self, byte_count, large_object_allocation);
-  return NULL;
-}
-
-bool Heap::IsHeapAddress(const mirror::Object* obj) {
-  // Note: we deliberately don't take the lock here, and mustn't test anything that would
-  // require taking the lock.
-  if (obj == NULL) {
+bool Heap::IsValidObjectAddress(const mirror::Object* obj) const {
+  // Note: we deliberately don't take the lock here, and mustn't test anything that would require
+  // taking the lock.
+  if (obj == nullptr) {
     return true;
   }
-  if (UNLIKELY(!IsAligned<kObjectAlignment>(obj))) {
-    return false;
+  return IsAligned<kObjectAlignment>(obj) && IsHeapAddress(obj);
+}
+
+bool Heap::IsHeapAddress(const mirror::Object* obj) const {
+  if (kMovingCollector && bump_pointer_space_->HasAddress(obj)) {
+    return true;
   }
-  return FindSpaceFromObject(obj, true) != NULL;
+  // TODO: This probably doesn't work for large objects.
+  return FindSpaceFromObject(obj, true) != nullptr;
 }
 
 bool Heap::IsLiveObjectLocked(const mirror::Object* obj, bool search_allocation_stack,
                               bool search_live_stack, bool sorted) {
   // Locks::heap_bitmap_lock_->AssertReaderHeld(Thread::Current());
-  if (obj == NULL || UNLIKELY(!IsAligned<kObjectAlignment>(obj))) {
+  if (obj == nullptr || UNLIKELY(!IsAligned<kObjectAlignment>(obj))) {
     return false;
   }
   space::ContinuousSpace* c_space = FindContinuousSpaceFromObject(obj, true);
@@ -642,6 +857,8 @@
     if (c_space->GetLiveBitmap()->Test(obj)) {
       return true;
     }
+  } else if (kMovingCollector &&
+             (bump_pointer_space_->Contains(obj) || temp_space_->Contains(obj))) {
+    return true;
   } else {
     d_space = FindDiscontinuousSpaceFromObject(obj, true);
     if (d_space != NULL) {
@@ -655,7 +872,6 @@
     if (i > 0) {
       NanoSleep(MsToNs(10));
     }
-
     if (search_allocation_stack) {
       if (sorted) {
         if (allocation_stack_->ContainsSorted(const_cast<mirror::Object*>(obj))) {
@@ -699,16 +915,20 @@
   VerifyObjectBody(obj);
 }
 
-void Heap::DumpSpaces() {
+void Heap::DumpSpaces(std::ostream& stream) {
   for (const auto& space : continuous_spaces_) {
     accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
     accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
-    LOG(INFO) << space << " " << *space << "\n"
-              << live_bitmap << " " << *live_bitmap << "\n"
-              << mark_bitmap << " " << *mark_bitmap;
+    stream << space << " " << *space << "\n";
+    if (live_bitmap != nullptr) {
+      stream << live_bitmap << " " << *live_bitmap << "\n";
+    }
+    if (mark_bitmap != nullptr) {
+      stream << mark_bitmap << " " << *mark_bitmap << "\n";
+    }
   }
   for (const auto& space : discontinuous_spaces_) {
-    LOG(INFO) << space << " " << *space << "\n";
+    stream << space << " " << *space << "\n";
   }
 }
 
@@ -735,7 +955,7 @@
   const mirror::Class* c_c_c = *reinterpret_cast<mirror::Class* const *>(raw_addr);
   CHECK_EQ(c_c, c_c_c);
 
-  if (verify_object_mode_ != kVerifyAllFast) {
+  if (verify_object_mode_ > kVerifyAllFast) {
     // TODO: the bitmap tests below are racy if VerifyObjectBody is called without the
     //       heap_bitmap_lock_.
     if (!IsLiveObjectLocked(obj)) {
@@ -758,31 +978,6 @@
   GetLiveBitmap()->Walk(Heap::VerificationCallback, this);
 }
 
-inline size_t Heap::RecordAllocationInstrumented(size_t size, mirror::Object* obj) {
-  DCHECK(obj != NULL);
-  DCHECK_GT(size, 0u);
-  size_t old_num_bytes_allocated = static_cast<size_t>(num_bytes_allocated_.fetch_add(size));
-
-  if (Runtime::Current()->HasStatsEnabled()) {
-    RuntimeStats* thread_stats = Thread::Current()->GetStats();
-    ++thread_stats->allocated_objects;
-    thread_stats->allocated_bytes += size;
-
-    // TODO: Update these atomically.
-    RuntimeStats* global_stats = Runtime::Current()->GetStats();
-    ++global_stats->allocated_objects;
-    global_stats->allocated_bytes += size;
-  }
-
-  // This is safe to do since the GC will never free objects which are neither in the allocation
-  // stack or the live bitmap.
-  while (!allocation_stack_->AtomicPushBack(obj)) {
-    CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
-  }
-
-  return old_num_bytes_allocated + size;
-}
-
 void Heap::RecordFree(size_t freed_objects, size_t freed_bytes) {
   DCHECK_LE(freed_bytes, static_cast<size_t>(num_bytes_allocated_));
   num_bytes_allocated_.fetch_sub(freed_bytes);
@@ -799,111 +994,50 @@
   }
 }
 
-inline mirror::Object* Heap::TryToAllocateInstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
-                                                       bool grow, size_t* bytes_allocated) {
-  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
-    return NULL;
-  }
-  return space->Alloc(self, alloc_size, bytes_allocated);
-}
-
-// DlMallocSpace-specific version.
-inline mirror::Object* Heap::TryToAllocateInstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
-                                                       bool grow, size_t* bytes_allocated) {
-  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
-    return NULL;
-  }
-  if (LIKELY(!running_on_valgrind_)) {
-    return space->AllocNonvirtual(self, alloc_size, bytes_allocated);
-  } else {
-    return space->Alloc(self, alloc_size, bytes_allocated);
-  }
-}
-
-template <class T>
-inline mirror::Object* Heap::AllocateInstrumented(Thread* self, T* space, size_t alloc_size,
-                                                  size_t* bytes_allocated) {
-  // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
-  // done in the runnable state where suspension is expected.
-  DCHECK_EQ(self->GetState(), kRunnable);
-  self->AssertThreadSuspensionIsAllowable();
-
-  mirror::Object* ptr = TryToAllocateInstrumented(self, space, alloc_size, false, bytes_allocated);
-  if (LIKELY(ptr != NULL)) {
-    return ptr;
-  }
-  return AllocateInternalWithGc(self, space, alloc_size, bytes_allocated);
-}
-
-mirror::Object* Heap::AllocateInternalWithGc(Thread* self, space::AllocSpace* space,
+mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocator,
                                              size_t alloc_size, size_t* bytes_allocated) {
-  mirror::Object* ptr;
-
+  mirror::Object* ptr = nullptr;
   // The allocation failed. If the GC is running, block until it completes, and then retry the
   // allocation.
-  collector::GcType last_gc = WaitForConcurrentGcToComplete(self);
+  collector::GcType last_gc = WaitForGcToComplete(self);
   if (last_gc != collector::kGcTypeNone) {
     // A GC was in progress and we blocked, retry allocation now that memory has been freed.
-    ptr = TryToAllocateInstrumented(self, space, alloc_size, false, bytes_allocated);
-    if (ptr != NULL) {
-      return ptr;
-    }
+    ptr = TryToAllocate<true>(self, allocator, alloc_size, false, bytes_allocated);
   }
 
   // Loop through our different Gc types and try to Gc until we get enough free memory.
-  for (size_t i = static_cast<size_t>(last_gc) + 1;
-      i < static_cast<size_t>(collector::kGcTypeMax); ++i) {
-    bool run_gc = false;
-    collector::GcType gc_type = static_cast<collector::GcType>(i);
-    switch (gc_type) {
-      case collector::kGcTypeSticky: {
-          const size_t alloc_space_size = alloc_space_->Size();
-          run_gc = alloc_space_size > min_alloc_space_size_for_sticky_gc_ &&
-              alloc_space_->Capacity() - alloc_space_size >= min_remaining_space_for_sticky_gc_;
-          break;
-        }
-      case collector::kGcTypePartial:
-        run_gc = have_zygote_space_;
-        break;
-      case collector::kGcTypeFull:
-        run_gc = true;
-        break;
-      default:
-        break;
+  for (collector::GcType gc_type : gc_plan_) {
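+    // gc_plan_ escalates from the cheapest to the most thorough collection
+    // (sticky, partial, full); stop as soon as an attempt frees enough memory.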
+    if (ptr != nullptr) {
+      break;
     }
-
-    if (run_gc) {
-      // If we actually ran a different type of Gc than requested, we can skip the index forwards.
-      collector::GcType gc_type_ran = CollectGarbageInternal(gc_type, kGcCauseForAlloc, false);
-      DCHECK_GE(static_cast<size_t>(gc_type_ran), i);
-      i = static_cast<size_t>(gc_type_ran);
-
+    // Attempt to run the collector, if we succeed, re-try the allocation.
+    if (CollectGarbageInternal(gc_type, kGcCauseForAlloc, false) != collector::kGcTypeNone) {
       // Did we free sufficient memory for the allocation to succeed?
-      ptr = TryToAllocateInstrumented(self, space, alloc_size, false, bytes_allocated);
-      if (ptr != NULL) {
-        return ptr;
-      }
+      ptr = TryToAllocate<true>(self, allocator, alloc_size, false, bytes_allocated);
     }
   }
-
   // Allocations have failed after GCs;  this is an exceptional state.
-  // Try harder, growing the heap if necessary.
-  ptr = TryToAllocateInstrumented(self, space, alloc_size, true, bytes_allocated);
-  if (ptr != NULL) {
-    return ptr;
+  if (ptr == nullptr) {
+    // Try harder, growing the heap if necessary.
+    ptr = TryToAllocate<true>(self, allocator, alloc_size, true, bytes_allocated);
   }
-
-  // Most allocations should have succeeded by now, so the heap is really full, really fragmented,
-  // or the requested size is really big. Do another GC, collecting SoftReferences this time. The
-  // VM spec requires that all SoftReferences have been collected and cleared before throwing OOME.
-
-  // OLD-TODO: wait for the finalizers from the previous GC to finish
-  VLOG(gc) << "Forcing collection of SoftReferences for " << PrettySize(alloc_size)
-           << " allocation";
-
-  // We don't need a WaitForConcurrentGcToComplete here either.
-  CollectGarbageInternal(collector::kGcTypeFull, kGcCauseForAlloc, true);
-  return TryToAllocateInstrumented(self, space, alloc_size, true, bytes_allocated);
+  if (ptr == nullptr) {
+    // Most allocations should have succeeded by now, so the heap is really full, really fragmented,
+    // or the requested size is really big. Do another GC, collecting SoftReferences this time. The
+    // VM spec requires that all SoftReferences have been collected and cleared before throwing
+    // OOME.
+    VLOG(gc) << "Forcing collection of SoftReferences for " << PrettySize(alloc_size)
+             << " allocation";
+    // TODO: Run finalization, but this may cause more allocations to occur.
+    // We don't need a WaitForGcToComplete here either.
+    DCHECK(!gc_plan_.empty());
+    CollectGarbageInternal(gc_plan_.back(), kGcCauseForAlloc, true);
+    ptr = TryToAllocate<true>(self, allocator, alloc_size, true, bytes_allocated);
+    if (ptr == nullptr) {
+      ThrowOutOfMemoryError(self, alloc_size, false);
+    }
+  }
+  return ptr;
 }
 
 void Heap::SetTargetHeapUtilization(float target) {
@@ -914,51 +1048,24 @@
 
 size_t Heap::GetObjectsAllocated() const {
   size_t total = 0;
-  typedef std::vector<space::ContinuousSpace*>::const_iterator It;
-  for (It it = continuous_spaces_.begin(), end = continuous_spaces_.end(); it != end; ++it) {
-    space::ContinuousSpace* space = *it;
-    if (space->IsDlMallocSpace()) {
-      total += space->AsDlMallocSpace()->GetObjectsAllocated();
-    }
-  }
-  typedef std::vector<space::DiscontinuousSpace*>::const_iterator It2;
-  for (It2 it = discontinuous_spaces_.begin(), end = discontinuous_spaces_.end(); it != end; ++it) {
-    space::DiscontinuousSpace* space = *it;
-    total += space->AsLargeObjectSpace()->GetObjectsAllocated();
+  for (space::AllocSpace* space : alloc_spaces_) {
+    total += space->GetObjectsAllocated();
   }
   return total;
 }
 
 size_t Heap::GetObjectsAllocatedEver() const {
   size_t total = 0;
-  typedef std::vector<space::ContinuousSpace*>::const_iterator It;
-  for (It it = continuous_spaces_.begin(), end = continuous_spaces_.end(); it != end; ++it) {
-    space::ContinuousSpace* space = *it;
-    if (space->IsDlMallocSpace()) {
-      total += space->AsDlMallocSpace()->GetTotalObjectsAllocated();
-    }
-  }
-  typedef std::vector<space::DiscontinuousSpace*>::const_iterator It2;
-  for (It2 it = discontinuous_spaces_.begin(), end = discontinuous_spaces_.end(); it != end; ++it) {
-    space::DiscontinuousSpace* space = *it;
-    total += space->AsLargeObjectSpace()->GetTotalObjectsAllocated();
+  for (space::AllocSpace* space : alloc_spaces_) {
+    total += space->GetTotalObjectsAllocated();
   }
   return total;
 }
 
 size_t Heap::GetBytesAllocatedEver() const {
   size_t total = 0;
-  typedef std::vector<space::ContinuousSpace*>::const_iterator It;
-  for (It it = continuous_spaces_.begin(), end = continuous_spaces_.end(); it != end; ++it) {
-    space::ContinuousSpace* space = *it;
-    if (space->IsDlMallocSpace()) {
-      total += space->AsDlMallocSpace()->GetTotalBytesAllocated();
-    }
-  }
-  typedef std::vector<space::DiscontinuousSpace*>::const_iterator It2;
-  for (It2 it = discontinuous_spaces_.begin(), end = discontinuous_spaces_.end(); it != end; ++it) {
-    space::DiscontinuousSpace* space = *it;
-    total += space->AsLargeObjectSpace()->GetTotalBytesAllocated();
+  for (space::AllocSpace* space : alloc_spaces_) {
+    total += space->GetTotalBytesAllocated();
   }
   return total;
 }
@@ -1056,8 +1163,8 @@
   // For bitmap Visit.
   // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for
   // annotalysis on visitors.
-  void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
-    collector::MarkSweep::VisitObjectReferences(obj, *this, true);
+  void operator()(const mirror::Object* o) const NO_THREAD_SAFETY_ANALYSIS {
+    collector::MarkSweep::VisitObjectReferences(const_cast<mirror::Object*>(o), *this, true);
   }
 
   // For MarkSweep::VisitObjectReferences.
@@ -1093,56 +1200,89 @@
 void Heap::CollectGarbage(bool clear_soft_references) {
   // Even if we waited for a GC we still need to do another GC since weaks allocated during the
   // last GC will not have necessarily been cleared.
-  Thread* self = Thread::Current();
-  WaitForConcurrentGcToComplete(self);
   CollectGarbageInternal(collector::kGcTypeFull, kGcCauseExplicit, clear_soft_references);
 }
 
+void Heap::ChangeCollector(CollectorType collector_type) {
+  switch (collector_type) {
+    case kCollectorTypeSS: {
+      ChangeAllocator(kAllocatorTypeBumpPointer);
+      break;
+    }
+    case kCollectorTypeMS:
+      // Fall-through.
+    case kCollectorTypeCMS: {
+      ChangeAllocator(kAllocatorTypeFreeList);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unimplemented";
+  }
+}
+
 void Heap::PreZygoteFork() {
   static Mutex zygote_creation_lock_("zygote creation lock", kZygoteCreationLock);
-  // Do this before acquiring the zygote creation lock so that we don't get lock order violations.
-  CollectGarbage(false);
   Thread* self = Thread::Current();
   MutexLock mu(self, zygote_creation_lock_);
-
   // Try to see if we have any Zygote spaces.
   if (have_zygote_space_) {
     return;
   }
-
-  VLOG(heap) << "Starting PreZygoteFork with alloc space size " << PrettySize(alloc_space_->Size());
-
-  {
-    // Flush the alloc stack.
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    FlushAllocStack();
+  VLOG(heap) << "Starting PreZygoteFork";
+  // Do this before acquiring the zygote creation lock so that we don't get lock order violations.
+  CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
+  // Trim the pages at the end of the non moving space.
+  non_moving_space_->Trim();
+  non_moving_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+  // Change the allocator to the post zygote one.
+  ChangeCollector(collector_type_);
+  // TODO: Delete bump_pointer_space_ and temp_pointer_space_?
+  if (semi_space_collector_ != nullptr) {
+    // Create a new bump pointer space which we will compact into.
+    space::BumpPointerSpace target_space("zygote bump space", non_moving_space_->End(),
+                                         non_moving_space_->Limit());
+    // Compact the bump pointer space to a new zygote bump pointer space.
+    temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+    Compact(&target_space, bump_pointer_space_);
+    CHECK_EQ(temp_space_->GetBytesAllocated(), 0U);
+    total_objects_freed_ever_ += semi_space_collector_->GetFreedObjects();
+    total_bytes_freed_ever_ += semi_space_collector_->GetFreedBytes();
+    // Update the end and write out image.
+    non_moving_space_->SetEnd(target_space.End());
+    non_moving_space_->SetLimit(target_space.Limit());
+    accounting::SpaceBitmap* bitmap = non_moving_space_->GetLiveBitmap();
+    // Record the allocations in the bitmap.
+    VLOG(heap) << "Recording zygote allocations";
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(target_space.Begin());
+    const mirror::Object* end = reinterpret_cast<const mirror::Object*>(target_space.End());
+    while (obj < end) {
+      bitmap->Set(obj);
+      obj = space::BumpPointerSpace::GetNextObject(obj);
+    }
   }
-
-  // Turns the current alloc space into a Zygote space and obtain the new alloc space composed
-  // of the remaining available heap memory.
-  space::DlMallocSpace* zygote_space = alloc_space_;
-  alloc_space_ = zygote_space->CreateZygoteSpace("alloc space");
-  alloc_space_->SetFootprintLimit(alloc_space_->Capacity());
-
+  // Turn the current alloc space into a zygote space and obtain the new alloc space composed of
+  // the remaining available heap memory.
+  space::MallocSpace* zygote_space = non_moving_space_;
+  non_moving_space_ = zygote_space->CreateZygoteSpace("alloc space");
+  non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
   // Change the GC retention policy of the zygote space to only collect when full.
   zygote_space->SetGcRetentionPolicy(space::kGcRetentionPolicyFullCollect);
-  AddContinuousSpace(alloc_space_);
+  AddSpace(non_moving_space_);
   have_zygote_space_ = true;
-
+  zygote_space->InvalidateAllocator();
   // Create the zygote space mod union table.
   accounting::ModUnionTable* mod_union_table =
       new accounting::ModUnionTableCardCache("zygote space mod-union table", this, zygote_space);
   CHECK(mod_union_table != nullptr) << "Failed to create zygote space mod-union table";
   AddModUnionTable(mod_union_table);
-
   // Reset the cumulative loggers since we now have a few additional timing phases.
-  for (const auto& collector : mark_sweep_collectors_) {
+  for (const auto& collector : garbage_collectors_) {
     collector->ResetCumulativeStatistics();
   }
 }
 
 void Heap::FlushAllocStack() {
-  MarkAllocStack(alloc_space_->GetLiveBitmap(), large_object_space_->GetLiveObjects(),
+  MarkAllocStack(non_moving_space_->GetLiveBitmap(), large_object_space_->GetLiveObjects(),
                  allocation_stack_.get());
   allocation_stack_->Reset();
 }
@@ -1161,86 +1301,131 @@
   }
 }
 
+const char* PrettyCause(GcCause cause) {
+  switch (cause) {
+    case kGcCauseForAlloc: return "Alloc";
+    case kGcCauseBackground: return "Background";
+    case kGcCauseExplicit: return "Explicit";
+    default:
+      LOG(FATAL) << "Unreachable";
+  }
+  return "";
+}
 
-const char* gc_cause_and_type_strings[3][4] = {
-    {"", "GC Alloc Sticky", "GC Alloc Partial", "GC Alloc Full"},
-    {"", "GC Background Sticky", "GC Background Partial", "GC Background Full"},
-    {"", "GC Explicit Sticky", "GC Explicit Partial", "GC Explicit Full"}};
+void Heap::SwapSemiSpaces() {
+  // Swap the spaces so we allocate into the space which we just evacuated.
+  std::swap(bump_pointer_space_, temp_space_);
+}
+
+void Heap::Compact(space::ContinuousMemMapAllocSpace* target_space,
+                   space::ContinuousMemMapAllocSpace* source_space) {
+  CHECK(kMovingCollector);
+  CHECK_NE(target_space, source_space) << "In-place compaction currently unsupported";
+  if (target_space != source_space) {
+    semi_space_collector_->SetFromSpace(source_space);
+    semi_space_collector_->SetToSpace(target_space);
+    semi_space_collector_->Run(false);
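+    // Run(false): do not clear soft references during this compaction.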
+  }
+}
 
 collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCause gc_cause,
                                                bool clear_soft_references) {
   Thread* self = Thread::Current();
-
+  Runtime* runtime = Runtime::Current();
+  // If the heap can't run the GC, silently fail and return that no GC was run.
+  switch (gc_type) {
+    case collector::kGcTypeSticky: {
+      const size_t alloc_space_size = non_moving_space_->Size();
+      if (alloc_space_size < min_alloc_space_size_for_sticky_gc_ ||
+          non_moving_space_->Capacity() - alloc_space_size < min_remaining_space_for_sticky_gc_) {
+        return collector::kGcTypeNone;
+      }
+      break;
+    }
+    case collector::kGcTypePartial: {
+      if (!have_zygote_space_) {
+        return collector::kGcTypeNone;
+      }
+      break;
+    }
+    default: {
+      // Other GC types don't have any special cases which makes them not runnable. The main case
+      // here is full GC.
+    }
+  }
   ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
   Locks::mutator_lock_->AssertNotHeld(self);
-
   if (self->IsHandlingStackOverflow()) {
     LOG(WARNING) << "Performing GC on a thread that is handling a stack overflow.";
   }
-
-  // Ensure there is only one GC at a time.
-  bool start_collect = false;
-  while (!start_collect) {
-    {
-      MutexLock mu(self, *gc_complete_lock_);
-      if (!is_gc_running_) {
-        is_gc_running_ = true;
-        start_collect = true;
-      }
+  {
+    gc_complete_lock_->AssertNotHeld(self);
+    MutexLock mu(self, *gc_complete_lock_);
+    // Ensure there is only one GC at a time.
+    WaitForGcToCompleteLocked(self);
+    // TODO: if another thread beat this one to do the GC, perhaps we should just return here?
+    //       Not doing at the moment to ensure soft references are cleared.
+    // GC can be disabled if someone has used GetPrimitiveArrayCritical.
+    if (gc_disable_count_ != 0) {
+      LOG(WARNING) << "Skipping GC due to disable count " << gc_disable_count_;
+      return collector::kGcTypeNone;
     }
-    if (!start_collect) {
-      // TODO: timinglog this.
-      WaitForConcurrentGcToComplete(self);
-
-      // TODO: if another thread beat this one to do the GC, perhaps we should just return here?
-      //       Not doing at the moment to ensure soft references are cleared.
-    }
+    is_gc_running_ = true;
   }
-  gc_complete_lock_->AssertNotHeld(self);
-
-  if (gc_cause == kGcCauseForAlloc && Runtime::Current()->HasStatsEnabled()) {
-    ++Runtime::Current()->GetStats()->gc_for_alloc_count;
-    ++Thread::Current()->GetStats()->gc_for_alloc_count;
+  if (gc_cause == kGcCauseForAlloc && runtime->HasStatsEnabled()) {
+    ++runtime->GetStats()->gc_for_alloc_count;
+    ++self->GetStats()->gc_for_alloc_count;
   }
-
   uint64_t gc_start_time_ns = NanoTime();
   uint64_t gc_start_size = GetBytesAllocated();
   // Approximate allocation rate in bytes / second.
-  if (UNLIKELY(gc_start_time_ns == last_gc_time_ns_)) {
-    LOG(WARNING) << "Timers are broken (gc_start_time == last_gc_time_).";
-  }
   uint64_t ms_delta = NsToMs(gc_start_time_ns - last_gc_time_ns_);
-  if (ms_delta != 0) {
+  // Back to back GCs can cause 0 ms of wait time in between GC invocations.
+  if (LIKELY(ms_delta != 0)) {
     allocation_rate_ = ((gc_start_size - last_gc_size_) * 1000) / ms_delta;
     VLOG(heap) << "Allocation rate: " << PrettySize(allocation_rate_) << "/s";
   }
 
-  if (gc_type == collector::kGcTypeSticky &&
-      alloc_space_->Size() < min_alloc_space_size_for_sticky_gc_) {
-    gc_type = collector::kGcTypePartial;
-  }
-
   DCHECK_LT(gc_type, collector::kGcTypeMax);
   DCHECK_NE(gc_type, collector::kGcTypeNone);
-  DCHECK_LE(gc_cause, kGcCauseExplicit);
 
-  ATRACE_BEGIN(gc_cause_and_type_strings[gc_cause][gc_type]);
-
-  collector::MarkSweep* collector = NULL;
-  for (const auto& cur_collector : mark_sweep_collectors_) {
-    if (cur_collector->IsConcurrent() == concurrent_gc_ && cur_collector->GetGcType() == gc_type) {
-      collector = cur_collector;
-      break;
+  collector::GarbageCollector* collector = nullptr;
+  // TODO: Clean this up.
+  if (current_allocator_ == kAllocatorTypeBumpPointer) {
+    CHECK_EQ(temp_space_->GetObjectsAllocated(), 0U);
+    semi_space_collector_->SetFromSpace(bump_pointer_space_);
+    semi_space_collector_->SetToSpace(temp_space_);
+    mprotect(temp_space_->Begin(), temp_space_->Capacity(), PROT_READ | PROT_WRITE);
+    collector = semi_space_collector_;
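+    // Semi-space evacuates the entire from-space, so report this as a full
+    // collection regardless of the requested gc_type.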
+    gc_type = collector::kGcTypeFull;
+  } else if (current_allocator_ == kAllocatorTypeFreeList) {
+    for (const auto& cur_collector : garbage_collectors_) {
+      if (cur_collector->IsConcurrent() == concurrent_gc_ &&
+          cur_collector->GetGcType() == gc_type) {
+        collector = cur_collector;
+        break;
+      }
     }
+  } else {
+    LOG(FATAL) << "Invalid current allocator " << current_allocator_;
   }
   CHECK(collector != NULL)
       << "Could not find garbage collector with concurrent=" << concurrent_gc_
       << " and type=" << gc_type;
 
-  collector->clear_soft_references_ = clear_soft_references;
-  collector->Run();
+  ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), collector->GetName()).c_str());
+
+  collector->Run(clear_soft_references);
   total_objects_freed_ever_ += collector->GetFreedObjects();
   total_bytes_freed_ever_ += collector->GetFreedBytes();
+
+  // Enqueue cleared references.
+  EnqueueClearedReferences();
+
+  // Grow the heap so that we know when to perform the next GC.
+  GrowForUtilization(gc_type, collector->GetDurationNs());
+
   if (care_about_pause_times_) {
     const size_t duration = collector->GetDurationNs();
     std::vector<uint64_t> pauses = collector->GetPauseTimes();
@@ -1252,7 +1437,6 @@
         was_slow = was_slow || pause > long_pause_log_threshold_;
       }
     }
-
     if (was_slow) {
         const size_t percent_free = GetPercentFree();
         const size_t current_heap_size = GetBytesAllocated();
@@ -1271,7 +1455,7 @@
                   << PrettySize(total_memory) << ", " << "paused " << pause_string.str()
                   << " total " << PrettyDuration((duration / 1000) * 1000);
         if (VLOG_IS_ON(heap)) {
-            LOG(INFO) << Dumpable<base::TimingLogger>(collector->GetTimings());
+            LOG(INFO) << Dumpable<TimingLogger>(collector->GetTimings());
         }
     }
   }
@@ -1327,7 +1511,6 @@
       accounting::CardTable* card_table = heap_->GetCardTable();
       accounting::ObjectStack* alloc_stack = heap_->allocation_stack_.get();
       accounting::ObjectStack* live_stack = heap_->live_stack_.get();
-
       if (!failed_) {
         // Print message only on first failure to prevent spam.
         LOG(ERROR) << "!!!!!!!!!!!!!!Heap corruption detected!!!!!!!!!!!!!!!!!!!";
@@ -1337,7 +1520,7 @@
         byte* card_addr = card_table->CardFromAddr(obj);
         LOG(ERROR) << "Object " << obj << " references dead object " << ref << " at offset "
                    << offset << "\n card value = " << static_cast<int>(*card_addr);
-        if (heap_->IsHeapAddress(obj->GetClass())) {
+        if (heap_->IsValidObjectAddress(obj->GetClass())) {
           LOG(ERROR) << "Obj type " << PrettyTypeOf(obj);
         } else {
           LOG(ERROR) << "Object " << obj << " class(" << obj->GetClass() << ") not a heap address";
@@ -1345,8 +1528,8 @@
 
        // Attempt to find the class inside of the recently freed objects.
         space::ContinuousSpace* ref_space = heap_->FindContinuousSpaceFromObject(ref, true);
-        if (ref_space->IsDlMallocSpace()) {
-          space::DlMallocSpace* space = ref_space->AsDlMallocSpace();
+        if (ref_space != nullptr && ref_space->IsMallocSpace()) {
+          space::MallocSpace* space = ref_space->AsMallocSpace();
           mirror::Class* ref_class = space->FindRecentFreedObject(ref);
           if (ref_class != nullptr) {
             LOG(ERROR) << "Reference " << ref << " found as a recently freed object with class "
@@ -1356,7 +1539,7 @@
           }
         }
 
-        if (ref->GetClass() != nullptr && heap_->IsHeapAddress(ref->GetClass()) &&
+        if (ref->GetClass() != nullptr && heap_->IsValidObjectAddress(ref->GetClass()) &&
             ref->GetClass()->IsClass()) {
           LOG(ERROR) << "Ref type " << PrettyTypeOf(ref);
         } else {
@@ -1427,17 +1610,25 @@
  public:
   explicit VerifyObjectVisitor(Heap* heap) : heap_(heap), failed_(false) {}
 
-  void operator()(const mirror::Object* obj) const
+  void operator()(mirror::Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
    // Note: we verify the references in obj but not obj itself, since obj must be live or we
    // could not have found it in the live bitmap.
     VerifyReferenceVisitor visitor(heap_);
     // The class doesn't count as a reference but we should verify it anyways.
-    visitor(obj, obj->GetClass(), MemberOffset(0), false);
-    collector::MarkSweep::VisitObjectReferences(const_cast<mirror::Object*>(obj), visitor, true);
+    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
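+    // The referent field of reference objects is not covered by VisitObjectReferences above, so
+    // verify it explicitly here.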
+    if (obj->GetClass()->IsReferenceClass()) {
+      visitor(obj, heap_->GetReferenceReferent(obj), MemberOffset(0), false);
+    }
     failed_ = failed_ || visitor.Failed();
   }
 
+  static void VisitCallback(mirror::Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
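+    // Adapter so that Heap::VisitObjects, which takes a C-style callback, can drive this visitor.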
+    VerifyObjectVisitor* visitor = reinterpret_cast<VerifyObjectVisitor*>(arg);
+    visitor->operator()(obj);
+  }
+
   bool Failed() const {
     return failed_;
   }
@@ -1453,18 +1644,15 @@
   // Let's sort our allocation stacks so that we can efficiently binary search them.
   allocation_stack_->Sort();
   live_stack_->Sort();
-  // Perform the verification.
   VerifyObjectVisitor visitor(this);
-  Runtime::Current()->VisitRoots(VerifyReferenceVisitor::VerifyRoots, &visitor, false, false);
-  GetLiveBitmap()->Visit(visitor);
   // Verify objects in the allocation stack since these will be objects which were:
   // 1. Allocated prior to the GC (pre GC verification).
   // 2. Allocated during the GC (pre sweep GC verification).
-  for (mirror::Object** it = allocation_stack_->Begin(); it != allocation_stack_->End(); ++it) {
-    visitor(*it);
-  }
   // We don't want to verify the objects in the live stack since they themselves may be
   // pointing to dead objects if they are not reachable.
+  VisitObjects(VerifyObjectVisitor::VisitCallback, &visitor);
+  // Verify the roots:
+  Runtime::Current()->VisitRoots(VerifyReferenceVisitor::VerifyRoots, &visitor, false, false);
   if (visitor.Failed()) {
     // Dump mod-union tables.
     for (const auto& table_pair : mod_union_tables_) {
@@ -1557,7 +1745,7 @@
   void operator()(mirror::Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     VerifyReferenceCardVisitor visitor(heap_, const_cast<bool*>(&failed_));
-    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
+    collector::MarkSweep::VisitObjectReferences(const_cast<mirror::Object*>(obj), visitor, true);
   }
 
   bool Failed() const {
@@ -1601,19 +1789,23 @@
   return it->second;
 }
 
-void Heap::ProcessCards(base::TimingLogger& timings) {
+void Heap::ProcessCards(TimingLogger& timings) {
   // Clear cards and keep track of cards cleared in the mod-union table.
   for (const auto& space : continuous_spaces_) {
     accounting::ModUnionTable* table = FindModUnionTableFromSpace(space);
     if (table != nullptr) {
       const char* name = space->IsZygoteSpace() ? "ZygoteModUnionClearCards" :
           "ImageModUnionClearCards";
-      base::TimingLogger::ScopedSplit split(name, &timings);
+      TimingLogger::ScopedSplit split(name, &timings);
       table->ClearCards();
-    } else {
-      base::TimingLogger::ScopedSplit split("AllocSpaceClearCards", &timings);
+    } else if (space->GetType() != space::kSpaceTypeBumpPointerSpace) {
+      TimingLogger::ScopedSplit split("AllocSpaceClearCards", &timings);
       // No mod union table for the AllocSpace. Age the cards so that the GC knows that these cards
       // were dirty before the GC started.
+      // TODO: Using an atomic here is not strictly necessary.
+      // The only race is whether a card ends up aged or unaged. Since we checkpoint the roots
+      // and then scan / update the mod union tables afterwards, we will scan the card either
+      // way; if we end up with the unaged card, we scan it in the pause.
       card_table_->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(), VoidFunctor());
     }
   }
@@ -1692,36 +1884,27 @@
   }
 }
 
-collector::GcType Heap::WaitForConcurrentGcToComplete(Thread* self) {
+collector::GcType Heap::WaitForGcToComplete(Thread* self) {
+  ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
+  MutexLock mu(self, *gc_complete_lock_);
+  return WaitForGcToCompleteLocked(self);
+}
+
+collector::GcType Heap::WaitForGcToCompleteLocked(Thread* self) {
   collector::GcType last_gc_type = collector::kGcTypeNone;
-  if (concurrent_gc_) {
-    ATRACE_BEGIN("GC: Wait For Concurrent");
-    bool do_wait;
-    uint64_t wait_start = NanoTime();
-    {
-      // Check if GC is running holding gc_complete_lock_.
-      MutexLock mu(self, *gc_complete_lock_);
-      do_wait = is_gc_running_;
-    }
-    if (do_wait) {
-      uint64_t wait_time;
-      // We must wait, change thread state then sleep on gc_complete_cond_;
-      ScopedThreadStateChange tsc(Thread::Current(), kWaitingForGcToComplete);
-      {
-        MutexLock mu(self, *gc_complete_lock_);
-        while (is_gc_running_) {
-          gc_complete_cond_->Wait(self);
-        }
-        last_gc_type = last_gc_type_;
-        wait_time = NanoTime() - wait_start;
-        total_wait_time_ += wait_time;
-      }
-      if (wait_time > long_pause_log_threshold_) {
-        LOG(INFO) << "WaitForConcurrentGcToComplete blocked for " << PrettyDuration(wait_time);
-      }
-    }
+  uint64_t wait_start = NanoTime();
+  while (is_gc_running_) {
+    ATRACE_BEGIN("GC: Wait For Completion");
+    // We must wait, so sleep on gc_complete_cond_ until the running GC signals completion.
+    gc_complete_cond_->Wait(self);
+    last_gc_type = last_gc_type_;
     ATRACE_END();
   }
+  uint64_t wait_time = NanoTime() - wait_start;
+  total_wait_time_ += wait_time;
+  if (wait_time > long_pause_log_threshold_) {
+    LOG(INFO) << "WaitForGcToComplete blocked for " << PrettyDuration(wait_time);
+  }
   return last_gc_type;
 }
 
@@ -1744,6 +1927,23 @@
   max_allowed_footprint_ = max_allowed_footprint;
 }
 
+bool Heap::IsMovableObject(const mirror::Object* obj) const {
+  if (kMovingCollector) {
+    DCHECK(!IsInTempSpace(obj));
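+    // Only objects in the bump pointer space can be moved by the semi-space collector.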
+    if (bump_pointer_space_->HasAddress(obj)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Heap::IsInTempSpace(const mirror::Object* obj) const {
+  if (temp_space_->HasAddress(obj) && !temp_space_->Contains(obj)) {
+    return true;
+  }
+  return false;
+}
+
 void Heap::UpdateMaxNativeFootprint() {
   size_t native_size = native_bytes_allocated_;
   // TODO: Tune the native heap utilization to be a value other than the java heap utilization.
@@ -1763,7 +1963,6 @@
   const size_t bytes_allocated = GetBytesAllocated();
   last_gc_size_ = bytes_allocated;
   last_gc_time_ns_ = NanoTime();
-
   size_t target_size;
   if (gc_type != collector::kGcTypeSticky) {
     // Grow the heap for non sticky GC.
@@ -1773,6 +1972,7 @@
     } else if (target_size < bytes_allocated + min_free_) {
       target_size = bytes_allocated + min_free_;
     }
+    native_need_to_run_finalization_ = true;
     next_gc_type_ = collector::kGcTypeSticky;
   } else {
     // Based on how close the current heap size is to the target size, decide
@@ -1782,7 +1982,6 @@
     } else {
       next_gc_type_ = collector::kGcTypePartial;
     }
-
     // If we have freed enough memory, shrink the heap back down.
     if (bytes_allocated + max_free_ < max_allowed_footprint_) {
       target_size = bytes_allocated + max_free_;
@@ -1790,13 +1989,10 @@
       target_size = std::max(bytes_allocated, max_allowed_footprint_);
     }
   }
-
   if (!ignore_max_footprint_) {
     SetIdealFootprint(target_size);
-
-    if (concurrent_gc_) {
+    if (concurrent_gc_ && AllocatorHasConcurrentGC(current_allocator_)) {
       // Calculate when to perform the next ConcurrentGC.
-
       // Calculate the estimated GC duration.
       double gc_duration_seconds = NsToMs(gc_duration) / 1000.0;
       // Estimate how many remaining bytes we will have when we need to start the next GC.
@@ -1811,26 +2007,25 @@
         // Start a concurrent GC when we get close to the estimated remaining bytes. When the
         // allocation rate is very high, remaining_bytes could tell us that we should start a GC
         // right away.
-        concurrent_start_bytes_ = std::max(max_allowed_footprint_ - remaining_bytes, bytes_allocated);
+        concurrent_start_bytes_ = std::max(max_allowed_footprint_ - remaining_bytes,
+                                           bytes_allocated);
       }
       DCHECK_LE(concurrent_start_bytes_, max_allowed_footprint_);
       DCHECK_LE(max_allowed_footprint_, growth_limit_);
     }
   }
-
-  UpdateMaxNativeFootprint();
 }
 
 void Heap::ClearGrowthLimit() {
   growth_limit_ = capacity_;
-  alloc_space_->ClearGrowthLimit();
+  non_moving_space_->ClearGrowthLimit();
 }
 
 void Heap::SetReferenceOffsets(MemberOffset reference_referent_offset,
-                                MemberOffset reference_queue_offset,
-                                MemberOffset reference_queueNext_offset,
-                                MemberOffset reference_pendingNext_offset,
-                                MemberOffset finalizer_reference_zombie_offset) {
+                               MemberOffset reference_queue_offset,
+                               MemberOffset reference_queueNext_offset,
+                               MemberOffset reference_pendingNext_offset,
+                               MemberOffset finalizer_reference_zombie_offset) {
   reference_referent_offset_ = reference_referent_offset;
   reference_queue_offset_ = reference_queue_offset;
   reference_queueNext_offset_ = reference_queueNext_offset;
@@ -1843,78 +2038,18 @@
   CHECK_NE(finalizer_reference_zombie_offset_.Uint32Value(), 0U);
 }
 
+void Heap::SetReferenceReferent(mirror::Object* reference, mirror::Object* referent) {
+  DCHECK(reference != NULL);
+  DCHECK_NE(reference_referent_offset_.Uint32Value(), 0U);
+  reference->SetFieldObject(reference_referent_offset_, referent, true);
+}
+
 mirror::Object* Heap::GetReferenceReferent(mirror::Object* reference) {
   DCHECK(reference != NULL);
   DCHECK_NE(reference_referent_offset_.Uint32Value(), 0U);
   return reference->GetFieldObject<mirror::Object*>(reference_referent_offset_, true);
 }
 
-void Heap::ClearReferenceReferent(mirror::Object* reference) {
-  DCHECK(reference != NULL);
-  DCHECK_NE(reference_referent_offset_.Uint32Value(), 0U);
-  reference->SetFieldObject(reference_referent_offset_, NULL, true);
-}
-
-// Returns true if the reference object has not yet been enqueued.
-bool Heap::IsEnqueuable(const mirror::Object* ref) {
-  DCHECK(ref != NULL);
-  const mirror::Object* queue =
-      ref->GetFieldObject<mirror::Object*>(reference_queue_offset_, false);
-  const mirror::Object* queue_next =
-      ref->GetFieldObject<mirror::Object*>(reference_queueNext_offset_, false);
-  return (queue != NULL) && (queue_next == NULL);
-}
-
-void Heap::EnqueueReference(mirror::Object* ref, mirror::Object** cleared_reference_list) {
-  DCHECK(ref != NULL);
-  CHECK(ref->GetFieldObject<mirror::Object*>(reference_queue_offset_, false) != NULL);
-  CHECK(ref->GetFieldObject<mirror::Object*>(reference_queueNext_offset_, false) == NULL);
-  EnqueuePendingReference(ref, cleared_reference_list);
-}
-
-bool Heap::IsEnqueued(mirror::Object* ref) {
-  // Since the references are stored as cyclic lists it means that once enqueued, the pending next
-  // will always be non-null.
-  return ref->GetFieldObject<mirror::Object*>(GetReferencePendingNextOffset(), false) != nullptr;
-}
-
-void Heap::EnqueuePendingReference(mirror::Object* ref, mirror::Object** list) {
-  DCHECK(ref != NULL);
-  DCHECK(list != NULL);
-  if (*list == NULL) {
-    // 1 element cyclic queue, ie: Reference ref = ..; ref.pendingNext = ref;
-    ref->SetFieldObject(reference_pendingNext_offset_, ref, false);
-    *list = ref;
-  } else {
-    mirror::Object* head =
-        (*list)->GetFieldObject<mirror::Object*>(reference_pendingNext_offset_, false);
-    ref->SetFieldObject(reference_pendingNext_offset_, head, false);
-    (*list)->SetFieldObject(reference_pendingNext_offset_, ref, false);
-  }
-}
-
-mirror::Object* Heap::DequeuePendingReference(mirror::Object** list) {
-  DCHECK(list != NULL);
-  DCHECK(*list != NULL);
-  mirror::Object* head = (*list)->GetFieldObject<mirror::Object*>(reference_pendingNext_offset_,
-                                                                  false);
-  mirror::Object* ref;
-
-  // Note: the following code is thread-safe because it is only called from ProcessReferences which
-  // is single threaded.
-  if (*list == head) {
-    ref = *list;
-    *list = NULL;
-  } else {
-    mirror::Object* next = head->GetFieldObject<mirror::Object*>(reference_pendingNext_offset_,
-                                                                 false);
-    (*list)->SetFieldObject(reference_pendingNext_offset_, next, false);
-    ref = head;
-  }
-  ref->SetFieldObject(reference_pendingNext_offset_, NULL, false);
-  return ref;
-}
-
 void Heap::AddFinalizerReference(Thread* self, mirror::Object* object) {
   ScopedObjectAccess soa(self);
   JValue result;
@@ -1924,19 +2059,18 @@
       arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
 }
 
-void Heap::EnqueueClearedReferences(mirror::Object** cleared) {
-  DCHECK(cleared != NULL);
-  if (*cleared != NULL) {
+void Heap::EnqueueClearedReferences() {
+  if (!cleared_references_.IsEmpty()) {
     // When a runtime isn't started there are no reference queues to care about, so ignore this.
     if (LIKELY(Runtime::Current()->IsStarted())) {
       ScopedObjectAccess soa(Thread::Current());
       JValue result;
       ArgArray arg_array(NULL, 0);
-      arg_array.Append(reinterpret_cast<uint32_t>(*cleared));
+      arg_array.Append(reinterpret_cast<uint32_t>(cleared_references_.GetList()));
       soa.DecodeMethod(WellKnownClasses::java_lang_ref_ReferenceQueue_add)->Invoke(soa.Self(),
           arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
     }
-    *cleared = NULL;
+    cleared_references_.Clear();
   }
 }
 
@@ -1944,43 +2078,39 @@
   // Make sure that we can do a concurrent GC.
   Runtime* runtime = Runtime::Current();
   DCHECK(concurrent_gc_);
-  if (runtime == NULL || !runtime->IsFinishedStarting() ||
-      !runtime->IsConcurrentGcEnabled()) {
+  if (runtime == NULL || !runtime->IsFinishedStarting() || runtime->IsShuttingDown(self) ||
+      self->IsHandlingStackOverflow()) {
     return;
   }
-  {
-    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-    if (runtime->IsShuttingDown()) {
-      return;
-    }
-  }
-  if (self->IsHandlingStackOverflow()) {
-    return;
-  }
-
   // We already have a request pending, no reason to start more until we update
   // concurrent_start_bytes_.
   concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
-
   JNIEnv* env = self->GetJniEnv();
-  DCHECK(WellKnownClasses::java_lang_Daemons != NULL);
-  DCHECK(WellKnownClasses::java_lang_Daemons_requestGC != NULL);
+  DCHECK(WellKnownClasses::java_lang_Daemons != nullptr);
+  DCHECK(WellKnownClasses::java_lang_Daemons_requestGC != nullptr);
   env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
                             WellKnownClasses::java_lang_Daemons_requestGC);
   CHECK(!env->ExceptionCheck());
 }
 
 void Heap::ConcurrentGC(Thread* self) {
-  {
-    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-    if (Runtime::Current()->IsShuttingDown()) {
-      return;
-    }
+  if (Runtime::Current()->IsShuttingDown(self)) {
+    return;
   }
-
   // Wait for any GCs currently running to finish.
-  if (WaitForConcurrentGcToComplete(self) == collector::kGcTypeNone) {
-    CollectGarbageInternal(next_gc_type_, kGcCauseBackground, false);
+  if (WaitForGcToComplete(self) == collector::kGcTypeNone) {
+    // If we can't run the GC type we wanted, find the next appropriate one and try that instead.
+    // E.g. if we can't do a partial GC, do a full GC instead.
+    if (CollectGarbageInternal(next_gc_type_, kGcCauseBackground, false) ==
+        collector::kGcTypeNone) {
+      for (collector::GcType gc_type : gc_plan_) {
+        // Attempt to run the collector, if we succeed, we are done.
+        if (gc_type > next_gc_type_ &&
+            CollectGarbageInternal(gc_type, kGcCauseBackground, false) != collector::kGcTypeNone) {
+          break;
+        }
+      }
+    }
   }
 }
 
@@ -1998,26 +2128,18 @@
   // We could try mincore(2) but that's only a measure of how many pages we haven't given away,
   // not how much use we're making of those pages.
   uint64_t ms_time = MilliTime();
-  // Note the large object space's bytes allocated is equal to its capacity.
-  uint64_t los_bytes_allocated = large_object_space_->GetBytesAllocated();
-  float utilization = static_cast<float>(GetBytesAllocated() - los_bytes_allocated) /
-      (GetTotalMemory() - los_bytes_allocated);
-  if ((utilization > 0.75f && !IsLowMemoryMode()) || ((ms_time - last_trim_time_ms_) < 2 * 1000)) {
-    // Don't bother trimming the alloc space if it's more than 75% utilized and low memory mode is
-    // not enabled, or if a heap trim occurred in the last two seconds.
+  // Don't bother trimming the alloc space if a heap trim occurred in the last two seconds.
+  if (ms_time - last_trim_time_ms_ < 2 * 1000) {
     return;
   }
 
   Thread* self = Thread::Current();
-  {
-    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-    Runtime* runtime = Runtime::Current();
-    if (runtime == NULL || !runtime->IsFinishedStarting() || runtime->IsShuttingDown()) {
-      // Heap trimming isn't supported without a Java runtime or Daemons (such as at dex2oat time)
-      // Also: we do not wish to start a heap trim if the runtime is shutting down (a racy check
-      // as we don't hold the lock while requesting the trim).
-      return;
-    }
+  Runtime* runtime = Runtime::Current();
+  if (runtime == nullptr || !runtime->IsFinishedStarting() || runtime->IsShuttingDown(self)) {
+    // Heap trimming isn't supported without a Java runtime or Daemons (such as at dex2oat time)
+    // Also: we do not wish to start a heap trim if the runtime is shutting down (a racy check
+    // as we don't hold the lock while requesting the trim).
+    return;
   }
 
   last_trim_time_ms_ = ms_time;
@@ -2034,50 +2156,66 @@
   }
 }
 
-size_t Heap::Trim() {
-  // Handle a requested heap trim on a thread outside of the main GC thread.
-  return alloc_space_->Trim();
+void Heap::RevokeThreadLocalBuffers(Thread* thread) {
+  non_moving_space_->RevokeThreadLocalBuffers(thread);
+}
+
+void Heap::RevokeAllThreadLocalBuffers() {
+  non_moving_space_->RevokeAllThreadLocalBuffers();
 }
 
 bool Heap::IsGCRequestPending() const {
   return concurrent_start_bytes_ != std::numeric_limits<size_t>::max();
 }
 
+void Heap::RunFinalization(JNIEnv* env) {
+  // Can't do this in WellKnownClasses::Init since System is not properly set up at that point.
+  if (WellKnownClasses::java_lang_System_runFinalization == nullptr) {
+    CHECK(WellKnownClasses::java_lang_System != nullptr);
+    WellKnownClasses::java_lang_System_runFinalization =
+        CacheMethod(env, WellKnownClasses::java_lang_System, true, "runFinalization", "()V");
+    CHECK(WellKnownClasses::java_lang_System_runFinalization != nullptr);
+  }
+  env->CallStaticVoidMethod(WellKnownClasses::java_lang_System,
+                            WellKnownClasses::java_lang_System_runFinalization);
+}
+
 void Heap::RegisterNativeAllocation(JNIEnv* env, int bytes) {
+  Thread* self = ThreadForEnv(env);
+  if (native_need_to_run_finalization_) {
+    RunFinalization(env);
+    UpdateMaxNativeFootprint();
+    native_need_to_run_finalization_ = false;
+  }
   // Total number of native bytes allocated.
   native_bytes_allocated_.fetch_add(bytes);
   if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_gc_watermark_) {
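+    // A partial GC skips the zygote space, so prefer it once a zygote space exists.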
+    collector::GcType gc_type = have_zygote_space_ ? collector::kGcTypePartial :
+        collector::kGcTypeFull;
+
     // The second watermark is higher than the gc watermark. If you hit this it means you are
     // allocating native objects faster than the GC can keep up with.
     if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_limit_) {
-        // Can't do this in WellKnownClasses::Init since System is not properly set up at that
-        // point.
-        if (UNLIKELY(WellKnownClasses::java_lang_System_runFinalization == NULL)) {
-          DCHECK(WellKnownClasses::java_lang_System != NULL);
-          WellKnownClasses::java_lang_System_runFinalization =
-              CacheMethod(env, WellKnownClasses::java_lang_System, true, "runFinalization", "()V");
-          CHECK(WellKnownClasses::java_lang_System_runFinalization != NULL);
-        }
-        if (WaitForConcurrentGcToComplete(ThreadForEnv(env)) != collector::kGcTypeNone) {
-          // Just finished a GC, attempt to run finalizers.
-          env->CallStaticVoidMethod(WellKnownClasses::java_lang_System,
-                                    WellKnownClasses::java_lang_System_runFinalization);
-          CHECK(!env->ExceptionCheck());
-        }
-
-        // If we still are over the watermark, attempt a GC for alloc and run finalizers.
-        if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_limit_) {
-          CollectGarbageInternal(collector::kGcTypePartial, kGcCauseForAlloc, false);
-          env->CallStaticVoidMethod(WellKnownClasses::java_lang_System,
-                                    WellKnownClasses::java_lang_System_runFinalization);
-          CHECK(!env->ExceptionCheck());
-        }
-        // We have just run finalizers, update the native watermark since it is very likely that
-        // finalizers released native managed allocations.
-        UpdateMaxNativeFootprint();
-    } else {
-      if (!IsGCRequestPending()) {
-        RequestConcurrentGC(ThreadForEnv(env));
+      if (WaitForGcToComplete(self) != collector::kGcTypeNone) {
+        // Just finished a GC, attempt to run finalizers.
+        RunFinalization(env);
+        CHECK(!env->ExceptionCheck());
+      }
+      // If we still are over the watermark, attempt a GC for alloc and run finalizers.
+      if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_limit_) {
+        CollectGarbageInternal(gc_type, kGcCauseForAlloc, false);
+        RunFinalization(env);
+        native_need_to_run_finalization_ = false;
+        CHECK(!env->ExceptionCheck());
+      }
+      // We have just run finalizers, update the native watermark since it is very likely that
+      // finalizers released native managed allocations.
+      UpdateMaxNativeFootprint();
+    } else if (!IsGCRequestPending()) {
+      if (concurrent_gc_ && AllocatorHasConcurrentGC(current_allocator_)) {
+        RequestConcurrentGC(self);
+      } else {
+        CollectGarbageInternal(gc_type, kGcCauseForAlloc, false);
       }
     }
   }
@@ -2086,26 +2224,24 @@
 void Heap::RegisterNativeFree(JNIEnv* env, int bytes) {
   int expected_size, new_size;
   do {
-      expected_size = native_bytes_allocated_.load();
-      new_size = expected_size - bytes;
-      if (UNLIKELY(new_size < 0)) {
-        ScopedObjectAccess soa(env);
-        env->ThrowNew(WellKnownClasses::java_lang_RuntimeException,
-                      StringPrintf("Attempted to free %d native bytes with only %d native bytes "
-                                   "registered as allocated", bytes, expected_size).c_str());
-        break;
-      }
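+    // Retry until the compare-and-swap succeeds; another thread may update the counter between
+    // our load and the swap.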
+    expected_size = native_bytes_allocated_.load();
+    new_size = expected_size - bytes;
+    if (UNLIKELY(new_size < 0)) {
+      ScopedObjectAccess soa(env);
+      env->ThrowNew(WellKnownClasses::java_lang_RuntimeException,
+                    StringPrintf("Attempted to free %d native bytes with only %d native bytes "
+                                 "registered as allocated", bytes, expected_size).c_str());
+      break;
+    }
   } while (!native_bytes_allocated_.compare_and_swap(expected_size, new_size));
 }
 
 int64_t Heap::GetTotalMemory() const {
   int64_t ret = 0;
   for (const auto& space : continuous_spaces_) {
-    if (space->IsImageSpace()) {
-      // Currently don't include the image space.
-    } else if (space->IsDlMallocSpace()) {
-      // Zygote or alloc space
-      ret += space->AsDlMallocSpace()->GetFootprint();
+    // Currently don't include the image space.
+    if (!space->IsImageSpace()) {
+      ret += space->Size();
     }
   }
   for (const auto& space : discontinuous_spaces_) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 7d2441b..8c5746d 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -26,11 +26,14 @@
 #include "gc/accounting/atomic_stack.h"
 #include "gc/accounting/card_table.h"
 #include "gc/collector/gc_type.h"
+#include "gc/collector_type.h"
 #include "globals.h"
 #include "gtest/gtest.h"
 #include "jni.h"
 #include "locks.h"
 #include "offsets.h"
+#include "reference_queue.h"
+#include "root_visitor.h"
 #include "safe_map.h"
 #include "thread_pool.h"
 
@@ -57,16 +60,21 @@
 namespace collector {
   class GarbageCollector;
   class MarkSweep;
+  class SemiSpace;
 }  // namespace collector
 
 namespace space {
   class AllocSpace;
+  class BumpPointerSpace;
   class DiscontinuousSpace;
   class DlMallocSpace;
   class ImageSpace;
   class LargeObjectSpace;
+  class MallocSpace;
+  class RosAllocSpace;
   class Space;
   class SpaceTest;
+  class ContinuousMemMapAllocSpace;
 }  // namespace space
 
 class AgeCardVisitor {
@@ -80,6 +88,13 @@
   }
 };
 
+// Different types of allocators.
+enum AllocatorType {
+  kAllocatorTypeBumpPointer,
+  kAllocatorTypeFreeList,  // ROSAlloc / dlmalloc
+  kAllocatorTypeLOS,  // Large object space.
+};
+
 // What caused the GC?
 enum GcCause {
   // GC triggered by a failed allocation. Thread doing allocation is blocked waiting for GC before
@@ -101,13 +116,16 @@
 };
 static constexpr HeapVerificationMode kDesiredHeapVerification = kNoHeapVerification;
 
-// If true, measure the total allocation time.
-static constexpr bool kMeasureAllocationTime = false;
-// Primitive arrays larger than this size are put in the large object space.
-static constexpr size_t kLargeObjectThreshold = 3 * kPageSize;
+// If true, use rosalloc/RosAllocSpace instead of dlmalloc/DlMallocSpace
+static constexpr bool kUseRosAlloc = true;
 
 class Heap {
  public:
+  // If true, measure the total allocation time.
+  static constexpr bool kMeasureAllocationTime = false;
+  // Primitive arrays larger than this size are put in the large object space.
+  static constexpr size_t kLargeObjectThreshold = 3 * kPageSize;
+
   static constexpr size_t kDefaultInitialSize = 2 * MB;
   static constexpr size_t kDefaultMaximumSize = 32 * MB;
   static constexpr size_t kDefaultMaxFree = 2 * MB;
@@ -126,33 +144,63 @@
   // ImageWriter output.
   explicit Heap(size_t initial_size, size_t growth_limit, size_t min_free,
                 size_t max_free, double target_utilization, size_t capacity,
-                const std::string& original_image_file_name, bool concurrent_gc,
+                const std::string& original_image_file_name, CollectorType collector_type_,
                 size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
-                size_t long_pause_threshold, size_t long_gc_threshold, bool ignore_max_footprint);
+                size_t long_pause_threshold, size_t long_gc_threshold,
+                bool ignore_max_footprint);
 
   ~Heap();
 
   // Allocates and initializes storage for an object instance.
-  mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes)
+  template <const bool kInstrumented>
+  inline mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return AllocObjectInstrumented(self, klass, num_bytes);
+    return AllocObjectWithAllocator<kInstrumented>(self, klass, num_bytes, GetCurrentAllocator());
   }
-  mirror::Object* AllocObjectInstrumented(Thread* self, mirror::Class* klass, size_t num_bytes)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::Object* AllocObjectUninstrumented(Thread* self, mirror::Class* klass, size_t num_bytes)
+  template <const bool kInstrumented>
+  inline mirror::Object* AllocNonMovableObject(Thread* self, mirror::Class* klass,
+                                               size_t num_bytes)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return AllocObjectWithAllocator<kInstrumented>(self, klass, num_bytes,
+                                                   GetCurrentNonMovingAllocator());
+  }
+  template <bool kInstrumented, typename PreFenceVisitor = VoidFunctor>
+  ALWAYS_INLINE mirror::Object* AllocObjectWithAllocator(
+      Thread* self, mirror::Class* klass, size_t byte_count, AllocatorType allocator,
+      const PreFenceVisitor& pre_fence_visitor = VoidFunctor())
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void DebugCheckPreconditionsForAllobObject(mirror::Class* c, size_t byte_count)
+  AllocatorType GetCurrentAllocator() const {
+    return current_allocator_;
+  }
+
+  AllocatorType GetCurrentNonMovingAllocator() const {
+    return current_non_moving_allocator_;
+  }
+
+  // Visit all of the live objects in the heap.
+  void VisitObjects(ObjectVisitorCallback callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  void SwapSemiSpaces() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void DebugCheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ThrowOutOfMemoryError(size_t byte_count, bool large_object_allocation);
 
   void RegisterNativeAllocation(JNIEnv* env, int bytes);
   void RegisterNativeFree(JNIEnv* env, int bytes);
 
+  // Change the allocator, updates entrypoints.
+  void ChangeAllocator(AllocatorType allocator);
+
+  // Change the collector to be one of the possible options (MS, CMS, SS).
+  void ChangeCollector(CollectorType collector_type);
+
   // The given reference is believed to be to an object in the Java heap; check the soundness of it.
   void VerifyObjectImpl(const mirror::Object* o);
   void VerifyObject(const mirror::Object* o) {
-    if (o != NULL && this != NULL && verify_object_mode_ > kNoHeapVerification) {
+    if (o != nullptr && this != nullptr && verify_object_mode_ > kNoHeapVerification) {
       VerifyObjectImpl(o);
     }
   }
@@ -169,7 +217,10 @@
   // A weaker test than IsLiveObject or VerifyObject that doesn't require the heap lock,
   // and doesn't abort on error, allowing the caller to report more
   // meaningful diagnostics.
-  bool IsHeapAddress(const mirror::Object* obj);
+  bool IsValidObjectAddress(const mirror::Object* obj) const;
+
+  // Returns true if the address passed in is a heap address; it doesn't need to be aligned.
+  bool IsHeapAddress(const mirror::Object* obj) const;
 
   // Returns true if 'obj' is a live heap object, false otherwise (including for invalid addresses).
   // Requires the heap lock to be held.
@@ -177,6 +228,17 @@
                           bool search_live_stack = true, bool sorted = false)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  // Returns true if there is any chance that the object (obj) will move.
+  bool IsMovableObject(const mirror::Object* obj) const;
+
+  // Returns true if an object is in the temp space; if this happens, it usually indicates
+  // compaction related errors.
+  bool IsInTempSpace(const mirror::Object* obj) const;
+
+  // Enables us to prevent GC until objects are released.
+  void IncrementDisableGC(Thread* self);
+  void DecrementDisableGC(Thread* self);
+
   // Initiates an explicit garbage collection.
   void CollectGarbage(bool clear_soft_references) LOCKS_EXCLUDED(Locks::mutator_lock_);
 
@@ -221,9 +283,9 @@
   // from the system. Doesn't allow the space to exceed its growth limit.
   void SetIdealFootprint(size_t max_allowed_footprint);
 
-  // Blocks the caller until the garbage collector becomes idle and returns
-  // true if we waited for the GC to complete.
-  collector::GcType WaitForConcurrentGcToComplete(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_);
+  // Blocks the caller until the garbage collector becomes idle and returns the type of GC we
+  // waited for.
+  collector::GcType WaitForGcToComplete(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_);
 
   const std::vector<space::ContinuousSpace*>& GetContinuousSpaces() const {
     return continuous_spaces_;
@@ -238,29 +300,26 @@
                            MemberOffset reference_queueNext_offset,
                            MemberOffset reference_pendingNext_offset,
                            MemberOffset finalizer_reference_zombie_offset);
-
-  mirror::Object* GetReferenceReferent(mirror::Object* reference);
-  void ClearReferenceReferent(mirror::Object* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  // Returns true if the reference object has not yet been enqueued.
-  bool IsEnqueuable(const mirror::Object* ref);
-  void EnqueueReference(mirror::Object* ref, mirror::Object** list)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool IsEnqueued(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void EnqueuePendingReference(mirror::Object* ref, mirror::Object** list)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::Object* DequeuePendingReference(mirror::Object** list)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  MemberOffset GetReferencePendingNextOffset() {
-    DCHECK_NE(reference_pendingNext_offset_.Uint32Value(), 0U);
+  MemberOffset GetReferenceReferentOffset() const {
+    return reference_referent_offset_;
+  }
+  MemberOffset GetReferenceQueueOffset() const {
+    return reference_queue_offset_;
+  }
+  MemberOffset GetReferenceQueueNextOffset() const {
+    return reference_queueNext_offset_;
+  }
+  MemberOffset GetReferencePendingNextOffset() const {
     return reference_pendingNext_offset_;
   }
-
-  MemberOffset GetFinalizerReferenceZombieOffset() {
-    DCHECK_NE(finalizer_reference_zombie_offset_.Uint32Value(), 0U);
+  MemberOffset GetFinalizerReferenceZombieOffset() const {
     return finalizer_reference_zombie_offset_;
   }
+  static mirror::Object* PreserveSoftReferenceCallback(mirror::Object* obj, void* arg);
+  void ProcessReferences(TimingLogger& timings, bool clear_soft, RootVisitor* is_marked_callback,
+                         RootVisitor* recursive_mark_object_callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Enable verification of object references when the runtime is sufficiently initialized.
   void EnableObjectValidation() {
@@ -300,6 +359,10 @@
     card_table_->MarkCard(dst);
   }
 
+  void WriteBarrierEveryFieldOf(const mirror::Object* obj) {
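+    // Marking the card for obj itself causes the GC to rescan all of its reference fields, so a
+    // single card mark covers every field write.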
+    card_table_->MarkCard(obj);
+  }
+
   accounting::CardTable* GetCardTable() const {
     return card_table_.get();
   }
@@ -312,7 +375,7 @@
   }
 
   // Returns the number of objects currently allocated.
-  size_t GetObjectsAllocated() const;
+  size_t GetObjectsAllocated() const LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   // Returns the total number of objects allocated since the heap was created.
   size_t GetObjectsAllocatedEver() const;
@@ -357,7 +420,11 @@
 
   void DumpForSigQuit(std::ostream& os);
 
-  size_t Trim();
+  // Trim the managed and native heaps by releasing unused memory back to the OS.
+  void Trim();
+
+  void RevokeThreadLocalBuffers(Thread* thread);
+  void RevokeAllThreadLocalBuffers();
 
   accounting::HeapBitmap* GetLiveBitmap() SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     return live_bitmap_.get();
@@ -371,7 +438,7 @@
     return live_stack_.get();
   }
 
-  void PreZygoteFork() LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void PreZygoteFork() NO_THREAD_SAFETY_ANALYSIS;
 
   // Mark and empty stack.
   void FlushAllocStack()
@@ -382,6 +449,10 @@
                       accounting::ObjectStack* stack)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  // Mark the specified allocation stack as live.
+  void MarkAllocStackAsLive(accounting::ObjectStack* stack)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   // Gets called when we get notified by ActivityThread that the process state has changed.
   void ListenForProcessStateChange();
 
@@ -389,31 +460,15 @@
   // Assumes there is only one image space.
   space::ImageSpace* GetImageSpace() const;
 
-  space::DlMallocSpace* GetAllocSpace() const {
-    return alloc_space_;
+  space::MallocSpace* GetNonMovingSpace() const {
+    return non_moving_space_;
   }
 
   space::LargeObjectSpace* GetLargeObjectsSpace() const {
     return large_object_space_;
   }
 
-  Mutex* GetSoftRefQueueLock() {
-    return soft_ref_queue_lock_;
-  }
-
-  Mutex* GetWeakRefQueueLock() {
-    return weak_ref_queue_lock_;
-  }
-
-  Mutex* GetFinalizerRefQueueLock() {
-    return finalizer_ref_queue_lock_;
-  }
-
-  Mutex* GetPhantomRefQueueLock() {
-    return phantom_ref_queue_lock_;
-  }
-
-  void DumpSpaces();
+  void DumpSpaces(std::ostream& stream = LOG(INFO));
 
   // GC performance measuring
   void DumpGcPerformanceInfo(std::ostream& os);
@@ -438,55 +493,41 @@
   accounting::ModUnionTable* FindModUnionTableFromSpace(space::Space* space);
   void AddModUnionTable(accounting::ModUnionTable* mod_union_table);
 
+  bool IsCompilingBoot() const;
+  bool HasImageSpace() const;
+
  private:
-  bool TryAllocLargeObjectInstrumented(Thread* self, mirror::Class* c, size_t byte_count,
-                                       mirror::Object** obj_ptr, size_t* bytes_allocated)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool TryAllocLargeObjectUninstrumented(Thread* self, mirror::Class* c, size_t byte_count,
-                                         mirror::Object** obj_ptr, size_t* bytes_allocated)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count);
-  void CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated, mirror::Object* obj);
+  void Compact(space::ContinuousMemMapAllocSpace* target_space,
+               space::ContinuousMemMapAllocSpace* source_space);
 
-  // Allocates uninitialized storage. Passing in a null space tries to place the object in the
-  // large object space.
-  template <class T> mirror::Object* AllocateInstrumented(Thread* self, T* space, size_t num_bytes,
-                                                          size_t* bytes_allocated)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  template <class T> mirror::Object* AllocateUninstrumented(Thread* self, T* space, size_t num_bytes,
-                                                            size_t* bytes_allocated)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
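+  // Whether the given allocator type uses an allocation stack and supports concurrent
+  // collection; bump pointer allocation supports neither, since it pairs with the
+  // stop-the-world semi-space collector.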
+  static bool AllocatorHasAllocationStack(AllocatorType allocator_type) {
+    return allocator_type != kAllocatorTypeBumpPointer;
+  }
+  static bool AllocatorHasConcurrentGC(AllocatorType allocator_type) {
+    return allocator_type != kAllocatorTypeBumpPointer;
+  }
+  bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const;
+  ALWAYS_INLINE void CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated,
+                                       mirror::Object* obj);
 
   // Handles Allocate()'s slow allocation path with GC involved after
   // an initial allocation attempt failed.
-  mirror::Object* AllocateInternalWithGc(Thread* self, space::AllocSpace* space, size_t num_bytes,
+  mirror::Object* AllocateInternalWithGc(Thread* self, AllocatorType allocator, size_t num_bytes,
                                          size_t* bytes_allocated)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Try to allocate a number of bytes, this function never does any GCs.
-  mirror::Object* TryToAllocateInstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
-                                            bool grow, size_t* bytes_allocated)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+  // Allocate into a specific space.
+  mirror::Object* AllocateInto(Thread* self, space::AllocSpace* space, mirror::Class* c,
+                               size_t bytes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Try to allocate a number of bytes, this function never does any GCs. DlMallocSpace-specialized version.
-  mirror::Object* TryToAllocateInstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
-                                            bool grow, size_t* bytes_allocated)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  mirror::Object* TryToAllocateUninstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
-                                              bool grow, size_t* bytes_allocated)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  mirror::Object* TryToAllocateUninstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
-                                              bool grow, size_t* bytes_allocated)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+  // Try to allocate a number of bytes; this function never does any GCs. Needs to be inlined so
+  // that the switch statement is constant-optimized in the entrypoints.
+  template <const bool kInstrumented>
+  ALWAYS_INLINE mirror::Object* TryToAllocate(Thread* self, AllocatorType allocator_type,
+                                              size_t alloc_size, bool grow,
+                                              size_t* bytes_allocated)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation)
@@ -494,7 +535,28 @@
   bool IsOutOfMemoryOnAllocation(size_t alloc_size, bool grow);
 
   // Pushes a list of cleared references out to the managed heap.
-  void EnqueueClearedReferences(mirror::Object** cleared_references);
+  void SetReferenceReferent(mirror::Object* reference, mirror::Object* referent)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Object* GetReferenceReferent(mirror::Object* reference)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ClearReferenceReferent(mirror::Object* reference)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SetReferenceReferent(reference, nullptr);
+  }
+  void EnqueueClearedReferences();
+  // Returns true if the reference object has not yet been enqueued.
+  bool IsEnqueuable(const mirror::Object* ref) const;
+  bool IsEnqueued(mirror::Object* ref) const;
+  void DelayReferenceReferent(mirror::Class* klass, mirror::Object* obj, RootVisitor mark_visitor,
+                              void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Run the finalizers.
+  void RunFinalization(JNIEnv* env);
+
+  // Blocks the caller until the garbage collector becomes idle and returns the type of GC we
+  // waited for.
+  collector::GcType WaitForGcToCompleteLocked(Thread* self)
+      EXCLUSIVE_LOCKS_REQUIRED(gc_complete_lock_);
 
   void RequestHeapTrim() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
   void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
@@ -533,9 +595,7 @@
 
   size_t GetPercentFree();
 
-  void AddContinuousSpace(space::ContinuousSpace* space) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
-  void AddDiscontinuousSpace(space::DiscontinuousSpace* space)
-      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void AddSpace(space::Space* space) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   // No thread safety analysis since we call this everywhere and it is impossible to find a proper
   // lock ordering for it.
@@ -548,7 +608,7 @@
   void SwapStacks();
 
   // Clear cards and update the mod union table.
-  void ProcessCards(base::TimingLogger& timings);
+  void ProcessCards(TimingLogger& timings);
 
   // All-known continuous spaces, where objects lie within fixed bounds.
   std::vector<space::ContinuousSpace*> continuous_spaces_;
@@ -556,8 +616,12 @@
   // All-known discontinuous spaces, where objects may be placed throughout virtual memory.
   std::vector<space::DiscontinuousSpace*> discontinuous_spaces_;
 
-  // The allocation space we are currently allocating into.
-  space::DlMallocSpace* alloc_space_;
+  // All-known alloc spaces, where objects may be or have been allocated.
+  std::vector<space::AllocSpace*> alloc_spaces_;
+
+  // A space where non-movable objects are allocated; when compaction is enabled it contains
+  // Classes, ArtMethods, ArtFields, and other non-moving objects.
+  space::MallocSpace* non_moving_space_;
 
   // The large object space we are currently allocating into.
   space::LargeObjectSpace* large_object_space_;
@@ -572,6 +636,9 @@
   // false for stop-the-world mark sweep.
   const bool concurrent_gc_;
 
+  // The current collector type.
+  CollectorType collector_type_;
+
   // How many GC threads we may use for paused parts of garbage collection.
   const size_t parallel_gc_threads_;
 
@@ -595,17 +662,22 @@
   // If we have a zygote space.
   bool have_zygote_space_;
 
+  // Number of pinned primitive arrays in the movable space.
+  // Block all GC until this hits zero, or we hit the timeout!
+  size_t number_gc_blockers_;
+  static constexpr size_t KGCBlockTimeout = 30000;
+
   // Guards access to the state of GC, associated conditional variable is used to signal when a GC
   // completes.
   Mutex* gc_complete_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   UniquePtr<ConditionVariable> gc_complete_cond_ GUARDED_BY(gc_complete_lock_);
 
-  // Mutexes held when adding references to reference queues.
-  // TODO: move to a UniquePtr, currently annotalysis is confused that UniquePtr isn't lockable.
-  Mutex* soft_ref_queue_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  Mutex* weak_ref_queue_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  Mutex* finalizer_ref_queue_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  Mutex* phantom_ref_queue_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  // Reference queues.
+  ReferenceQueue soft_reference_queue_;
+  ReferenceQueue weak_reference_queue_;
+  ReferenceQueue finalizer_reference_queue_;
+  ReferenceQueue phantom_reference_queue_;
+  ReferenceQueue cleared_references_;
 
   // True while the garbage collector is running.
   volatile bool is_gc_running_ GUARDED_BY(gc_complete_lock_);
@@ -631,6 +703,9 @@
   // The watermark at which a GC is performed inside of registerNativeAllocation.
   size_t native_footprint_limit_;
 
+  // Whether or not we need to run finalizers in the next native allocation.
+  bool native_need_to_run_finalization_;
+
   // Activity manager members.
   jclass activity_thread_class_;
   jclass application_thread_class_;
@@ -704,24 +779,31 @@
   // Allocation stack, new allocations go here so that we can do sticky mark bits. This enables us
   // to use the live bitmap as the old mark bitmap.
   const size_t max_allocation_stack_size_;
-  bool is_allocation_stack_sorted_;
   UniquePtr<accounting::ObjectStack> allocation_stack_;
 
   // Second allocation stack so that we can process allocation with the heap unlocked.
   UniquePtr<accounting::ObjectStack> live_stack_;
 
+  // Allocator type.
+  AllocatorType current_allocator_;
+  const AllocatorType current_non_moving_allocator_;
+
+  // Which GCs we run, in order, when an allocation fails.
+  std::vector<collector::GcType> gc_plan_;
+
+  // Bump pointer spaces.
+  space::BumpPointerSpace* bump_pointer_space_;
+  // Temp space is the space that the semi-space collector copies to.
+  space::BumpPointerSpace* temp_space_;
+
   // offset of java.lang.ref.Reference.referent
   MemberOffset reference_referent_offset_;
-
   // offset of java.lang.ref.Reference.queue
   MemberOffset reference_queue_offset_;
-
   // offset of java.lang.ref.Reference.queueNext
   MemberOffset reference_queueNext_offset_;
-
   // offset of java.lang.ref.Reference.pendingNext
   MemberOffset reference_pendingNext_offset_;
-
   // offset of java.lang.ref.FinalizerReference.zombie
   MemberOffset finalizer_reference_zombie_offset_;
 
@@ -744,11 +826,17 @@
   // The current state of heap verification, may be enabled or disabled.
   HeapVerificationMode verify_object_mode_;
 
-  std::vector<collector::MarkSweep*> mark_sweep_collectors_;
+  // GC disable count, error on GC if > 0.
+  size_t gc_disable_count_ GUARDED_BY(gc_complete_lock_);
+
+  std::vector<collector::GarbageCollector*> garbage_collectors_;
+  collector::SemiSpace* semi_space_collector_;
 
   const bool running_on_valgrind_;
 
   friend class collector::MarkSweep;
+  friend class collector::SemiSpace;
+  friend class ReferenceQueue;
   friend class VerifyReferenceCardVisitor;
   friend class VerifyReferenceVisitor;
   friend class VerifyObjectVisitor;
diff --git a/runtime/gc/heap_test.cc b/runtime/gc/heap_test.cc
index 02708e8..8af2725 100644
--- a/runtime/gc/heap_test.cc
+++ b/runtime/gc/heap_test.cc
@@ -43,12 +43,14 @@
     ScopedObjectAccess soa(Thread::Current());
     // garbage is created during ClassLinker::Init
 
-    mirror::Class* c = class_linker_->FindSystemClass("[Ljava/lang/Object;");
+    SirtRef<mirror::Class> c(soa.Self(), class_linker_->FindSystemClass("[Ljava/lang/Object;"));
     for (size_t i = 0; i < 1024; ++i) {
       SirtRef<mirror::ObjectArray<mirror::Object> > array(soa.Self(),
-          mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), c, 2048));
+          mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), c.get(), 2048));
       for (size_t j = 0; j < 2048; ++j) {
-        array->Set(j, mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!"));
+        mirror::String* string = mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!");
+        // The SIRT operator-> dereferences the SIRT before running the method.
+        array->Set(j, string);
       }
     }
   }
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
new file mode 100644
index 0000000..d006349
--- /dev/null
+++ b/runtime/gc/reference_queue.cc
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "reference_queue.h"
+
+#include "accounting/card_table-inl.h"
+#include "heap.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+
+namespace art {
+namespace gc {
+
+ReferenceQueue::ReferenceQueue(Heap* heap)
+    : lock_("reference queue lock"),
+      heap_(heap),
+      list_(nullptr) {
+}
+
+void ReferenceQueue::AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Object* ref) {
+  DCHECK(ref != NULL);
+  MutexLock mu(self, lock_);
+  if (!heap_->IsEnqueued(ref)) {
+    EnqueuePendingReference(ref);
+  }
+}
+
+void ReferenceQueue::EnqueueReference(mirror::Object* ref) {
+  CHECK(heap_->IsEnqueuable(ref));
+  EnqueuePendingReference(ref);
+}
+
+void ReferenceQueue::EnqueuePendingReference(mirror::Object* ref) {
+  DCHECK(ref != NULL);
+  MemberOffset pending_next_offset = heap_->GetReferencePendingNextOffset();
+  DCHECK_NE(pending_next_offset.Uint32Value(), 0U);
+  if (IsEmpty()) {
+    // 1 element cyclic queue, i.e. Reference ref = ..; ref.pendingNext = ref;
+    ref->SetFieldObject(pending_next_offset, ref, false);
+    list_ = ref;
+  } else {
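+    // Insert ref right after the list head, keeping the pending-next chain circular.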
+    mirror::Object* head =
+        list_->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+    ref->SetFieldObject(pending_next_offset, head, false);
+    list_->SetFieldObject(pending_next_offset, ref, false);
+  }
+}
+
+mirror::Object* ReferenceQueue::DequeuePendingReference() {
+  DCHECK(!IsEmpty());
+  MemberOffset pending_next_offset = heap_->GetReferencePendingNextOffset();
+  mirror::Object* head = list_->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+  DCHECK(head != nullptr);
+  mirror::Object* ref;
+  // Note: the following code is thread-safe because it is only called from ProcessReferences which
+  // is single threaded.
+  if (list_ == head) {
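+    // The queue contained a single element, so it is now empty.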
+    ref = list_;
+    list_ = nullptr;
+  } else {
+    mirror::Object* next = head->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+    list_->SetFieldObject(pending_next_offset, next, false);
+    ref = head;
+  }
+  ref->SetFieldObject(pending_next_offset, nullptr, false);
+  return ref;
+}
+
+void ReferenceQueue::Dump(std::ostream& os) const {
+  mirror::Object* cur = list_;
+  os << "Reference starting at list_=" << list_ << "\n";
+  while (cur != nullptr) {
+    mirror::Object* pending_next =
+        cur->GetFieldObject<mirror::Object*>(heap_->GetReferencePendingNextOffset(), false);
+    os << "PendingNext=" << pending_next;
+    if (cur->GetClass()->IsFinalizerReferenceClass()) {
+      os << " Zombie=" <<
+          cur->GetFieldObject<mirror::Object*>(heap_->GetFinalizerReferenceZombieOffset(), false);
+    }
+    os << "\n";
+    cur = pending_next;
+  }
+}
+
+void ReferenceQueue::ClearWhiteReferences(ReferenceQueue& cleared_references, RootVisitor visitor,
+                                          void* arg) {
+  while (!IsEmpty()) {
+    mirror::Object* ref = DequeuePendingReference();
+    mirror::Object* referent = heap_->GetReferenceReferent(ref);
+    if (referent != nullptr) {
+      mirror::Object* forward_address = visitor(referent, arg);
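+      // The visitor returns null if the referent is not marked (white), or the referent's new
+      // address if the collector moved it.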
+      if (forward_address == nullptr) {
+        // Referent is white, clear it.
+        heap_->ClearReferenceReferent(ref);
+        if (heap_->IsEnqueuable(ref)) {
+          cleared_references.EnqueuePendingReference(ref);
+        }
+      } else if (referent != forward_address) {
+        // The object moved, so we need to update the referent.
+        heap_->SetReferenceReferent(ref, forward_address);
+      }
+    }
+  }
+}
+
+void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
+                                                RootVisitor is_marked_callback,
+                                                RootVisitor recursive_mark_callback, void* arg) {
+  while (!IsEmpty()) {
+    mirror::Object* ref = DequeuePendingReference();
+    mirror::Object* referent = heap_->GetReferenceReferent(ref);
+    if (referent != nullptr) {
+      mirror::Object* forward_address = is_marked_callback(referent, arg);
+      // If the referent isn't marked, mark it now and obtain its forwarding address.
+      if (forward_address == nullptr) {
+        forward_address = recursive_mark_callback(referent, arg);
+        // If the referent is non-null the reference must be enqueuable.
+        DCHECK(heap_->IsEnqueuable(ref));
+        // Move the updated referent to the zombie field.
+        ref->SetFieldObject(heap_->GetFinalizerReferenceZombieOffset(), forward_address, false);
+        heap_->ClearReferenceReferent(ref);
+        cleared_references.EnqueueReference(ref);
+      } else if (referent != forward_address) {
+        heap_->SetReferenceReferent(ref, forward_address);
+      }
+    }
+  }
+}
+
+void ReferenceQueue::PreserveSomeSoftReferences(RootVisitor preserve_callback, void* arg) {
+  ReferenceQueue cleared(heap_);
+  while (!IsEmpty()) {
+    mirror::Object* ref = DequeuePendingReference();
+    mirror::Object* referent = heap_->GetReferenceReferent(ref);
+    if (referent != nullptr) {
+      mirror::Object* forward_address = preserve_callback(referent, arg);
+      if (forward_address == nullptr) {
+        // Either the reference isn't marked or we don't wish to preserve it.
+        cleared.EnqueuePendingReference(ref);
+      } else {
+        heap_->SetReferenceReferent(ref, forward_address);
+      }
+    }
+  }
+  list_ = cleared.GetList();
+}
+
+}  // namespace gc
+}  // namespace art
+
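
The pending list built above is a cyclic singly linked list threaded through the
Reference objects themselves: list_ stays on the first element enqueued, new
references are linked in at list_->pendingNext, and DequeuePendingReference unlinks
from that same position, so ordering is LIFO (nothing here relies on FIFO order).
A minimal standalone sketch of the same structure, with a hypothetical Node standing
in for mirror::Object and its pendingNext field:

    #include <cassert>

    // Node stands in for a java.lang.ref.Reference with an embedded pendingNext
    // field; PendingList mirrors the list_ manipulation in ReferenceQueue.
    struct Node {
      Node* pending_next = nullptr;
    };

    struct PendingList {
      Node* list = nullptr;  // Plays the role of list_ above.

      void Enqueue(Node* ref) {
        if (list == nullptr) {
          ref->pending_next = ref;  // One-element cyclic list: ref.pendingNext = ref.
          list = ref;
        } else {
          ref->pending_next = list->pending_next;  // New node becomes list->pending_next.
          list->pending_next = ref;
        }
      }

      Node* Dequeue() {
        assert(list != nullptr);
        Node* head = list->pending_next;
        if (list == head) {
          list = nullptr;  // Removing the only element empties the list.
        } else {
          list->pending_next = head->pending_next;  // Unlink the head.
        }
        head->pending_next = nullptr;
        return head;
      }
    };
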
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
new file mode 100644
index 0000000..89589c3
--- /dev/null
+++ b/runtime/gc/reference_queue.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_REFERENCE_QUEUE_H_
+#define ART_RUNTIME_GC_REFERENCE_QUEUE_H_
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+#include "atomic_integer.h"
+#include "base/timing_logger.h"
+#include "globals.h"
+#include "gtest/gtest.h"
+#include "jni.h"
+#include "locks.h"
+#include "offsets.h"
+#include "root_visitor.h"
+#include "thread_pool.h"
+
+namespace art {
+namespace gc {
+
+class Heap;
+
+// Used to temporarily store java.lang.ref.Reference(s) during GC and prior to queueing on the
+// appropriate java.lang.ref.ReferenceQueue. The linked list is maintained in the
+// java.lang.ref.Reference objects.
+class ReferenceQueue {
+ public:
+  explicit ReferenceQueue(Heap* heap);
+  // Enqueue a reference if it is not already enqueued. Thread safe to call from multiple threads
+  // since it uses a lock to avoid a race between checking for the reference's presence and
+  // adding it.
+  void AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Object* ref)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
+  // Enqueue a reference. Unlike EnqueuePendingReference, EnqueueReference checks that the
+  // reference IsEnqueuable. Not thread safe, used when mutators are paused to minimize lock
+  // overhead.
+  void EnqueueReference(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void EnqueuePendingReference(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Object* DequeuePendingReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Enqueues finalizer references with white referents.  White referents are blackened, moved to the
+  // zombie field, and the referent field is cleared.
+  void EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
+                                  RootVisitor is_marked_callback,
+                                  RootVisitor recursive_mark_callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Walks the reference list marking any references subject to the reference clearing policy.
+  // References with a black referent are removed from the list.  References with white referents
+  // biased toward saving are blackened and also removed from the list.
+  void PreserveSomeSoftReferences(RootVisitor preserve_callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Unlinks the reference list, clearing any reference objects with white referents. Cleared
+  // references registered to a reference queue are scheduled for appending by the heap worker
+  // thread.
+  void ClearWhiteReferences(ReferenceQueue& cleared_references, RootVisitor visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void Dump(std::ostream& os) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsEmpty() const {
+    return list_ == nullptr;
+  }
+  void Clear() {
+    list_ = nullptr;
+  }
+  mirror::Object* GetList() {
+    return list_;
+  }
+
+ private:
+  // Lock, used for parallel GC reference enqueuing. It allows for multiple threads simultaneously
+  // calling AtomicEnqueueIfNotEnqueued.
+  Mutex lock_;
+  // The heap contains the reference offsets.
+  Heap* const heap_;
+  // The actual reference list. Not a root since it will be nullptr when the GC is not running.
+  mirror::Object* list_;
+};
+
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_REFERENCE_QUEUE_H_
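
Taken together, these entry points suggest the shape of a reference-processing pass.
A hedged sketch of a plausible driver (the real driver lives in Heap; the function
and queue names below are illustrative, not part of this patch):

    // Illustrative driver: soft/weak/finalizer/phantom queues are filled during
    // marking, then drained in this order, funnelling into `cleared`.
    void ProcessReferencesSketch(ReferenceQueue& soft, ReferenceQueue& weak,
                                 ReferenceQueue& finalizer, ReferenceQueue& phantom,
                                 ReferenceQueue& cleared, RootVisitor is_marked,
                                 RootVisitor recursive_mark, void* arg) {
      // Optionally keep some softly reachable referents alive (clearing policy).
      soft.PreserveSomeSoftReferences(is_marked, arg);
      // References whose referents stayed white are cleared and queued.
      soft.ClearWhiteReferences(cleared, is_marked, arg);
      weak.ClearWhiteReferences(cleared, is_marked, arg);
      // Finalizable referents are blackened into the zombie field instead of cleared.
      finalizer.EnqueueFinalizerReferences(cleared, is_marked, recursive_mark, arg);
      phantom.ClearWhiteReferences(cleared, is_marked, arg);
    }
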
diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h
new file mode 100644
index 0000000..85ef2f4
--- /dev/null
+++ b/runtime/gc/space/bump_pointer_space-inl.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_INL_H_
+#define ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_INL_H_
+
+#include "bump_pointer_space.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+inline mirror::Object* BumpPointerSpace::AllocNonvirtual(size_t num_bytes) {
+  num_bytes = RoundUp(num_bytes, kAlignment);
+  byte* old_end;
+  byte* new_end;
+  do {
+    old_end = end_;
+    new_end = old_end + num_bytes;
+    // If there is no more room in the region, we are out of memory.
+    if (UNLIKELY(new_end > growth_end_)) {
+      return nullptr;
+    }
+    // TODO: Use a cas which always equals the size of pointers.
+  } while (android_atomic_cas(reinterpret_cast<int32_t>(old_end),
+                              reinterpret_cast<int32_t>(new_end),
+                              reinterpret_cast<volatile int32_t*>(&end_)) != 0);
+  // TODO: Fewer statistics?
+  total_bytes_allocated_.fetch_add(num_bytes);
+  num_objects_allocated_.fetch_add(1);
+  total_objects_allocated_.fetch_add(1);
+  return reinterpret_cast<mirror::Object*>(old_end);
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_INL_H_
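
AllocNonvirtual is a lock-free bump-pointer allocation: round the request up to
kAlignment, then CAS the shared end_ cursor forward, retrying if another thread won
the race. The 32-bit android_atomic_cas (the subject of the TODO above) can be
sketched with a pointer-sized std::atomic; the names below are stand-ins for the
BumpPointerSpace members, not the patch's API:

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    constexpr size_t kAlignment = 8;  // Matches BumpPointerSpace::kAlignment.

    inline size_t RoundUpTo(size_t x, size_t n) {
      return (x + n - 1) & ~(n - 1);  // n must be a power of two.
    }

    // `end` plays the role of end_ and `growth_end` of growth_end_.
    void* BumpAlloc(std::atomic<uint8_t*>& end, uint8_t* growth_end, size_t num_bytes) {
      num_bytes = RoundUpTo(num_bytes, kAlignment);
      uint8_t* old_end = end.load(std::memory_order_relaxed);
      uint8_t* new_end;
      do {
        new_end = old_end + num_bytes;
        if (new_end > growth_end) {
          return nullptr;  // Out of space in the region.
        }
        // On failure, compare_exchange_weak reloads old_end with the current
        // cursor, so the loop retries against the latest value.
      } while (!end.compare_exchange_weak(old_end, new_end, std::memory_order_relaxed));
      return old_end;  // The new object starts at the previous end.
    }
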
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
new file mode 100644
index 0000000..06ba57e
--- /dev/null
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bump_pointer_space.h"
+#include "bump_pointer_space-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/class-inl.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+BumpPointerSpace* BumpPointerSpace::Create(const std::string& name, size_t capacity,
+                                           byte* requested_begin) {
+  capacity = RoundUp(capacity, kPageSize);
+  std::string error_msg;
+  UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
+                                                 PROT_READ | PROT_WRITE, &error_msg));
+  if (mem_map.get() == nullptr) {
+    LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
+        << PrettySize(capacity) << " with message " << error_msg;
+    return nullptr;
+  }
+  return new BumpPointerSpace(name, mem_map.release());
+}
+
+BumpPointerSpace::BumpPointerSpace(const std::string& name, byte* begin, byte* limit)
+    : ContinuousMemMapAllocSpace(name, nullptr, begin, begin, limit,
+                                 kGcRetentionPolicyAlwaysCollect),
+      num_objects_allocated_(0), total_bytes_allocated_(0), total_objects_allocated_(0),
+      growth_end_(limit) {
+}
+
+BumpPointerSpace::BumpPointerSpace(const std::string& name, MemMap* mem_map)
+    : ContinuousMemMapAllocSpace(name, mem_map, mem_map->Begin(), mem_map->Begin(), mem_map->End(),
+                                 kGcRetentionPolicyAlwaysCollect),
+      num_objects_allocated_(0), total_bytes_allocated_(0), total_objects_allocated_(0),
+      growth_end_(mem_map->End()) {
+}
+
+mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t* bytes_allocated) {
+  mirror::Object* ret = AllocNonvirtual(num_bytes);
+  if (LIKELY(ret != nullptr)) {
+    *bytes_allocated = num_bytes;
+  }
+  return ret;
+}
+
+size_t BumpPointerSpace::AllocationSize(const mirror::Object* obj) {
+  return AllocationSizeNonvirtual(obj);
+}
+
+void BumpPointerSpace::Clear() {
+  // Release the pages back to the operating system.
+  CHECK_NE(madvise(Begin(), Limit() - Begin(), MADV_DONTNEED), -1) << "madvise failed";
+  // Reset the end of the space back to the beginning, we move the end forward as we allocate
+  // objects.
+  SetEnd(Begin());
+  growth_end_ = Limit();
+  num_objects_allocated_ = 0;
+}
+
+void BumpPointerSpace::Dump(std::ostream& os) const {
+  os << reinterpret_cast<void*>(Begin()) << "-" << reinterpret_cast<void*>(End()) << " - "
+     << reinterpret_cast<void*>(Limit());
+}
+
+mirror::Object* BumpPointerSpace::GetNextObject(mirror::Object* obj) {
+  const uintptr_t position = reinterpret_cast<uintptr_t>(obj) + obj->SizeOf();
+  return reinterpret_cast<mirror::Object*>(RoundUp(position, kAlignment));
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
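
Because every allocation is padded to kAlignment, GetNextObject lets a collector walk
the space linearly: start at Begin(), visit, then hop by SizeOf() rounded up. A sketch
of such a walk (ObjectVisitor is an illustrative stand-in, not part of this patch):

    // Illustrative linear walk over [Begin(), End()); relies on objects being
    // laid out back to back at kAlignment boundaries.
    typedef void (*ObjectVisitor)(mirror::Object* obj, void* arg);

    void WalkBumpPointerSpace(BumpPointerSpace* space, ObjectVisitor visitor, void* arg) {
      mirror::Object* obj = reinterpret_cast<mirror::Object*>(space->Begin());
      while (reinterpret_cast<byte*>(obj) < space->End()) {
        visitor(obj, arg);
        obj = BumpPointerSpace::GetNextObject(obj);  // position + SizeOf(), aligned.
      }
    }
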
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
new file mode 100644
index 0000000..2edd3e2
--- /dev/null
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
+
+#include "space.h"
+
+namespace art {
+namespace gc {
+
+namespace collector {
+  class MarkSweep;
+}  // namespace collector
+
+namespace space {
+
+// A bump pointer space is a space where objects may be allocated and garbage collected.
+class BumpPointerSpace : public ContinuousMemMapAllocSpace {
+ public:
+  typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
+
+  SpaceType GetType() const {
+    return kSpaceTypeBumpPointerSpace;
+  }
+
+  // Create a bump pointer space with the requested sizes. The requested base address is not
+  // guaranteed to be granted; if it is required, the caller should call Begin on the returned
+  // space to confirm the request was granted.
+  static BumpPointerSpace* Create(const std::string& name, size_t capacity, byte* requested_begin);
+
+  // Allocate num_bytes, returns nullptr if the space is full.
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+  mirror::Object* AllocNonvirtual(size_t num_bytes);
+
+  // Return the storage space required by obj.
+  virtual size_t AllocationSize(const mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // No-ops unless we support free lists.
+  virtual size_t Free(Thread*, mirror::Object*) {
+    return 0;
+  }
+  virtual size_t FreeList(Thread*, size_t, mirror::Object**) {
+    return 0;
+  }
+
+  size_t AllocationSizeNonvirtual(const mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return obj->SizeOf();
+  }
+
+  // Removes the fork time growth limit on capacity, allowing the application to allocate up to the
+  // maximum reserved size of the heap.
+  void ClearGrowthLimit() {
+    growth_end_ = Limit();
+  }
+
+  // Override capacity so that we only return the possibly limited capacity
+  size_t Capacity() const {
+    return growth_end_ - begin_;
+  }
+
+  // The total amount of memory reserved for the space.
+  size_t NonGrowthLimitCapacity() const {
+    return GetMemMap()->Size();
+  }
+
+  accounting::SpaceBitmap* GetLiveBitmap() const {
+    return nullptr;
+  }
+
+  accounting::SpaceBitmap* GetMarkBitmap() const {
+    return nullptr;
+  }
+
+  // Clear the memory and reset the pointer to the start of the space.
+  void Clear();
+
+  void Dump(std::ostream& os) const;
+
+  uint64_t GetBytesAllocated() {
+    return Size();
+  }
+
+  uint64_t GetObjectsAllocated() {
+    return num_objects_allocated_;
+  }
+
+  uint64_t GetTotalBytesAllocated() {
+    return total_bytes_allocated_;
+  }
+
+  uint64_t GetTotalObjectsAllocated() {
+    return total_objects_allocated_;
+  }
+
+  bool Contains(const mirror::Object* obj) const {
+    const byte* byte_obj = reinterpret_cast<const byte*>(obj);
+    return byte_obj >= Begin() && byte_obj < End();
+  }
+
+  // TODO: Change this? Mainly used for compacting to a particular region of memory.
+  BumpPointerSpace(const std::string& name, byte* begin, byte* limit);
+
+  // Return the object which comes after obj, while ensuring alignment.
+  static mirror::Object* GetNextObject(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  virtual BumpPointerSpace* AsBumpPointerSpace() {
+    return this;
+  }
+
+  // Object alignment within the space.
+  static constexpr size_t kAlignment = 8;
+
+ protected:
+  BumpPointerSpace(const std::string& name, MemMap* mem_map);
+
+  size_t InternalAllocationSize(const mirror::Object* obj);
+  mirror::Object* AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated)
+      EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // Approximate counts of objects and bytes which have been allocated into the space.
+  AtomicInteger num_objects_allocated_;
+  AtomicInteger total_bytes_allocated_;
+  AtomicInteger total_objects_allocated_;
+
+  byte* growth_end_;
+
+ private:
+  friend class collector::MarkSweep;
+  DISALLOW_COPY_AND_ASSIGN(BumpPointerSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h
index fb2c66b..c14a4e1 100644
--- a/runtime/gc/space/dlmalloc_space-inl.h
+++ b/runtime/gc/space/dlmalloc_space-inl.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_INL_H_
 
 #include "dlmalloc_space.h"
+#include "thread.h"
 
 namespace art {
 namespace gc {
@@ -28,7 +29,7 @@
   mirror::Object* obj;
   {
     MutexLock mu(self, lock_);
-    obj = AllocWithoutGrowthLocked(num_bytes, bytes_allocated);
+    obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
   }
   if (LIKELY(obj != NULL)) {
     // Zero freshly allocated memory, done while not holding the space's lock.
@@ -37,8 +38,9 @@
   return obj;
 }
 
-inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated) {
-  mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_, num_bytes));
+inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/, size_t num_bytes,
+                                                               size_t* bytes_allocated) {
+  mirror::Object* result =
+      reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_for_alloc_, num_bytes));
   if (LIKELY(result != NULL)) {
     if (kDebugSpaces) {
       CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 9ebc16a..10e9ed8 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -13,13 +13,17 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "dlmalloc_space.h"
+
 #include "dlmalloc_space-inl.h"
 #include "gc/accounting/card_table.h"
 #include "gc/heap.h"
+#include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
 #include "thread.h"
+#include "thread_list.h"
 #include "utils.h"
 
 #include <valgrind.h>
@@ -29,170 +33,40 @@
 namespace gc {
 namespace space {
 
-// TODO: Remove define macro
-#define CHECK_MEMORY_CALL(call, args, what) \
-  do { \
-    int rc = call args; \
-    if (UNLIKELY(rc != 0)) { \
-      errno = rc; \
-      PLOG(FATAL) << # call << " failed for " << what; \
-    } \
-  } while (false)
-
 static const bool kPrefetchDuringDlMallocFreeList = true;
 
-// Number of bytes to use as a red zone (rdz). A red zone of this size will be placed before and
-// after each allocation. 8 bytes provides long/double alignment.
-const size_t kValgrindRedZoneBytes = 8;
-
-// A specialization of DlMallocSpace that provides information to valgrind wrt allocations.
-class ValgrindDlMallocSpace : public DlMallocSpace {
- public:
-  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
-    void* obj_with_rdz = DlMallocSpace::AllocWithGrowth(self, num_bytes + 2 * kValgrindRedZoneBytes,
-                                                        bytes_allocated);
-    if (obj_with_rdz == NULL) {
-      return NULL;
-    }
-    mirror::Object* result = reinterpret_cast<mirror::Object*>(
-        reinterpret_cast<byte*>(obj_with_rdz) + kValgrindRedZoneBytes);
-    // Make redzones as no access.
-    VALGRIND_MAKE_MEM_NOACCESS(obj_with_rdz, kValgrindRedZoneBytes);
-    VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<byte*>(result) + num_bytes, kValgrindRedZoneBytes);
-    return result;
-  }
-
-  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
-    void* obj_with_rdz = DlMallocSpace::Alloc(self, num_bytes + 2 * kValgrindRedZoneBytes,
-                                              bytes_allocated);
-    if (obj_with_rdz == NULL) {
-     return NULL;
-    }
-    mirror::Object* result = reinterpret_cast<mirror::Object*>(
-        reinterpret_cast<byte*>(obj_with_rdz) + kValgrindRedZoneBytes);
-    // Make redzones as no access.
-    VALGRIND_MAKE_MEM_NOACCESS(obj_with_rdz, kValgrindRedZoneBytes);
-    VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<byte*>(result) + num_bytes, kValgrindRedZoneBytes);
-    return result;
-  }
-
-  virtual size_t AllocationSize(const mirror::Object* obj) {
-    size_t result = DlMallocSpace::AllocationSize(reinterpret_cast<const mirror::Object*>(
-        reinterpret_cast<const byte*>(obj) - kValgrindRedZoneBytes));
-    return result - 2 * kValgrindRedZoneBytes;
-  }
-
-  virtual size_t Free(Thread* self, mirror::Object* ptr) {
-    void* obj_after_rdz = reinterpret_cast<void*>(ptr);
-    void* obj_with_rdz = reinterpret_cast<byte*>(obj_after_rdz) - kValgrindRedZoneBytes;
-    // Make redzones undefined.
-    size_t allocation_size = DlMallocSpace::AllocationSize(
-        reinterpret_cast<mirror::Object*>(obj_with_rdz));
-    VALGRIND_MAKE_MEM_UNDEFINED(obj_with_rdz, allocation_size);
-    size_t freed = DlMallocSpace::Free(self, reinterpret_cast<mirror::Object*>(obj_with_rdz));
-    return freed - 2 * kValgrindRedZoneBytes;
-  }
-
-  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
-    size_t freed = 0;
-    for (size_t i = 0; i < num_ptrs; i++) {
-      freed += Free(self, ptrs[i]);
-    }
-    return freed;
-  }
-
-  ValgrindDlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin,
-                        byte* end, size_t growth_limit, size_t initial_size) :
-      DlMallocSpace(name, mem_map, mspace, begin, end, growth_limit) {
-    VALGRIND_MAKE_MEM_UNDEFINED(mem_map->Begin() + initial_size, mem_map->Size() - initial_size);
-  }
-
-  virtual ~ValgrindDlMallocSpace() {
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(ValgrindDlMallocSpace);
-};
-
-size_t DlMallocSpace::bitmap_index_ = 0;
-
 DlMallocSpace::DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin,
-                       byte* end, size_t growth_limit)
-    : MemMapSpace(name, mem_map, end - begin, kGcRetentionPolicyAlwaysCollect),
-      recent_free_pos_(0), total_bytes_freed_(0), total_objects_freed_(0),
-      lock_("allocation space lock", kAllocSpaceLock), mspace_(mspace),
-      growth_limit_(growth_limit) {
+                             byte* end, byte* limit, size_t growth_limit)
+    : MallocSpace(name, mem_map, begin, end, limit, growth_limit),
+      total_bytes_freed_(0), total_objects_freed_(0), mspace_(mspace), mspace_for_alloc_(mspace) {
   CHECK(mspace != NULL);
-
-  size_t bitmap_index = bitmap_index_++;
-
-  static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
-  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
-  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
-  live_bitmap_.reset(accounting::SpaceBitmap::Create(
-      StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
-      Begin(), Capacity()));
-  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #" << bitmap_index;
-
-  mark_bitmap_.reset(accounting::SpaceBitmap::Create(
-      StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
-      Begin(), Capacity()));
-  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #" << bitmap_index;
-
-  for (auto& freed : recent_freed_objects_) {
-    freed.first = nullptr;
-    freed.second = nullptr;
-  }
 }
 
-DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size, size_t
-                                     growth_limit, size_t capacity, byte* requested_begin) {
-  // Memory we promise to dlmalloc before it asks for morecore.
-  // Note: making this value large means that large allocations are unlikely to succeed as dlmalloc
-  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
-  // size of the large allocation) will be greater than the footprint limit.
-  size_t starting_size = kPageSize;
+DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
+                                     size_t capacity, byte* requested_begin) {
   uint64_t start_time = 0;
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     start_time = NanoTime();
-    VLOG(startup) << "Space::CreateAllocSpace entering " << name
+    VLOG(startup) << "DlMallocSpace::Create entering " << name
                   << " initial_size=" << PrettySize(initial_size)
                   << " growth_limit=" << PrettySize(growth_limit)
                   << " capacity=" << PrettySize(capacity)
                   << " requested_begin=" << reinterpret_cast<void*>(requested_begin);
   }
 
-  // Sanity check arguments
-  if (starting_size > initial_size) {
-    initial_size = starting_size;
-  }
-  if (initial_size > growth_limit) {
-    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the initial size ("
-        << PrettySize(initial_size) << ") is larger than its capacity ("
-        << PrettySize(growth_limit) << ")";
+  // Memory we promise to dlmalloc before it asks for morecore.
+  // Note: making this value large means that large allocations are unlikely to succeed as dlmalloc
+  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
+  // size of the large allocation) will be greater than the footprint limit.
+  size_t starting_size = kPageSize;
+  MemMap* mem_map = CreateMemMap(name, starting_size, &initial_size, &growth_limit, &capacity,
+                                 requested_begin);
+  if (mem_map == NULL) {
+    LOG(ERROR) << "Failed to create mem map for alloc space (" << name << ") of size "
+               << PrettySize(capacity);
     return NULL;
   }
-  if (growth_limit > capacity) {
-    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the growth limit capacity ("
-        << PrettySize(growth_limit) << ") is larger than the capacity ("
-        << PrettySize(capacity) << ")";
-    return NULL;
-  }
-
-  // Page align growth limit and capacity which will be used to manage mmapped storage
-  growth_limit = RoundUp(growth_limit, kPageSize);
-  capacity = RoundUp(capacity, kPageSize);
-
-  std::string error_msg;
-  UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
-                                                 PROT_READ | PROT_WRITE, &error_msg));
-  if (mem_map.get() == NULL) {
-    LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
-        << PrettySize(capacity) << ": " << error_msg;
-    return NULL;
-  }
-
-  void* mspace = CreateMallocSpace(mem_map->Begin(), starting_size, initial_size);
+  void* mspace = CreateMspace(mem_map->Begin(), starting_size, initial_size);
   if (mspace == NULL) {
     LOG(ERROR) << "Failed to initialize mspace for alloc space (" << name << ")";
     return NULL;
@@ -205,22 +79,23 @@
   }
 
   // Everything is set so record in immutable structure and leave
-  MemMap* mem_map_ptr = mem_map.release();
   DlMallocSpace* space;
+  byte* begin = mem_map->Begin();
   if (RUNNING_ON_VALGRIND > 0) {
-    space = new ValgrindDlMallocSpace(name, mem_map_ptr, mspace, mem_map_ptr->Begin(), end,
-                                      growth_limit, initial_size);
+    space = new ValgrindMallocSpace<DlMallocSpace, void*>(
+        name, mem_map, mspace, begin, end, begin + capacity, growth_limit, initial_size);
   } else {
-    space = new DlMallocSpace(name, mem_map_ptr, mspace, mem_map_ptr->Begin(), end, growth_limit);
+    space = new DlMallocSpace(name, mem_map, mspace, begin, end, begin + capacity, growth_limit);
   }
+  // We start out with only the initial size possibly containing objects.
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
-    LOG(INFO) << "Space::CreateAllocSpace exiting (" << PrettyDuration(NanoTime() - start_time)
+    LOG(INFO) << "DlMallocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
         << " ) " << *space;
   }
   return space;
 }
 
-void* DlMallocSpace::CreateMallocSpace(void* begin, size_t morecore_start, size_t initial_size) {
+void* DlMallocSpace::CreateMspace(void* begin, size_t morecore_start, size_t initial_size) {
   // clear errno to allow PLOG on error
   errno = 0;
   // create mspace using our backing storage starting at begin and with a footprint of
@@ -236,14 +111,6 @@
   return msp;
 }
 
-void DlMallocSpace::SwapBitmaps() {
-  live_bitmap_.swap(mark_bitmap_);
-  // Swap names to get more descriptive diagnostics.
-  std::string temp_name(live_bitmap_->GetName());
-  live_bitmap_->SetName(mark_bitmap_->GetName());
-  mark_bitmap_->SetName(temp_name);
-}
-
 mirror::Object* DlMallocSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
   return AllocNonvirtual(self, num_bytes, bytes_allocated);
 }
@@ -252,11 +119,11 @@
   mirror::Object* result;
   {
     MutexLock mu(self, lock_);
-    // Grow as much as possible within the mspace.
+    // Grow as much as possible within the space.
     size_t max_allowed = Capacity();
     mspace_set_footprint_limit(mspace_, max_allowed);
     // Try the allocation.
-    result = AllocWithoutGrowthLocked(num_bytes, bytes_allocated);
+    result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
     // Shrink back down as small as possible.
     size_t footprint = mspace_footprint(mspace_);
     mspace_set_footprint_limit(mspace_, footprint);
@@ -270,82 +137,9 @@
   return result;
 }
 
-void DlMallocSpace::SetGrowthLimit(size_t growth_limit) {
-  growth_limit = RoundUp(growth_limit, kPageSize);
-  growth_limit_ = growth_limit;
-  if (Size() > growth_limit_) {
-    end_ = begin_ + growth_limit;
-  }
-}
-
-DlMallocSpace* DlMallocSpace::CreateZygoteSpace(const char* alloc_space_name) {
-  end_ = reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(end_), kPageSize));
-  DCHECK(IsAligned<accounting::CardTable::kCardSize>(begin_));
-  DCHECK(IsAligned<accounting::CardTable::kCardSize>(end_));
-  DCHECK(IsAligned<kPageSize>(begin_));
-  DCHECK(IsAligned<kPageSize>(end_));
-  size_t size = RoundUp(Size(), kPageSize);
-  // Trim the heap so that we minimize the size of the Zygote space.
-  Trim();
-  // TODO: Not hardcode these in?
-  const size_t starting_size = kPageSize;
-  const size_t initial_size = 2 * MB;
-  // Remaining size is for the new alloc space.
-  const size_t growth_limit = growth_limit_ - size;
-  const size_t capacity = Capacity() - size;
-  VLOG(heap) << "Begin " << reinterpret_cast<const void*>(begin_) << "\n"
-             << "End " << reinterpret_cast<const void*>(end_) << "\n"
-             << "Size " << size << "\n"
-             << "GrowthLimit " << growth_limit_ << "\n"
-             << "Capacity " << Capacity();
-  SetGrowthLimit(RoundUp(size, kPageSize));
-  SetFootprintLimit(RoundUp(size, kPageSize));
-  // FIXME: Do we need reference counted pointers here?
-  // Make the two spaces share the same mark bitmaps since the bitmaps span both of the spaces.
-  VLOG(heap) << "Creating new AllocSpace: ";
-  VLOG(heap) << "Size " << GetMemMap()->Size();
-  VLOG(heap) << "GrowthLimit " << PrettySize(growth_limit);
-  VLOG(heap) << "Capacity " << PrettySize(capacity);
-  // Remap the tail.
-  std::string error_msg;
-  UniquePtr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
-                                                    PROT_READ | PROT_WRITE, &error_msg));
-  CHECK(mem_map.get() != nullptr) << error_msg;
-  void* mspace = CreateMallocSpace(end_, starting_size, initial_size);
-  // Protect memory beyond the initial size.
-  byte* end = mem_map->Begin() + starting_size;
-  if (capacity - initial_size > 0) {
-    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), alloc_space_name);
-  }
-  DlMallocSpace* alloc_space =
-      new DlMallocSpace(alloc_space_name, mem_map.release(), mspace, end_, end, growth_limit);
-  live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
-  CHECK_EQ(live_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
-  mark_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
-  CHECK_EQ(mark_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
-  VLOG(heap) << "zygote space creation done";
-  return alloc_space;
-}
-
-mirror::Class* DlMallocSpace::FindRecentFreedObject(const mirror::Object* obj) {
-  size_t pos = recent_free_pos_;
-  // Start at the most recently freed object and work our way back since there may be duplicates
-  // caused by dlmalloc reusing memory.
-  if (kRecentFreeCount > 0) {
-    for (size_t i = 0; i + 1 < kRecentFreeCount + 1; ++i) {
-      pos = pos != 0 ? pos - 1 : kRecentFreeMask;
-      if (recent_freed_objects_[pos].first == obj) {
-        return recent_freed_objects_[pos].second;
-      }
-    }
-  }
-  return nullptr;
-}
-
-void DlMallocSpace::RegisterRecentFree(mirror::Object* ptr) {
-  recent_freed_objects_[recent_free_pos_].first = ptr;
-  recent_freed_objects_[recent_free_pos_].second = ptr->GetClass();
-  recent_free_pos_ = (recent_free_pos_ + 1) & kRecentFreeMask;
+MallocSpace* DlMallocSpace::CreateInstance(const std::string& name, MemMap* mem_map,
+                                           void* allocator, byte* begin, byte* end,
+                                           byte* limit, size_t growth_limit) {
+  return new DlMallocSpace(name, mem_map, allocator, begin, end, limit, growth_limit);
 }
 
 size_t DlMallocSpace::Free(Thread* self, mirror::Object* ptr) {
@@ -412,38 +206,9 @@
 // Callback from dlmalloc when it needs to increase the footprint
 extern "C" void* art_heap_morecore(void* mspace, intptr_t increment) {
   Heap* heap = Runtime::Current()->GetHeap();
-  DCHECK_EQ(heap->GetAllocSpace()->GetMspace(), mspace);
-  return heap->GetAllocSpace()->MoreCore(increment);
-}
-
-void* DlMallocSpace::MoreCore(intptr_t increment) {
-  lock_.AssertHeld(Thread::Current());
-  byte* original_end = end_;
-  if (increment != 0) {
-    VLOG(heap) << "DlMallocSpace::MoreCore " << PrettySize(increment);
-    byte* new_end = original_end + increment;
-    if (increment > 0) {
-      // Should never be asked to increase the allocation beyond the capacity of the space. Enforced
-      // by mspace_set_footprint_limit.
-      CHECK_LE(new_end, Begin() + Capacity());
-      CHECK_MEMORY_CALL(mprotect, (original_end, increment, PROT_READ | PROT_WRITE), GetName());
-    } else {
-      // Should never be asked for negative footprint (ie before begin)
-      CHECK_GT(original_end + increment, Begin());
-      // Advise we don't need the pages and protect them
-      // TODO: by removing permissions to the pages we may be causing TLB shoot-down which can be
-      // expensive (note the same isn't true for giving permissions to a page as the protected
-      // page shouldn't be in a TLB). We should investigate performance impact of just
-      // removing ignoring the memory protection change here and in Space::CreateAllocSpace. It's
-      // likely just a useful debug feature.
-      size_t size = -increment;
-      CHECK_MEMORY_CALL(madvise, (new_end, size, MADV_DONTNEED), GetName());
-      CHECK_MEMORY_CALL(mprotect, (new_end, size, PROT_NONE), GetName());
-    }
-    // Update end_
-    end_ = new_end;
-  }
-  return original_end;
+  DCHECK(heap->GetNonMovingSpace()->IsDlMallocSpace());
+  DCHECK_EQ(heap->GetNonMovingSpace()->AsDlMallocSpace()->GetMspace(), mspace);
+  return heap->GetNonMovingSpace()->MoreCore(increment);
 }
 
 // Virtual functions can't get inlined.
@@ -484,7 +249,7 @@
 
 void DlMallocSpace::SetFootprintLimit(size_t new_size) {
   MutexLock mu(Thread::Current(), lock_);
-  VLOG(heap) << "DLMallocSpace::SetFootprintLimit " << PrettySize(new_size);
+  VLOG(heap) << "DlMallocSpace::SetFootprintLimit " << PrettySize(new_size);
   // Compare against the actual footprint, rather than the Size(), because the heap may not have
   // grown all the way to the allowed size yet.
   size_t current_space_size = mspace_footprint(mspace_);
@@ -495,14 +260,6 @@
   mspace_set_footprint_limit(mspace_, new_size);
 }
 
-void DlMallocSpace::Dump(std::ostream& os) const {
-  os << GetType()
-      << " begin=" << reinterpret_cast<void*>(Begin())
-      << ",end=" << reinterpret_cast<void*>(End())
-      << ",size=" << PrettySize(Size()) << ",capacity=" << PrettySize(Capacity())
-      << ",name=\"" << GetName() << "\"]";
-}
-
 uint64_t DlMallocSpace::GetBytesAllocated() {
   MutexLock mu(Thread::Current(), lock_);
   size_t bytes_allocated = 0;
@@ -517,6 +274,12 @@
   return objects_allocated;
 }
 
+#ifndef NDEBUG
+void DlMallocSpace::CheckMoreCoreForPrecondition() {
+  lock_.AssertHeld(Thread::Current());
+}
+#endif
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
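
With MoreCore hoisted into the shared MallocSpace base class, dlmalloc's grow path now
routes through the heap's non-moving space. Roughly, the call chain is:

    mspace_malloc(mspace_, n)               // allocation exceeds the current footprint
      -> dlmalloc's sys_alloc               // bounded by mspace_set_footprint_limit
        -> art_heap_morecore(mspace, inc)   // the C callback above
          -> Heap::GetNonMovingSpace()->MoreCore(inc)
            -> mprotect()s the next inc bytes read/write and advances end_
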
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index 522535e..d18d4ad 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_H_
 
 #include "gc/allocator/dlmalloc.h"
+#include "malloc_space.h"
 #include "space.h"
 
 namespace art {
@@ -30,33 +31,18 @@
 namespace space {
 
 // An alloc space is a space where objects may be allocated and garbage collected.
-class DlMallocSpace : public MemMapSpace, public AllocSpace {
+class DlMallocSpace : public MallocSpace {
  public:
-  typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
-
-  SpaceType GetType() const {
-    if (GetGcRetentionPolicy() == kGcRetentionPolicyFullCollect) {
-      return kSpaceTypeZygoteSpace;
-    } else {
-      return kSpaceTypeAllocSpace;
-    }
-  }
-
-  // Create a AllocSpace with the requested sizes. The requested
+  // Create a DlMallocSpace with the requested sizes. The requested
   // base address is not guaranteed to be granted, if it is required,
-  // the caller should call Begin on the returned space to confirm
-  // the request was granted.
+  // the caller should call Begin on the returned space to confirm the
+  // request was granted.
   static DlMallocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
                                size_t capacity, byte* requested_begin);
 
-  // Allocate num_bytes without allowing the underlying mspace to grow.
   virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
                                           size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
-
-  // Allocate num_bytes allowing the underlying mspace to grow.
   virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
-
-  // Return the storage space required by obj.
   virtual size_t AllocationSize(const mirror::Object* obj);
   virtual size_t Free(Thread* self, mirror::Object* ptr);
   virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
@@ -64,17 +50,19 @@
   mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
 
   size_t AllocationSizeNonvirtual(const mirror::Object* obj) {
-    return mspace_usable_size(const_cast<void*>(reinterpret_cast<const void*>(obj))) +
-        kChunkOverhead;
+    void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
+    return mspace_usable_size(obj_ptr) + kChunkOverhead;
   }
 
-  void* MoreCore(intptr_t increment);
+#ifndef NDEBUG
+  // Override only in the debug build.
+  void CheckMoreCoreForPrecondition();
+#endif
 
   void* GetMspace() const {
     return mspace_;
   }
 
-  // Hands unused pages back to the system.
   size_t Trim();
 
   // Perform a mspace_inspect_all which calls back for each allocation chunk. The chunk may not be
@@ -93,39 +81,8 @@
   // allocations fail we GC before increasing the footprint limit and allowing the mspace to grow.
   void SetFootprintLimit(size_t limit);
 
-  // Removes the fork time growth limit on capacity, allowing the application to allocate up to the
-  // maximum reserved size of the heap.
-  void ClearGrowthLimit() {
-    growth_limit_ = NonGrowthLimitCapacity();
-  }
-
-  // Override capacity so that we only return the possibly limited capacity
-  size_t Capacity() const {
-    return growth_limit_;
-  }
-
-  // The total amount of memory reserved for the alloc space.
-  size_t NonGrowthLimitCapacity() const {
-    return GetMemMap()->Size();
-  }
-
-  accounting::SpaceBitmap* GetLiveBitmap() const {
-    return live_bitmap_.get();
-  }
-
-  accounting::SpaceBitmap* GetMarkBitmap() const {
-    return mark_bitmap_.get();
-  }
-
-  void Dump(std::ostream& os) const;
-
-  void SetGrowthLimit(size_t growth_limit);
-
-  // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
-  void SwapBitmaps();
-
-  // Turn ourself into a zygote space and return a new alloc space which has our unused memory.
-  DlMallocSpace* CreateZygoteSpace(const char* alloc_space_name);
+  MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                              byte* begin, byte* end, byte* limit, size_t growth_limit);
 
   uint64_t GetBytesAllocated();
   uint64_t GetObjectsAllocated();
@@ -139,51 +96,45 @@
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
+  virtual void InvalidateAllocator() {
+    mspace_for_alloc_ = nullptr;
+  }
+
+  virtual bool IsDlMallocSpace() const {
+    return true;
+  }
+  virtual DlMallocSpace* AsDlMallocSpace() {
+    return this;
+  }
+
  protected:
   DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin, byte* end,
-                size_t growth_limit);
+                byte* limit, size_t growth_limit);
 
  private:
   size_t InternalAllocationSize(const mirror::Object* obj);
-  mirror::Object* AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated)
+
+  mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  bool Init(size_t initial_size, size_t maximum_size, size_t growth_size, byte* requested_base);
-  void RegisterRecentFree(mirror::Object* ptr);
-  static void* CreateMallocSpace(void* base, size_t morecore_start, size_t initial_size);
 
-  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
-
-  // Recent allocation buffer.
-  static constexpr size_t kRecentFreeCount = kDebugSpaces ? (1 << 16) : 0;
-  static constexpr size_t kRecentFreeMask = kRecentFreeCount - 1;
-  std::pair<const mirror::Object*, mirror::Class*> recent_freed_objects_[kRecentFreeCount];
-  size_t recent_free_pos_;
+  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) {
+    return CreateMspace(base, morecore_start, initial_size);
+  }
+  static void* CreateMspace(void* base, size_t morecore_start, size_t initial_size);
 
   // Approximate number of bytes and objects which have been deallocated in the space.
   size_t total_bytes_freed_;
   size_t total_objects_freed_;
 
-  static size_t bitmap_index_;
-
   // The boundary tag overhead.
   static const size_t kChunkOverhead = kWordSize;
 
-  // Used to ensure mutual exclusion when the allocation spaces data structures are being modified.
-  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-
   // Underlying malloc space
   void* const mspace_;
 
-  // The capacity of the alloc space until such time that ClearGrowthLimit is called.
-  // The underlying mem_map_ controls the maximum size we allow the heap to grow to. The growth
-  // limit is a value <= to the mem_map_ capacity used for ergonomic reasons because of the zygote.
-  // Prior to forking the zygote the heap will have a maximally sized mem_map_ but the growth_limit_
-  // will be set to a lower value. The growth_limit_ is used as the capacity of the alloc_space_,
-  // however, capacity normally can't vary. In the case of the growth_limit_ it can be cleared
-  // one time by a call to ClearGrowthLimit.
-  size_t growth_limit_;
+  // An mspace pointer used for allocation. Equal to mspace_, or
+  // nullptr after InvalidateAllocator() is called.
 
   friend class collector::MarkSweep;
 
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index e12ee06..c6177bd 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -39,8 +39,9 @@
 
 ImageSpace::ImageSpace(const std::string& name, MemMap* mem_map,
                        accounting::SpaceBitmap* live_bitmap)
-    : MemMapSpace(name, mem_map, mem_map->Size(), kGcRetentionPolicyNeverCollect) {
-  DCHECK(live_bitmap != NULL);
+    : MemMapSpace(name, mem_map, mem_map->Begin(), mem_map->End(), mem_map->End(),
+                  kGcRetentionPolicyNeverCollect) {
+  DCHECK(live_bitmap != nullptr);
   live_bitmap_.reset(live_bitmap);
 }
 
@@ -332,7 +333,7 @@
 
 void ImageSpace::Dump(std::ostream& os) const {
   os << GetType()
-      << "begin=" << reinterpret_cast<void*>(Begin())
+      << " begin=" << reinterpret_cast<void*>(Begin())
       << ",end=" << reinterpret_cast<void*>(End())
       << ",size=" << PrettySize(Size())
       << ",name=\"" << GetName() << "\"]";
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index ef889d4..d374ad3 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -59,6 +59,14 @@
 
   size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
 
+  virtual bool IsAllocSpace() const {
+    return true;
+  }
+
+  virtual AllocSpace* AsAllocSpace() {
+    return this;
+  }
+
  protected:
   explicit LargeObjectSpace(const std::string& name);
 
@@ -108,7 +116,8 @@
   virtual ~FreeListSpace();
   static FreeListSpace* Create(const std::string& name, byte* requested_begin, size_t capacity);
 
-  size_t AllocationSize(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  size_t AllocationSize(const mirror::Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(lock_);
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
   size_t Free(Thread* self, mirror::Object* obj);
   bool Contains(const mirror::Object* obj) const;
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
new file mode 100644
index 0000000..785b5ed
--- /dev/null
+++ b/runtime/gc/space/malloc_space.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "malloc_space.h"
+
+#include "gc/accounting/card_table.h"
+#include "gc/heap.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "runtime.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "utils.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+size_t MallocSpace::bitmap_index_ = 0;
+
+MallocSpace::MallocSpace(const std::string& name, MemMap* mem_map,
+                         byte* begin, byte* end, byte* limit, size_t growth_limit)
+    : ContinuousMemMapAllocSpace(name, mem_map, begin, end, limit, kGcRetentionPolicyAlwaysCollect),
+      recent_free_pos_(0), lock_("allocation space lock", kAllocSpaceLock),
+      growth_limit_(growth_limit) {
+  size_t bitmap_index = bitmap_index_++;
+  static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
+  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
+  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
+  live_bitmap_.reset(accounting::SpaceBitmap::Create(
+      StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
+      Begin(), Capacity()));
+  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #" << bitmap_index;
+  mark_bitmap_.reset(accounting::SpaceBitmap::Create(
+      StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
+      Begin(), Capacity()));
+  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #" << bitmap_index;
+  for (auto& freed : recent_freed_objects_) {
+    freed.first = nullptr;
+    freed.second = nullptr;
+  }
+}
+
+MemMap* MallocSpace::CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
+                                  size_t* growth_limit, size_t* capacity, byte* requested_begin) {
+  // Sanity check arguments
+  if (starting_size > *initial_size) {
+    *initial_size = starting_size;
+  }
+  if (*initial_size > *growth_limit) {
+    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the initial size ("
+        << PrettySize(*initial_size) << ") is larger than its capacity ("
+        << PrettySize(*growth_limit) << ")";
+    return NULL;
+  }
+  if (*growth_limit > *capacity) {
+    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the growth limit capacity ("
+        << PrettySize(*growth_limit) << ") is larger than the capacity ("
+        << PrettySize(*capacity) << ")";
+    return NULL;
+  }
+
+  // Page align growth limit and capacity which will be used to manage mmapped storage
+  *growth_limit = RoundUp(*growth_limit, kPageSize);
+  *capacity = RoundUp(*capacity, kPageSize);
+
+  std::string error_msg;
+  MemMap* mem_map = MemMap::MapAnonymous(name.c_str(), requested_begin, *capacity,
+                                         PROT_READ | PROT_WRITE, &error_msg);
+  if (mem_map == NULL) {
+    LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
+               << PrettySize(*capacity) << ": " << error_msg;
+    return NULL;
+  }
+  return mem_map;
+}
+
+void MallocSpace::SwapBitmaps() {
+  live_bitmap_.swap(mark_bitmap_);
+  // Swap names to get more descriptive diagnostics.
+  std::string temp_name(live_bitmap_->GetName());
+  live_bitmap_->SetName(mark_bitmap_->GetName());
+  mark_bitmap_->SetName(temp_name);
+}
+
+mirror::Class* MallocSpace::FindRecentFreedObject(const mirror::Object* obj) {
+  size_t pos = recent_free_pos_;
+  // Start at the most recently freed object and work our way back since there may be duplicates
+  // caused by dlmalloc reusing memory.
+  if (kRecentFreeCount > 0) {
+    for (size_t i = 0; i + 1 < kRecentFreeCount + 1; ++i) {
+      pos = pos != 0 ? pos - 1 : kRecentFreeMask;
+      if (recent_freed_objects_[pos].first == obj) {
+        return recent_freed_objects_[pos].second;
+      }
+    }
+  }
+  return nullptr;
+}
+
+void MallocSpace::RegisterRecentFree(mirror::Object* ptr) {
+  recent_freed_objects_[recent_free_pos_] = std::make_pair(ptr, ptr->GetClass());
+  recent_free_pos_ = (recent_free_pos_ + 1) & kRecentFreeMask;
+}
+
+void MallocSpace::SetGrowthLimit(size_t growth_limit) {
+  growth_limit = RoundUp(growth_limit, kPageSize);
+  growth_limit_ = growth_limit;
+  if (Size() > growth_limit_) {
+    end_ = begin_ + growth_limit;
+  }
+}
+
+void* MallocSpace::MoreCore(intptr_t increment) {
+  CheckMoreCoreForPrecondition();
+  byte* original_end = end_;
+  if (increment != 0) {
+    VLOG(heap) << "MallocSpace::MoreCore " << PrettySize(increment);
+    byte* new_end = original_end + increment;
+    if (increment > 0) {
+      // Should never be asked to increase the allocation beyond the capacity of the space. Enforced
+      // by mspace_set_footprint_limit.
+      CHECK_LE(new_end, Begin() + Capacity());
+      CHECK_MEMORY_CALL(mprotect, (original_end, increment, PROT_READ | PROT_WRITE), GetName());
+    } else {
+      // Should never be asked for a negative footprint (i.e. before begin). Zero footprint is ok.
+      CHECK_GE(original_end + increment, Begin());
+      // Advise that we don't need the pages and protect them.
+      // TODO: by removing permissions to the pages we may be causing TLB shoot-downs which can
+      // be expensive (note the same isn't true for giving permissions to a page as the protected
+      // page shouldn't be in a TLB). We should investigate the performance impact of just
+      // ignoring the memory protection change here and in Space::CreateAllocSpace. It's likely
+      // just a useful debug feature.
+      size_t size = -increment;
+      CHECK_MEMORY_CALL(madvise, (new_end, size, MADV_DONTNEED), GetName());
+      CHECK_MEMORY_CALL(mprotect, (new_end, size, PROT_NONE), GetName());
+    }
+    // Update end_
+    end_ = new_end;
+  }
+  return original_end;
+}
+
+// Returns the old mark bitmap.
+accounting::SpaceBitmap* MallocSpace::BindLiveToMarkBitmap() {
+  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
+  accounting::SpaceBitmap* mark_bitmap = mark_bitmap_.release();
+  temp_bitmap_.reset(mark_bitmap);
+  mark_bitmap_.reset(live_bitmap);
+  return mark_bitmap;
+}
+
+bool MallocSpace::HasBoundBitmaps() const {
+  return temp_bitmap_.get() != nullptr;
+}
+
+void MallocSpace::UnBindBitmaps() {
+  CHECK(HasBoundBitmaps());
+  // At this point, the temp_bitmap holds our old mark bitmap.
+  accounting::SpaceBitmap* new_bitmap = temp_bitmap_.release();
+  CHECK_EQ(mark_bitmap_.release(), live_bitmap_.get());
+  mark_bitmap_.reset(new_bitmap);
+  DCHECK(temp_bitmap_.get() == nullptr);
+}
+
+MallocSpace* MallocSpace::CreateZygoteSpace(const char* alloc_space_name) {
+  // For RosAlloc, revoke thread local runs before creating a new
+  // alloc space so that we won't mix thread local runs from different
+  // alloc spaces.
+  RevokeAllThreadLocalBuffers();
+  end_ = reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(end_), kPageSize));
+  DCHECK(IsAligned<accounting::CardTable::kCardSize>(begin_));
+  DCHECK(IsAligned<accounting::CardTable::kCardSize>(end_));
+  DCHECK(IsAligned<kPageSize>(begin_));
+  DCHECK(IsAligned<kPageSize>(end_));
+  size_t size = RoundUp(Size(), kPageSize);
+  // Trim the heap so that we minimize the size of the Zygote space.
+  Trim();
+  // TODO: Not hardcode these in?
+  const size_t starting_size = kPageSize;
+  const size_t initial_size = 2 * MB;
+  // Remaining size is for the new alloc space.
+  const size_t growth_limit = growth_limit_ - size;
+  const size_t capacity = Capacity() - size;
+  VLOG(heap) << "Begin " << reinterpret_cast<const void*>(begin_) << "\n"
+             << "End " << reinterpret_cast<const void*>(end_) << "\n"
+             << "Size " << size << "\n"
+             << "GrowthLimit " << growth_limit_ << "\n"
+             << "Capacity " << Capacity();
+  SetGrowthLimit(RoundUp(size, kPageSize));
+  SetFootprintLimit(RoundUp(size, kPageSize));
+  // FIXME: Do we need reference counted pointers here?
+  // Make the two spaces share the same mark bitmaps since the bitmaps span both of the spaces.
+  VLOG(heap) << "Creating new AllocSpace: ";
+  VLOG(heap) << "Size " << GetMemMap()->Size();
+  VLOG(heap) << "GrowthLimit " << PrettySize(growth_limit);
+  VLOG(heap) << "Capacity " << PrettySize(capacity);
+  // Remap the tail.
+  std::string error_msg;
+  UniquePtr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
+                                                    PROT_READ | PROT_WRITE, &error_msg));
+  CHECK(mem_map.get() != nullptr) << error_msg;
+  void* allocator = CreateAllocator(end_, starting_size, initial_size);
+  // Protect memory beyond the initial size.
+  byte* end = mem_map->Begin() + starting_size;
+  if (capacity - initial_size > 0) {
+    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), alloc_space_name);
+  }
+  MallocSpace* alloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator,
+                                            end_, end, limit_, growth_limit);
+  SetLimit(End());
+  live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
+  CHECK_EQ(live_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
+  mark_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
+  CHECK_EQ(mark_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
+  VLOG(heap) << "zygote space creation done";
+  return alloc_space;
+}
+
+void MallocSpace::Dump(std::ostream& os) const {
+  os << GetType()
+      << " begin=" << reinterpret_cast<void*>(Begin())
+      << ",end=" << reinterpret_cast<void*>(End())
+      << ",size=" << PrettySize(Size()) << ",capacity=" << PrettySize(Capacity())
+      << ",name=\"" << GetName() << "\"]";
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
new file mode 100644
index 0000000..0f882d3
--- /dev/null
+++ b/runtime/gc/space/malloc_space.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_MALLOC_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_MALLOC_SPACE_H_
+
+#include "space.h"
+
+#include <valgrind.h>
+#include <memcheck/memcheck.h>
+
+namespace art {
+namespace gc {
+
+namespace collector {
+  class MarkSweep;
+}  // namespace collector
+
+namespace space {
+
+// TODO: Remove define macro
+#define CHECK_MEMORY_CALL(call, args, what) \
+  do { \
+    int rc = call args; \
+    if (UNLIKELY(rc != 0)) { \
+      errno = rc; \
+      PLOG(FATAL) << # call << " failed for " << what; \
+    } \
+  } while (false)
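+
+// A minimal usage sketch for the macro above (mirroring the real mprotect
+// calls elsewhere in this change): pass the call name, its parenthesized
+// argument list, and a string naming what the call operated on:
+//
+//   CHECK_MEMORY_CALL(mprotect, (addr, length, PROT_NONE), space_name);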
+
+// const bool kUseRosAlloc = true;
+
+// A common parent of DlMallocSpace and RosAllocSpace.
+class MallocSpace : public ContinuousMemMapAllocSpace {
+ public:
+  typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
+
+  SpaceType GetType() const {
+    if (GetGcRetentionPolicy() == kGcRetentionPolicyFullCollect) {
+      return kSpaceTypeZygoteSpace;
+    } else {
+      return kSpaceTypeAllocSpace;
+    }
+  }
+
+  // Allocate num_bytes allowing the underlying space to grow.
+  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
+                                          size_t* bytes_allocated) = 0;
+  // Allocate num_bytes without allowing the underlying space to grow.
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) = 0;
+  // Return the storage space required by obj.
+  virtual size_t AllocationSize(const mirror::Object* obj) = 0;
+  virtual size_t Free(Thread* self, mirror::Object* ptr) = 0;
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) = 0;
+
+#ifndef NDEBUG
+  virtual void CheckMoreCoreForPrecondition() {}  // to be overridden in the debug build.
+#else
+  void CheckMoreCoreForPrecondition() {}  // no-op in the non-debug build.
+#endif
+
+  void* MoreCore(intptr_t increment);
+
+  // Hands unused pages back to the system.
+  virtual size_t Trim() = 0;
+
+  // Perform an mspace_inspect_all which calls back for each allocation chunk. A chunk may not be
+  // in use, which is indicated by num_bytes equaling zero.
+  virtual void Walk(WalkCallback callback, void* arg) = 0;
+
+  // Returns the number of bytes that the space has currently obtained from the system. This is
+  // greater than or equal to the amount of live data in the space.
+  virtual size_t GetFootprint() = 0;
+
+  // Returns the number of bytes that the heap is allowed to obtain from the system via MoreCore.
+  virtual size_t GetFootprintLimit() = 0;
+
+  // Set the maximum number of bytes that the heap is allowed to obtain from the system via
+  // MoreCore. Note this is used to stop the mspace from growing beyond the limit towards
+  // Capacity. When allocations fail, we GC before increasing the footprint limit and allowing
+  // the mspace to grow.
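+  //
+  // A sketch of that grow-then-shrink dance (RosAllocSpace::AllocWithGrowth
+  // later in this change is the real implementation; TryAlloc is a
+  // hypothetical stand-in for the underlying allocation attempt):
+  //
+  //   SetFootprintLimit(Capacity());      // Allow growth up to the capacity.
+  //   mirror::Object* obj = TryAlloc(self, num_bytes, bytes_allocated);
+  //   SetFootprintLimit(GetFootprint());  // Shrink back to the actual footprint.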
+  virtual void SetFootprintLimit(size_t limit) = 0;
+
+  // Removes the fork time growth limit on capacity, allowing the application to allocate up to the
+  // maximum reserved size of the heap.
+  void ClearGrowthLimit() {
+    growth_limit_ = NonGrowthLimitCapacity();
+  }
+
+  // Override capacity so that we only return the possibly limited capacity
+  size_t Capacity() const {
+    return growth_limit_;
+  }
+
+  // The total amount of memory reserved for the alloc space.
+  size_t NonGrowthLimitCapacity() const {
+    return GetMemMap()->Size();
+  }
+
+  accounting::SpaceBitmap* GetLiveBitmap() const {
+    return live_bitmap_.get();
+  }
+
+  accounting::SpaceBitmap* GetMarkBitmap() const {
+    return mark_bitmap_.get();
+  }
+
+  void Dump(std::ostream& os) const;
+
+  void SetGrowthLimit(size_t growth_limit);
+
+  // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
+  void SwapBitmaps();
+
+  virtual MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                                      byte* begin, byte* end, byte* limit, size_t growth_limit) = 0;
+
+  // Turn this space into a zygote space and return a new alloc space containing the unused memory.
+  MallocSpace* CreateZygoteSpace(const char* alloc_space_name);
+
+  virtual uint64_t GetBytesAllocated() = 0;
+  virtual uint64_t GetObjectsAllocated() = 0;
+  virtual uint64_t GetTotalBytesAllocated() = 0;
+  virtual uint64_t GetTotalObjectsAllocated() = 0;
+
+  // Returns the old mark bitmap.
+  accounting::SpaceBitmap* BindLiveToMarkBitmap();
+  bool HasBoundBitmaps() const;
+  void UnBindBitmaps();
+
+  // Returns the class of a recently freed object.
+  mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
+
+  // Used to ensure that failure happens when you free / allocate into an invalidated space. If we
+  // don't do this we may get heap corruption instead of a segfault at null.
+  virtual void InvalidateAllocator() = 0;
+
+ protected:
+  MallocSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end,
+              byte* limit, size_t growth_limit);
+
+  static MemMap* CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
+                              size_t* growth_limit, size_t* capacity, byte* requested_begin);
+
+  virtual void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) = 0;
+
+  void RegisterRecentFree(mirror::Object* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
+  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
+  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
+
+  // Recent allocation buffer.
+  static constexpr size_t kRecentFreeCount = kDebugSpaces ? (1 << 16) : 0;
+  static constexpr size_t kRecentFreeMask = kRecentFreeCount - 1;
+  std::pair<const mirror::Object*, mirror::Class*> recent_freed_objects_[kRecentFreeCount];
+  size_t recent_free_pos_;
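+
+  // A sketch of the expected ring-buffer update in RegisterRecentFree (the
+  // implementation lives in malloc_space.cc and is assumed here): the
+  // power-of-two kRecentFreeCount lets the position wrap with a cheap AND:
+  //
+  //   recent_freed_objects_[recent_free_pos_] = std::make_pair(ptr, ptr->GetClass());
+  //   recent_free_pos_ = (recent_free_pos_ + 1) & kRecentFreeMask;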
+
+  static size_t bitmap_index_;
+
+  // Used to ensure mutual exclusion when the allocation spaces data structures are being modified.
+  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+  // The capacity of the alloc space until such time that ClearGrowthLimit is called.
+  // The underlying mem_map_ controls the maximum size we allow the heap to grow to. The growth
+  // limit is a value <= to the mem_map_ capacity used for ergonomic reasons because of the zygote.
+  // Prior to forking the zygote the heap will have a maximally sized mem_map_ but the growth_limit_
+  // will be set to a lower value. The growth_limit_ is used as the capacity of the alloc_space_,
+  // however, capacity normally can't vary. In the case of the growth_limit_ it can be cleared
+  // one time by a call to ClearGrowthLimit.
+  size_t growth_limit_;
+
+  friend class collector::MarkSweep;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MallocSpace);
+};
+
+// Number of bytes to use as a red zone (rdz). A red zone of this size will be placed before and
+// after each allocation. 8 bytes provides long/double alignment.
+static constexpr size_t kValgrindRedZoneBytes = 8;
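+//
+// Resulting layout of one allocation, as set up by the Alloc methods below
+// (callers receive a pointer to the payload, not to the raw allocation):
+//
+//   | red zone (8B, NOACCESS) | payload (num_bytes) | red zone (8B, NOACCESS) |
+//   ^ obj_with_rdz            ^ result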
+
+// A specialization of DlMallocSpace/RosAllocSpace that provides information to Valgrind about
+// allocations.
+template <typename BaseMallocSpaceType, typename AllocatorType>
+class ValgrindMallocSpace : public BaseMallocSpaceType {
+ public:
+  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+    void* obj_with_rdz = BaseMallocSpaceType::AllocWithGrowth(self, num_bytes + 2 * kValgrindRedZoneBytes,
+                                                              bytes_allocated);
+    if (obj_with_rdz == NULL) {
+      return NULL;
+    }
+    mirror::Object* result = reinterpret_cast<mirror::Object*>(
+        reinterpret_cast<byte*>(obj_with_rdz) + kValgrindRedZoneBytes);
+    // Mark the red zones as no-access.
+    VALGRIND_MAKE_MEM_NOACCESS(obj_with_rdz, kValgrindRedZoneBytes);
+    VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<byte*>(result) + num_bytes, kValgrindRedZoneBytes);
+    return result;
+  }
+
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+    void* obj_with_rdz = BaseMallocSpaceType::Alloc(self, num_bytes + 2 * kValgrindRedZoneBytes,
+                                                    bytes_allocated);
+    if (obj_with_rdz == NULL) {
+      return NULL;
+    }
+    mirror::Object* result = reinterpret_cast<mirror::Object*>(
+        reinterpret_cast<byte*>(obj_with_rdz) + kValgrindRedZoneBytes);
+    // Mark the red zones as no-access.
+    VALGRIND_MAKE_MEM_NOACCESS(obj_with_rdz, kValgrindRedZoneBytes);
+    VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<byte*>(result) + num_bytes, kValgrindRedZoneBytes);
+    return result;
+  }
+
+  virtual size_t AllocationSize(const mirror::Object* obj) {
+    size_t result = BaseMallocSpaceType::AllocationSize(reinterpret_cast<const mirror::Object*>(
+        reinterpret_cast<const byte*>(obj) - kValgrindRedZoneBytes));
+    return result - 2 * kValgrindRedZoneBytes;
+  }
+
+  virtual size_t Free(Thread* self, mirror::Object* ptr) {
+    void* obj_after_rdz = reinterpret_cast<void*>(ptr);
+    void* obj_with_rdz = reinterpret_cast<byte*>(obj_after_rdz) - kValgrindRedZoneBytes;
+    // Mark the whole allocation, including the red zones, as undefined.
+    size_t allocation_size = BaseMallocSpaceType::AllocationSize(
+        reinterpret_cast<mirror::Object*>(obj_with_rdz));
+    VALGRIND_MAKE_MEM_UNDEFINED(obj_with_rdz, allocation_size);
+    size_t freed = BaseMallocSpaceType::Free(self, reinterpret_cast<mirror::Object*>(obj_with_rdz));
+    return freed - 2 * kValgrindRedZoneBytes;
+  }
+
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
+    size_t freed = 0;
+    for (size_t i = 0; i < num_ptrs; i++) {
+      freed += Free(self, ptrs[i]);
+    }
+    return freed;
+  }
+
+  ValgrindMallocSpace(const std::string& name, MemMap* mem_map, AllocatorType allocator, byte* begin,
+                      byte* end, byte* limit, size_t growth_limit, size_t initial_size) :
+      BaseMallocSpaceType(name, mem_map, allocator, begin, end, limit, growth_limit) {
+    VALGRIND_MAKE_MEM_UNDEFINED(mem_map->Begin() + initial_size, mem_map->Size() - initial_size);
+  }
+
+  virtual ~ValgrindMallocSpace() {
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ValgrindMallocSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_MALLOC_SPACE_H_
diff --git a/runtime/gc/space/rosalloc_space-inl.h b/runtime/gc/space/rosalloc_space-inl.h
new file mode 100644
index 0000000..5de4265
--- /dev/null
+++ b/runtime/gc/space/rosalloc_space-inl.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_INL_H_
+#define ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_INL_H_
+
+#include "gc/allocator/rosalloc-inl.h"
+#include "rosalloc_space.h"
+#include "thread.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+inline mirror::Object* RosAllocSpace::AllocNonvirtual(Thread* self, size_t num_bytes,
+                                                      size_t* bytes_allocated) {
+  mirror::Object* obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
+  // RosAlloc zeroes memory internally.
+  return obj;
+}
+
+inline mirror::Object* RosAllocSpace::AllocWithoutGrowthLocked(Thread* self, size_t num_bytes,
+                                                               size_t* bytes_allocated) {
+  size_t rosalloc_size = 0;
+  mirror::Object* result = reinterpret_cast<mirror::Object*>(
+      rosalloc_for_alloc_->Alloc(self, num_bytes, &rosalloc_size));
+  if (LIKELY(result != NULL)) {
+    if (kDebugSpaces) {
+      CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
+            << ") not in bounds of allocation space " << *this;
+    }
+    DCHECK(bytes_allocated != NULL);
+    *bytes_allocated = rosalloc_size;
+  }
+  return result;
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_INL_H_
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
new file mode 100644
index 0000000..1f8e324
--- /dev/null
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rosalloc_space.h"
+
+#include "rosalloc_space-inl.h"
+#include "gc/accounting/card_table.h"
+#include "gc/heap.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "runtime.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "utils.h"
+
+#include <valgrind.h>
+#include <memcheck/memcheck.h>
+
+namespace art {
+namespace gc {
+namespace space {
+
+static const bool kPrefetchDuringRosAllocFreeList = true;
+
+RosAllocSpace::RosAllocSpace(const std::string& name, MemMap* mem_map,
+                             art::gc::allocator::RosAlloc* rosalloc, byte* begin, byte* end,
+                             byte* limit, size_t growth_limit)
+    : MallocSpace(name, mem_map, begin, end, limit, growth_limit), rosalloc_(rosalloc),
+      rosalloc_for_alloc_(rosalloc) {
+  CHECK(rosalloc != NULL);
+}
+
+RosAllocSpace* RosAllocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
+                                     size_t capacity, byte* requested_begin) {
+  uint64_t start_time = 0;
+  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
+    start_time = NanoTime();
+    VLOG(startup) << "RosAllocSpace::Create entering " << name
+                  << " initial_size=" << PrettySize(initial_size)
+                  << " growth_limit=" << PrettySize(growth_limit)
+                  << " capacity=" << PrettySize(capacity)
+                  << " requested_begin=" << reinterpret_cast<void*>(requested_begin);
+  }
+
+  // Memory we promise to rosalloc before it asks for morecore.
+  // Note: making this value large means that large allocations are unlikely to succeed as rosalloc
+  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
+  // size of the large allocation) will be greater than the footprint limit.
+  size_t starting_size = kPageSize;
+  MemMap* mem_map = CreateMemMap(name, starting_size, &initial_size, &growth_limit, &capacity,
+                                 requested_begin);
+  if (mem_map == NULL) {
+    LOG(ERROR) << "Failed to create mem map for alloc space (" << name << ") of size "
+               << PrettySize(capacity);
+    return NULL;
+  }
+  allocator::RosAlloc* rosalloc = CreateRosAlloc(mem_map->Begin(), starting_size, initial_size);
+  if (rosalloc == NULL) {
+    LOG(ERROR) << "Failed to initialize rosalloc for alloc space (" << name << ")";
+    return NULL;
+  }
+
+  // Protect memory beyond the initial size.
+  byte* end = mem_map->Begin() + starting_size;
+  if (capacity - initial_size > 0) {
+    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), name);
+  }
+
+  // Everything is set so record in immutable structure and leave
+  RosAllocSpace* space;
+  byte* begin = mem_map->Begin();
+  if (RUNNING_ON_VALGRIND > 0) {
+    space = new ValgrindMallocSpace<RosAllocSpace, art::gc::allocator::RosAlloc*>(
+        name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit, initial_size);
+  } else {
+    space = new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit);
+  }
+  // We start out with only the initial size possibly containing objects.
+  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
+    LOG(INFO) << "RosAllocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
+        << " ) " << *space;
+  }
+  return space;
+}
+
+allocator::RosAlloc* RosAllocSpace::CreateRosAlloc(void* begin, size_t morecore_start, size_t initial_size) {
+  // Clear errno to allow PLOG on error.
+  errno = 0;
+  // Create rosalloc using our backing storage starting at begin and
+  // with a footprint of morecore_start. When morecore_start bytes of
+  // memory are exhausted, morecore will be called.
+  allocator::RosAlloc* rosalloc = new art::gc::allocator::RosAlloc(begin, morecore_start);
+  if (rosalloc != NULL) {
+    rosalloc->SetFootprintLimit(initial_size);
+  } else {
+    PLOG(ERROR) << "RosAlloc::Create failed";
+  }
+  return rosalloc;
+}
+
+mirror::Object* RosAllocSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+  return AllocNonvirtual(self, num_bytes, bytes_allocated);
+}
+
+mirror::Object* RosAllocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+  mirror::Object* result;
+  {
+    MutexLock mu(self, lock_);
+    // Grow as much as possible within the space.
+    size_t max_allowed = Capacity();
+    rosalloc_->SetFootprintLimit(max_allowed);
+    // Try the allocation.
+    result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
+    // Shrink back down as small as possible.
+    size_t footprint = rosalloc_->Footprint();
+    rosalloc_->SetFootprintLimit(footprint);
+  }
+  // Note RosAlloc zeroes memory internally.
+  // Return the new allocation or NULL.
+  CHECK(!kDebugSpaces || result == NULL || Contains(result));
+  return result;
+}
+
+MallocSpace* RosAllocSpace::CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                                           byte* begin, byte* end, byte* limit, size_t growth_limit) {
+  return new RosAllocSpace(name, mem_map, reinterpret_cast<allocator::RosAlloc*>(allocator),
+                           begin, end, limit, growth_limit);
+}
+
+size_t RosAllocSpace::Free(Thread* self, mirror::Object* ptr) {
+  if (kDebugSpaces) {
+    CHECK(ptr != NULL);
+    CHECK(Contains(ptr)) << "Free (" << ptr << ") not in bounds of heap " << *this;
+  }
+  const size_t bytes_freed = InternalAllocationSize(ptr);
+  total_bytes_freed_atomic_.fetch_add(bytes_freed);
+  ++total_objects_freed_atomic_;
+  if (kRecentFreeCount > 0) {
+    MutexLock mu(self, lock_);
+    RegisterRecentFree(ptr);
+  }
+  rosalloc_->Free(self, ptr);
+  return bytes_freed;
+}
+
+size_t RosAllocSpace::FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
+  DCHECK(ptrs != NULL);
+
+  // Don't need the lock to calculate the size of the freed pointers.
+  size_t bytes_freed = 0;
+  for (size_t i = 0; i < num_ptrs; i++) {
+    mirror::Object* ptr = ptrs[i];
+    const size_t look_ahead = 8;
+    if (kPrefetchDuringRosAllocFreeList && i + look_ahead < num_ptrs) {
+      __builtin_prefetch(reinterpret_cast<char*>(ptrs[i + look_ahead]));
+    }
+    bytes_freed += InternalAllocationSize(ptr);
+  }
+
+  if (kRecentFreeCount > 0) {
+    MutexLock mu(self, lock_);
+    for (size_t i = 0; i < num_ptrs; i++) {
+      RegisterRecentFree(ptrs[i]);
+    }
+  }
+
+  if (kDebugSpaces) {
+    size_t num_broken_ptrs = 0;
+    for (size_t i = 0; i < num_ptrs; i++) {
+      if (!Contains(ptrs[i])) {
+        num_broken_ptrs++;
+        LOG(ERROR) << "FreeList[" << i << "] (" << ptrs[i] << ") not in bounds of heap " << *this;
+      } else {
+        size_t size = rosalloc_->UsableSize(ptrs[i]);
+        memset(ptrs[i], 0xEF, size);
+      }
+    }
+    CHECK_EQ(num_broken_ptrs, 0u);
+  }
+
+  rosalloc_->BulkFree(self, reinterpret_cast<void**>(ptrs), num_ptrs);
+  total_bytes_freed_atomic_.fetch_add(bytes_freed);
+  total_objects_freed_atomic_.fetch_add(num_ptrs);
+  return bytes_freed;
+}
+
+// Callback from rosalloc when it needs to increase the footprint
+extern "C" void* art_heap_rosalloc_morecore(allocator::RosAlloc* rosalloc, intptr_t increment) {
+  Heap* heap = Runtime::Current()->GetHeap();
+  DCHECK(heap->GetNonMovingSpace()->IsRosAllocSpace());
+  DCHECK_EQ(heap->GetNonMovingSpace()->AsRosAllocSpace()->GetRosAlloc(), rosalloc);
+  return heap->GetNonMovingSpace()->MoreCore(increment);
+}
+
+// Virtual functions can't get inlined, so route internal callers through this non-virtual helper.
+inline size_t RosAllocSpace::InternalAllocationSize(const mirror::Object* obj) {
+  return AllocationSizeNonvirtual(obj);
+}
+
+size_t RosAllocSpace::AllocationSize(const mirror::Object* obj) {
+  return InternalAllocationSize(obj);
+}
+
+size_t RosAllocSpace::Trim() {
+  MutexLock mu(Thread::Current(), lock_);
+  // Trim to release memory at the end of the space.
+  rosalloc_->Trim();
+  // No inspect_all necessary here as trimming of pages is built-in.
+  return 0;
+}
+
+void RosAllocSpace::Walk(void(*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
+                         void* arg) {
+  InspectAllRosAlloc(callback, arg);
+  callback(NULL, NULL, 0, arg);  // Indicate end of a space.
+}
+
+size_t RosAllocSpace::GetFootprint() {
+  MutexLock mu(Thread::Current(), lock_);
+  return rosalloc_->Footprint();
+}
+
+size_t RosAllocSpace::GetFootprintLimit() {
+  MutexLock mu(Thread::Current(), lock_);
+  return rosalloc_->FootprintLimit();
+}
+
+void RosAllocSpace::SetFootprintLimit(size_t new_size) {
+  MutexLock mu(Thread::Current(), lock_);
+  VLOG(heap) << "RosAllocSpace::SetFootprintLimit " << PrettySize(new_size);
+  // Compare against the actual footprint, rather than the Size(), because the heap may not have
+  // grown all the way to the allowed size yet.
+  size_t current_space_size = rosalloc_->Footprint();
+  if (new_size < current_space_size) {
+    // Don't let the space grow any more.
+    new_size = current_space_size;
+  }
+  rosalloc_->SetFootprintLimit(new_size);
+}
+
+uint64_t RosAllocSpace::GetBytesAllocated() {
+  size_t bytes_allocated = 0;
+  InspectAllRosAlloc(art::gc::allocator::RosAlloc::BytesAllocatedCallback, &bytes_allocated);
+  return bytes_allocated;
+}
+
+uint64_t RosAllocSpace::GetObjectsAllocated() {
+  size_t objects_allocated = 0;
+  InspectAllRosAlloc(art::gc::allocator::RosAlloc::ObjectsAllocatedCallback, &objects_allocated);
+  return objects_allocated;
+}
+
+void RosAllocSpace::InspectAllRosAlloc(void (*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
+                                       void* arg) NO_THREAD_SAFETY_ANALYSIS {
+  // TODO: Remove the need for NO_THREAD_SAFETY_ANALYSIS on this function.
+  Thread* self = Thread::Current();
+  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+    // The mutators are already suspended. For example, a call path
+    // from SignalCatcher::HandleSigQuit().
+    rosalloc_->InspectAll(callback, arg);
+  } else {
+    // The mutators are not suspended yet.
+    DCHECK(!Locks::mutator_lock_->IsSharedHeld(self));
+    ThreadList* tl = Runtime::Current()->GetThreadList();
+    tl->SuspendAll();
+    {
+      MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+      MutexLock mu2(self, *Locks::thread_list_lock_);
+      rosalloc_->InspectAll(callback, arg);
+    }
+    tl->ResumeAll();
+  }
+}
+
+void RosAllocSpace::RevokeThreadLocalBuffers(Thread* thread) {
+  rosalloc_->RevokeThreadLocalRuns(thread);
+}
+
+void RosAllocSpace::RevokeAllThreadLocalBuffers() {
+  rosalloc_->RevokeAllThreadLocalRuns();
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
new file mode 100644
index 0000000..6311580
--- /dev/null
+++ b/runtime/gc/space/rosalloc_space.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_H_
+
+#include "gc/allocator/rosalloc.h"
+#include "malloc_space.h"
+#include "space.h"
+
+namespace art {
+namespace gc {
+
+namespace collector {
+  class MarkSweep;
+}  // namespace collector
+
+namespace space {
+
+// An alloc space is a space where objects may be allocated and garbage collected.
+class RosAllocSpace : public MallocSpace {
+ public:
+  // Create a RosAllocSpace with the requested sizes. The requested
+  // base address is not guaranteed to be granted, if it is required,
+  // the caller should call Begin on the returned space to confirm the
+  // request was granted.
+  static RosAllocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
+                               size_t capacity, byte* requested_begin);
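+  //
+  // A usage sketch (requested_begin here is a hypothetical caller-supplied
+  // address; the sizes are arbitrary):
+  //
+  //   RosAllocSpace* space = RosAllocSpace::Create("alloc space", 4 * MB,
+  //                                                16 * MB, 32 * MB, requested_begin);
+  //   CHECK(space != NULL);
+  //   if (space->Begin() != requested_begin) {
+  //     // The requested base address was not granted.
+  //   }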
+
+  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
+                                          size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+  virtual size_t AllocationSize(const mirror::Object* obj);
+  virtual size_t Free(Thread* self, mirror::Object* ptr);
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
+
+  mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+
+  size_t AllocationSizeNonvirtual(const mirror::Object* obj)
+      NO_THREAD_SAFETY_ANALYSIS {
+    // TODO: NO_THREAD_SAFETY_ANALYSIS because SizeOf() requires that mutator_lock is held.
+    void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
+    // obj is a valid object. Use its class in the header to get the size.
+    size_t size = obj->SizeOf();
+    size_t size_by_size = rosalloc_->UsableSize(size);
+    if (kIsDebugBuild) {
+      size_t size_by_ptr = rosalloc_->UsableSize(obj_ptr);
+      if (size_by_size != size_by_ptr) {
+        LOG(INFO) << "Found a bad sized obj of size " << size
+                  << " at " << std::hex << reinterpret_cast<intptr_t>(obj_ptr) << std::dec
+                  << " size_by_size=" << size_by_size << " size_by_ptr=" << size_by_ptr;
+      }
+      DCHECK_EQ(size_by_size, size_by_ptr);
+    }
+    return size_by_size;
+  }
+
+  art::gc::allocator::RosAlloc* GetRosAlloc() {
+    return rosalloc_;
+  }
+
+  size_t Trim();
+  void Walk(WalkCallback callback, void* arg) LOCKS_EXCLUDED(lock_);
+  size_t GetFootprint();
+  size_t GetFootprintLimit();
+  void SetFootprintLimit(size_t limit);
+
+  MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                              byte* begin, byte* end, byte* limit, size_t growth_limit);
+
+  uint64_t GetBytesAllocated();
+  uint64_t GetObjectsAllocated();
+  uint64_t GetTotalBytesAllocated() {
+    return GetBytesAllocated() + total_bytes_freed_atomic_;
+  }
+  uint64_t GetTotalObjectsAllocated() {
+    return GetObjectsAllocated() + total_objects_freed_atomic_;
+  }
+
+  void RevokeThreadLocalBuffers(Thread* thread);
+  void RevokeAllThreadLocalBuffers();
+
+  // Returns the class of a recently freed object.
+  mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
+
+  virtual void InvalidateAllocator() {
+    rosalloc_for_alloc_ = NULL;
+  }
+
+  virtual bool IsRosAllocSpace() const {
+    return true;
+  }
+  virtual RosAllocSpace* AsRosAllocSpace() {
+    return this;
+  }
+
+ protected:
+  RosAllocSpace(const std::string& name, MemMap* mem_map, allocator::RosAlloc* rosalloc,
+                byte* begin, byte* end, byte* limit, size_t growth_limit);
+
+ private:
+  size_t InternalAllocationSize(const mirror::Object* obj);
+  mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+
+  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) {
+    return CreateRosAlloc(base, morecore_start, initial_size);
+  }
+  static allocator::RosAlloc* CreateRosAlloc(void* base, size_t morecore_start, size_t initial_size);
+
+  void InspectAllRosAlloc(void (*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
+                          void* arg)
+      LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_);
+
+  // Approximate number of bytes and objects which have been deallocated in the space.
+  AtomicInteger total_bytes_freed_atomic_;
+  AtomicInteger total_objects_freed_atomic_;
+
+  // Underlying rosalloc.
+  art::gc::allocator::RosAlloc* const rosalloc_;
+
+  // A rosalloc pointer used for allocation. Equal to what rosalloc_
+  // points to, or nullptr after InvalidateAllocator() is called.
+  art::gc::allocator::RosAlloc* rosalloc_for_alloc_;
+
+  friend class collector::MarkSweep;
+
+  DISALLOW_COPY_AND_ASSIGN(RosAllocSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_H_
diff --git a/runtime/gc/space/space-inl.h b/runtime/gc/space/space-inl.h
index 2c3b93c..0c1d7a2 100644
--- a/runtime/gc/space/space-inl.h
+++ b/runtime/gc/space/space-inl.h
@@ -27,18 +27,29 @@
 namespace space {
 
 inline ImageSpace* Space::AsImageSpace() {
-  DCHECK_EQ(GetType(), kSpaceTypeImageSpace);
+  DCHECK(IsImageSpace());
   return down_cast<ImageSpace*>(down_cast<MemMapSpace*>(this));
 }
 
-inline DlMallocSpace* Space::AsDlMallocSpace() {
+inline MallocSpace* Space::AsMallocSpace() {
   DCHECK(GetType() == kSpaceTypeAllocSpace || GetType() == kSpaceTypeZygoteSpace);
-  return down_cast<DlMallocSpace*>(down_cast<MemMapSpace*>(this));
+  DCHECK(IsDlMallocSpace() || IsRosAllocSpace());
+  return down_cast<MallocSpace*>(down_cast<MemMapSpace*>(this));
 }
 
 inline LargeObjectSpace* Space::AsLargeObjectSpace() {
-  DCHECK_EQ(GetType(), kSpaceTypeLargeObjectSpace);
-  return reinterpret_cast<LargeObjectSpace*>(this);
+  DCHECK(IsLargeObjectSpace());
+  return down_cast<LargeObjectSpace*>(this);
+}
+
+inline ContinuousSpace* Space::AsContinuousSpace() {
+  DCHECK(IsContinuousSpace());
+  return down_cast<ContinuousSpace*>(this);
+}
+
+inline DiscontinuousSpace* Space::AsDiscontinuousSpace() {
+  DCHECK(IsDiscontinuousSpace());
+  return down_cast<DiscontinuousSpace*>(this);
 }
 
 }  // namespace space
diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc
index de48b74..8eb17e0 100644
--- a/runtime/gc/space/space.cc
+++ b/runtime/gc/space/space.cc
@@ -34,7 +34,6 @@
   return os;
 }
 
-
 DiscontinuousSpace::DiscontinuousSpace(const std::string& name,
                                        GcRetentionPolicy gc_retention_policy) :
     Space(name, gc_retention_policy),
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 6dd7952..ca39175 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -42,7 +42,13 @@
 
 namespace space {
 
+class AllocSpace;
+class BumpPointerSpace;
+class ContinuousSpace;
+class DiscontinuousSpace;
+class MallocSpace;
 class DlMallocSpace;
+class RosAllocSpace;
 class ImageSpace;
 class LargeObjectSpace;
 
@@ -64,6 +70,7 @@
   kSpaceTypeImageSpace,
   kSpaceTypeAllocSpace,
   kSpaceTypeZygoteSpace,
+  kSpaceTypeBumpPointerSpace,
   kSpaceTypeLargeObjectSpace,
 };
 std::ostream& operator<<(std::ostream& os, const SpaceType& space_type);
@@ -102,23 +109,65 @@
   ImageSpace* AsImageSpace();
 
   // Is this a dlmalloc backed allocation space?
-  bool IsDlMallocSpace() const {
+  bool IsMallocSpace() const {
     SpaceType type = GetType();
     return type == kSpaceTypeAllocSpace || type == kSpaceTypeZygoteSpace;
   }
-  DlMallocSpace* AsDlMallocSpace();
+  MallocSpace* AsMallocSpace();
+
+  virtual bool IsDlMallocSpace() const {
+    return false;
+  }
+  virtual DlMallocSpace* AsDlMallocSpace() {
+    LOG(FATAL) << "Unreachable";
+    return NULL;
+  }
+  virtual bool IsRosAllocSpace() const {
+    return false;
+  }
+  virtual RosAllocSpace* AsRosAllocSpace() {
+    LOG(FATAL) << "Unreachable";
+    return NULL;
+  }
 
   // Is this the space allocated into by the Zygote and no-longer in use?
   bool IsZygoteSpace() const {
     return GetType() == kSpaceTypeZygoteSpace;
   }
 
+  // Is this space a bump pointer space?
+  bool IsBumpPointerSpace() const {
+    return GetType() == kSpaceTypeBumpPointerSpace;
+  }
+  virtual BumpPointerSpace* AsBumpPointerSpace() {
+    LOG(FATAL) << "Unreachable";
+    return NULL;
+  }
+
   // Does this space hold large objects and implement the large object space abstraction?
   bool IsLargeObjectSpace() const {
     return GetType() == kSpaceTypeLargeObjectSpace;
   }
   LargeObjectSpace* AsLargeObjectSpace();
 
+  virtual bool IsContinuousSpace() const {
+    return false;
+  }
+  ContinuousSpace* AsContinuousSpace();
+
+  virtual bool IsDiscontinuousSpace() const {
+    return false;
+  }
+  DiscontinuousSpace* AsDiscontinuousSpace();
+
+  virtual bool IsAllocSpace() const {
+    return false;
+  }
+  virtual AllocSpace* AsAllocSpace() {
+    LOG(FATAL) << "Unimplemented";
+    return nullptr;
+  }
+
   virtual ~Space() {}
 
  protected:
@@ -131,13 +180,13 @@
   // Name of the space that may vary due to the Zygote fork.
   std::string name_;
 
- private:
+ protected:
   // When should objects within this space be reclaimed? Not constant as we vary it in the case
   // of Zygote forking.
   GcRetentionPolicy gc_retention_policy_;
 
+ private:
   friend class art::gc::Heap;
-
   DISALLOW_COPY_AND_ASSIGN(Space);
 };
 std::ostream& operator<<(std::ostream& os, const Space& space);
@@ -168,6 +217,16 @@
   // Returns how many bytes were freed.
   virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) = 0;
 
+  // Revoke any sort of thread-local buffers that are used to speed up
+  // allocations for the given thread, if the alloc space
+  // implementation uses any. No-op by default.
+  virtual void RevokeThreadLocalBuffers(Thread* /*thread*/) {}
+
+  // Revoke any sort of thread-local buffers that are used to speed up
+  // allocations for all the threads, if the alloc space
+  // implementation uses any. No-op by default.
+  virtual void RevokeAllThreadLocalBuffers() {}
+
  protected:
   AllocSpace() {}
   virtual ~AllocSpace() {}
@@ -180,16 +239,31 @@
 // continuous spaces can be marked in the card table.
 class ContinuousSpace : public Space {
  public:
-  // Address at which the space begins
+  // Address at which the space begins.
   byte* Begin() const {
     return begin_;
   }
 
-  // Address at which the space ends, which may vary as the space is filled.
+  // Current address at which the space ends, which may vary as the space is filled.
   byte* End() const {
     return end_;
   }
 
+  // The end of the address range covered by the space.
+  byte* Limit() const {
+    return limit_;
+  }
+
+  // Change the end of the space. Be careful with use since changing the end of a space to an
+  // invalid value may break the GC.
+  void SetEnd(byte* end) {
+    end_ = end;
+  }
+
+  void SetLimit(byte* limit) {
+    limit_ = limit;
+  }
+
   // Current size of space
   size_t Size() const {
     return End() - Begin();
@@ -198,31 +272,42 @@
   virtual accounting::SpaceBitmap* GetLiveBitmap() const = 0;
   virtual accounting::SpaceBitmap* GetMarkBitmap() const = 0;
 
+  // Maximum which the mapped space can grow to.
+  virtual size_t Capacity() const {
+    return Limit() - Begin();
+  }
+
   // Is object within this space? We check to see if the pointer is beyond the end first as
   // continuous spaces are iterated over from low to high.
   bool HasAddress(const mirror::Object* obj) const {
     const byte* byte_ptr = reinterpret_cast<const byte*>(obj);
-    return byte_ptr < End() && byte_ptr >= Begin();
+    return byte_ptr >= Begin() && byte_ptr < Limit();
   }
 
   bool Contains(const mirror::Object* obj) const {
     return HasAddress(obj);
   }
 
+  virtual bool IsContinuousSpace() const {
+    return true;
+  }
+
   virtual ~ContinuousSpace() {}
 
  protected:
   ContinuousSpace(const std::string& name, GcRetentionPolicy gc_retention_policy,
-                  byte* begin, byte* end) :
-      Space(name, gc_retention_policy), begin_(begin), end_(end) {
+                  byte* begin, byte* end, byte* limit) :
+      Space(name, gc_retention_policy), begin_(begin), end_(end), limit_(limit) {
   }
 
-
   // The beginning of the storage for fast access.
-  byte* const begin_;
+  byte* begin_;
 
   // Current end of the space.
-  byte* end_;
+  byte* volatile end_;
+
+  // Limit of the space.
+  byte* limit_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(ContinuousSpace);
@@ -241,6 +326,10 @@
     return mark_objects_.get();
   }
 
+  virtual bool IsDiscontinuousSpace() const {
+    return true;
+  }
+
   virtual ~DiscontinuousSpace() {}
 
  protected:
@@ -255,25 +344,12 @@
 
 class MemMapSpace : public ContinuousSpace {
  public:
-  // Maximum which the mapped space can grow to.
-  virtual size_t Capacity() const {
-    return mem_map_->Size();
-  }
-
   // Size of the space without a limit on its growth. By default this is just the Capacity, but
   // for the allocation space we support starting with a small heap and then extending it.
   virtual size_t NonGrowthLimitCapacity() const {
     return Capacity();
   }
 
- protected:
-  MemMapSpace(const std::string& name, MemMap* mem_map, size_t initial_size,
-              GcRetentionPolicy gc_retention_policy)
-      : ContinuousSpace(name, gc_retention_policy,
-                        mem_map->Begin(), mem_map->Begin() + initial_size),
-        mem_map_(mem_map) {
-  }
-
   MemMap* GetMemMap() {
     return mem_map_.get();
   }
@@ -282,13 +358,45 @@
     return mem_map_.get();
   }
 
- private:
+ protected:
+  MemMapSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end, byte* limit,
+              GcRetentionPolicy gc_retention_policy)
+      : ContinuousSpace(name, gc_retention_policy, begin, end, limit),
+        mem_map_(mem_map) {
+  }
+
   // Underlying storage of the space
   UniquePtr<MemMap> mem_map_;
 
+ private:
   DISALLOW_COPY_AND_ASSIGN(MemMapSpace);
 };
 
+// Used by the heap compaction interface to enable copying from one type of alloc space to another.
+class ContinuousMemMapAllocSpace : public MemMapSpace, public AllocSpace {
+ public:
+  virtual bool IsAllocSpace() const {
+    return true;
+  }
+
+  virtual AllocSpace* AsAllocSpace() {
+    return this;
+  }
+
+  virtual void Clear() {
+    LOG(FATAL) << "Unimplemented";
+  }
+
+ protected:
+  ContinuousMemMapAllocSpace(const std::string& name, MemMap* mem_map, byte* begin,
+                             byte* end, byte* limit, GcRetentionPolicy gc_retention_policy)
+      : MemMapSpace(name, mem_map, begin, end, limit, gc_retention_policy) {
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ContinuousMemMapAllocSpace);
+};
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/space_test.cc b/runtime/gc/space/space_test.cc
index 455168c..6b597ae 100644
--- a/runtime/gc/space/space_test.cc
+++ b/runtime/gc/space/space_test.cc
@@ -20,6 +20,8 @@
 #include "common_test.h"
 #include "globals.h"
 #include "UniquePtr.h"
+#include "mirror/array-inl.h"
+#include "mirror/object-inl.h"
 
 #include <stdint.h>
 
@@ -33,8 +35,25 @@
                                            int round, size_t growth_limit);
   void SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size);
 
-  void AddContinuousSpace(ContinuousSpace* space) {
-    Runtime::Current()->GetHeap()->AddContinuousSpace(space);
+  void AddSpace(ContinuousSpace* space) {
+    // For RosAlloc, revoke the thread local runs before moving onto a
+    // new alloc space.
+    Runtime::Current()->GetHeap()->RevokeAllThreadLocalBuffers();
+    Runtime::Current()->GetHeap()->AddSpace(space);
+  }
+  void InstallClass(mirror::Object* o, size_t size) NO_THREAD_SAFETY_ANALYSIS {
+    // Note the minimum size, which is the size of a zero-length byte array, is 12.
+    EXPECT_GE(size, static_cast<size_t>(12));
+    SirtRef<mirror::ClassLoader> null_loader(Thread::Current(), NULL);
+    mirror::Class* byte_array_class = Runtime::Current()->GetClassLinker()->FindClass("[B", null_loader);
+    EXPECT_TRUE(byte_array_class != NULL);
+    o->SetClass(byte_array_class);
+    mirror::Array* arr = o->AsArray();
+    // size_t header_size = sizeof(mirror::Object) + 4;
+    size_t header_size = arr->DataOffset(1).Uint32Value();
+    int32_t length = size - header_size;
+    arr->SetLength(length);
+    EXPECT_EQ(arr->SizeOf(), size);
   }
 };
 
@@ -87,16 +106,17 @@
 // the GC works with the ZygoteSpace.
 TEST_F(SpaceTest, ZygoteSpace) {
     size_t dummy = 0;
-    DlMallocSpace* space(DlMallocSpace::Create("test", 4 * MB, 16 * MB, 16 * MB, NULL));
+    MallocSpace* space(DlMallocSpace::Create("test", 4 * MB, 16 * MB, 16 * MB, NULL));
     ASSERT_TRUE(space != NULL);
 
     // Make space findable to the heap, will also delete space when runtime is cleaned up
-    AddContinuousSpace(space);
+    AddSpace(space);
     Thread* self = Thread::Current();
 
     // Succeeds, fits without adjusting the footprint limit.
     mirror::Object* ptr1 = space->Alloc(self, 1 * MB, &dummy);
     EXPECT_TRUE(ptr1 != NULL);
+    InstallClass(ptr1, 1 * MB);
 
     // Fails, requires a higher footprint limit.
     mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy);
@@ -107,6 +127,7 @@
     mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated);
     EXPECT_TRUE(ptr3 != NULL);
     EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+    InstallClass(ptr3, 8 * MB);
 
     // Fails, requires a higher footprint limit.
     mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy);
@@ -123,8 +144,9 @@
     EXPECT_LE(8U * MB, free3);
 
     // Succeeds, now that memory has been freed.
-    void* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
+    mirror::Object* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
     EXPECT_TRUE(ptr6 != NULL);
+    InstallClass(ptr6, 9 * MB);
 
     // Final clean up.
     size_t free1 = space->AllocationSize(ptr1);
@@ -136,11 +158,12 @@
     space = space->CreateZygoteSpace("alloc space");
 
     // Make space findable to the heap, will also delete space when runtime is cleaned up
-    AddContinuousSpace(space);
+    AddSpace(space);
 
     // Succeeds, fits without adjusting the footprint limit.
     ptr1 = space->Alloc(self, 1 * MB, &dummy);
     EXPECT_TRUE(ptr1 != NULL);
+    InstallClass(ptr1, 1 * MB);
 
     // Fails, requires a higher footprint limit.
     ptr2 = space->Alloc(self, 8 * MB, &dummy);
@@ -149,6 +172,7 @@
     // Succeeds, adjusts the footprint.
     ptr3 = space->AllocWithGrowth(self, 2 * MB, &dummy);
     EXPECT_TRUE(ptr3 != NULL);
+    InstallClass(ptr3, 2 * MB);
     space->Free(self, ptr3);
 
     // Final clean up.
@@ -164,11 +188,12 @@
   Thread* self = Thread::Current();
 
   // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddContinuousSpace(space);
+  AddSpace(space);
 
   // Succeeds, fits without adjusting the footprint limit.
   mirror::Object* ptr1 = space->Alloc(self, 1 * MB, &dummy);
   EXPECT_TRUE(ptr1 != NULL);
+  InstallClass(ptr1, 1 * MB);
 
   // Fails, requires a higher footprint limit.
   mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy);
@@ -179,6 +204,7 @@
   mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated);
   EXPECT_TRUE(ptr3 != NULL);
   EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  InstallClass(ptr3, 8 * MB);
 
   // Fails, requires a higher footprint limit.
   mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy);
@@ -195,8 +221,9 @@
   EXPECT_LE(8U * MB, free3);
 
   // Succeeds, now that memory has been freed.
-  void* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
+  mirror::Object* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
   EXPECT_TRUE(ptr6 != NULL);
+  InstallClass(ptr6, 9 * MB);
 
   // Final clean up.
   size_t free1 = space->AllocationSize(ptr1);
@@ -270,7 +297,7 @@
   ASSERT_TRUE(space != NULL);
 
   // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddContinuousSpace(space);
+  AddSpace(space);
   Thread* self = Thread::Current();
 
   // Succeeds, fits without adjusting the max allowed footprint.
@@ -278,8 +305,9 @@
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
     size_t allocation_size = 0;
     lots_of_objects[i] = space->Alloc(self, 16, &allocation_size);
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
     EXPECT_TRUE(lots_of_objects[i] != NULL);
+    InstallClass(lots_of_objects[i], 16);
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
   }
 
   // Release memory and check pointers are NULL
@@ -292,8 +320,9 @@
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
     size_t allocation_size = 0;
     lots_of_objects[i] = space->AllocWithGrowth(self, 1024, &allocation_size);
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
     EXPECT_TRUE(lots_of_objects[i] != NULL);
+    InstallClass(lots_of_objects[i], 1024);
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
   }
 
   // Release memory and check pointers are NULL
@@ -310,22 +339,20 @@
     // No allocation can succeed
     return;
   }
-  // Mspace for raw dlmalloc operations
-  void* mspace = space->GetMspace();
 
-  // mspace's footprint equals amount of resources requested from system
-  size_t footprint = mspace_footprint(mspace);
+  // The space's footprint equals amount of resources requested from system
+  size_t footprint = space->GetFootprint();
 
-  // mspace must at least have its book keeping allocated
+  // The space must at least have its bookkeeping allocated
   EXPECT_GT(footprint, 0u);
 
-  // mspace but it shouldn't exceed the initial size
+  // But it shouldn't exceed the initial size
   EXPECT_LE(footprint, growth_limit);
 
   // space's size shouldn't exceed the initial size
   EXPECT_LE(space->Size(), growth_limit);
 
-  // this invariant should always hold or else the mspace has grown to be larger than what the
+  // this invariant should always hold or else the space has grown to be larger than what the
   // space believes its size is (which will break invariants)
   EXPECT_GE(space->Size(), footprint);
 
@@ -345,8 +372,9 @@
         alloc_size = object_size;
       } else {
         alloc_size = test_rand(&rand_seed) % static_cast<size_t>(-object_size);
-        if (alloc_size < 8) {
-          alloc_size = 8;
+        // Note the minimum size, which is the size of a zero-length byte array, is 12.
+        if (alloc_size < 12) {
+          alloc_size = 12;
         }
       }
       mirror::Object* object;
@@ -356,9 +384,10 @@
       } else {
         object = space->AllocWithGrowth(self, alloc_size, &bytes_allocated);
       }
-      footprint = mspace_footprint(mspace);
+      footprint = space->GetFootprint();
       EXPECT_GE(space->Size(), footprint);  // invariant
       if (object != NULL) {  // allocation succeeded
+        InstallClass(object, alloc_size);
         lots_of_objects.get()[i] = object;
         size_t allocation_size = space->AllocationSize(object);
         EXPECT_EQ(bytes_allocated, allocation_size);
@@ -395,7 +424,7 @@
     space->Trim();
 
     // Bounds sanity
-    footprint = mspace_footprint(mspace);
+    footprint = space->GetFootprint();
     EXPECT_LE(amount_allocated, growth_limit);
     EXPECT_GE(footprint, amount_allocated);
     EXPECT_LE(footprint, growth_limit);
@@ -421,13 +450,21 @@
       space->Free(self, object);
       lots_of_objects.get()[i] = NULL;
       amount_allocated -= allocation_size;
-      footprint = mspace_footprint(mspace);
+      footprint = space->GetFootprint();
       EXPECT_GE(space->Size(), footprint);  // invariant
     }
 
     free_increment >>= 1;
   }
 
+  // The space has become empty here, before the large object allocation
+  // below. For RosAlloc, revoke the thread-local runs, which are kept
+  // even when empty for performance reasons, so that they won't cause
+  // the following large object allocation to fail due to potential
+  // fragmentation. Note that they are normally revoked at each GC (but
+  // no GC runs here).
+  space->RevokeAllThreadLocalBuffers();
+
   // All memory was released, try a large allocation to check freed memory is being coalesced
   mirror::Object* large_object;
   size_t three_quarters_space = (growth_limit / 2) + (growth_limit / 4);
@@ -438,9 +475,10 @@
     large_object = space->AllocWithGrowth(self, three_quarters_space, &bytes_allocated);
   }
   EXPECT_TRUE(large_object != NULL);
+  InstallClass(large_object, three_quarters_space);
 
   // Sanity check footprint
-  footprint = mspace_footprint(mspace);
+  footprint = space->GetFootprint();
   EXPECT_LE(footprint, growth_limit);
   EXPECT_GE(space->Size(), footprint);
   EXPECT_LE(space->Size(), growth_limit);
@@ -449,7 +487,7 @@
   space->Free(self, large_object);
 
   // Sanity check footprint
-  footprint = mspace_footprint(mspace);
+  footprint = space->GetFootprint();
   EXPECT_LE(footprint, growth_limit);
   EXPECT_GE(space->Size(), footprint);
   EXPECT_LE(space->Size(), growth_limit);
@@ -467,7 +505,7 @@
   EXPECT_EQ(space->NonGrowthLimitCapacity(), capacity);
 
   // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddContinuousSpace(space);
+  AddSpace(space);
 
   // In this round we don't allocate with growth and therefore can't grow past the initial size.
   // This effectively makes the growth_limit the initial_size, so assert this.
@@ -488,8 +526,8 @@
   }
 
 // Each size test is its own test so that we get a fresh heap each time
-TEST_F(SpaceTest, SizeFootPrintGrowthLimitAndTrim_AllocationsOf_8B) {
-  SizeFootPrintGrowthLimitAndTrimDriver(8);
+TEST_F(SpaceTest, SizeFootPrintGrowthLimitAndTrim_AllocationsOf_12B) {
+  SizeFootPrintGrowthLimitAndTrimDriver(12);
 }
 TEST_SizeFootPrintGrowthLimitAndTrim(16B, 16)
 TEST_SizeFootPrintGrowthLimitAndTrim(24B, 24)
diff --git a/runtime/globals.h b/runtime/globals.h
index 31574ff..c2fe67e 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -26,53 +26,68 @@
 typedef intptr_t word;
 typedef uintptr_t uword;
 
-const size_t KB = 1024;
-const size_t MB = KB * KB;
-const size_t GB = KB * KB * KB;
+static constexpr size_t KB = 1024;
+static constexpr size_t MB = KB * KB;
+static constexpr size_t GB = KB * KB * KB;
 
-const size_t kWordSize = sizeof(word);
-const size_t kPointerSize = sizeof(void*);
+static constexpr size_t kWordSize = sizeof(word);
+static constexpr size_t kPointerSize = sizeof(void*);
 
-const size_t kBitsPerByte = 8;
-const size_t kBitsPerByteLog2 = 3;
-const int kBitsPerWord = kWordSize * kBitsPerByte;
-const size_t kWordHighBitMask = 1 << (kBitsPerWord - 1);
+static constexpr size_t kBitsPerByte = 8;
+static constexpr size_t kBitsPerByteLog2 = 3;
+static constexpr int kBitsPerWord = kWordSize * kBitsPerByte;
+static constexpr size_t kWordHighBitMask = static_cast<size_t>(1) << (kBitsPerWord - 1);
 
 // Required stack alignment
-const size_t kStackAlignment = 16;
+static constexpr size_t kStackAlignment = 16;
 
 // Required object alignment
-const size_t kObjectAlignment = 8;
+static constexpr size_t kObjectAlignment = 8;
 
 // ARM instruction alignment. ARM processors require code to be 4-byte aligned,
 // but ARM ELF requires 8..
-const size_t kArmAlignment = 8;
+static constexpr size_t kArmAlignment = 8;
 
 // MIPS instruction alignment.  MIPS processors require code to be 4-byte aligned.
 // TODO: Can this be 4?
-const size_t kMipsAlignment = 8;
+static constexpr size_t kMipsAlignment = 8;
 
 // X86 instruction alignment. This is the recommended alignment for maximum performance.
-const size_t kX86Alignment = 16;
+static constexpr size_t kX86Alignment = 16;
 
 // System page size. We check this against sysconf(_SC_PAGE_SIZE) at runtime, but use a simple
 // compile-time constant so the compiler can generate better code.
-const int kPageSize = 4096;
+static constexpr int kPageSize = 4096;
 
 // Whether or not this is a debug build. Useful in conditionals where NDEBUG isn't.
 #if defined(NDEBUG)
-const bool kIsDebugBuild = false;
+static constexpr bool kIsDebugBuild = false;
 #else
-const bool kIsDebugBuild = true;
+static constexpr bool kIsDebugBuild = true;
 #endif
 
 // Whether or not this is a target (vs host) build. Useful in conditionals where ART_TARGET isn't.
 #if defined(ART_TARGET)
-const bool kIsTargetBuild = true;
+static constexpr bool kIsTargetBuild = true;
 #else
-const bool kIsTargetBuild = false;
+static constexpr bool kIsTargetBuild = false;
 #endif
 
+#if defined(ART_USE_PORTABLE_COMPILER)
+static constexpr bool kUsePortableCompiler = true;
+#else
+static constexpr bool kUsePortableCompiler = false;
+#endif
+
+// Garbage collector constants.
+static constexpr bool kMovingCollector = true && !kUsePortableCompiler;
+// True if we allow moving classes.
+static constexpr bool kMovingClasses = false;
+// True if we allow moving fields.
+static constexpr bool kMovingFields = false;
+// True if we allow moving methods.
+static constexpr bool kMovingMethods = false;
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_GLOBALS_H_
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 67620a0..9f899e8 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -537,7 +537,7 @@
     HprofRecord* rec = &current_record_;
 
     for (StringMapIterator it = strings_.begin(); it != strings_.end(); ++it) {
-      std::string string((*it).first);
+      const std::string& string = (*it).first;
       size_t id = (*it).second;
 
       int err = current_record_.StartNewRecord(header_fp_, HPROF_TAG_STRING, HPROF_TIME);
diff --git a/runtime/indenter.h b/runtime/indenter.h
index c432e1b..d055d4e 100644
--- a/runtime/indenter.h
+++ b/runtime/indenter.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_INDENTER_H_
 #define ART_RUNTIME_INDENTER_H_
 
+#include "base/logging.h"
 #include "base/macros.h"
 #include <streambuf>
 
@@ -30,16 +31,28 @@
 
  private:
   int_type overflow(int_type c) {
-    if (c != std::char_traits<char>::eof()) {
-      if (indent_next_) {
-        for (size_t i = 0; i < count_; ++i) {
-          out_sbuf_->sputc(text_);
+    if (UNLIKELY(c == std::char_traits<char>::eof())) {
+      out_sbuf_->pubsync();
+      return c;
+    }
+    if (indent_next_) {
+      for (size_t i = 0; i < count_; ++i) {
+        int_type r = out_sbuf_->sputc(text_);
+        if (UNLIKELY(r != text_)) {
+          out_sbuf_->pubsync();
+          r = out_sbuf_->sputc(text_);
+          CHECK_EQ(r, text_) << "Error writing to buffer. Disk full?";
         }
       }
-      out_sbuf_->sputc(c);
-      indent_next_ = (c == '\n');
     }
-    return std::char_traits<char>::not_eof(c);
+    indent_next_ = (c == '\n');
+    int_type r = out_sbuf_->sputc(c);
+    if (UNLIKELY(r != c)) {
+      out_sbuf_->pubsync();
+      r = out_sbuf_->sputc(c);
+      CHECK_EQ(r, c) << "Error writing to buffer. Disk full?";
+    }
+    return r;
   }
 
   int sync() {
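
For context, the overflow() rewrite above changes failure handling: instead of silently dropping
output, a failed sputc now triggers a pubsync() flush and one retry before CHECK-failing. An
indenting streambuf like this is normally layered under a std::ostream; a hedged sketch of the
usage pattern (the constructor shape is assumed from the surrounding header):

    #include <ostream>

    // Wrap an existing stream's buffer so every new line is prefixed
    // with two spaces. Indenter is the filter streambuf defined above.
    void DumpIndented(std::ostream& os) {
      Indenter indent_filter(os.rdbuf(), ' ', 2);  // assumed ctor: (sink, fill char, count)
      std::ostream indented(&indent_filter);
      indented << "first line\n"
               << "second line\n";  // both lines arrive at os prefixed by "  "
    }
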
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 8cf486f..4ad9c63 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -39,8 +39,13 @@
 #include "thread_list.h"
 
 namespace art {
+
+extern void SetQuickAllocEntryPointsInstrumented(bool instrumented);
+
 namespace instrumentation {
 
+const bool kVerboseInstrumentation = false;
+
 // Do we want to deoptimize for method entry and exit listeners or just try to intercept
 // invocations? Deoptimization forces all code to run in the interpreter and considerably hurts the
 // application's performance.
@@ -54,10 +59,7 @@
 
 bool Instrumentation::InstallStubsForClass(mirror::Class* klass) {
   bool uninstall = !entry_exit_stubs_installed_ && !interpreter_stubs_installed_;
-  ClassLinker* class_linker = NULL;
-  if (uninstall) {
-    class_linker = Runtime::Current()->GetClassLinker();
-  }
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   bool is_initialized = klass->IsInitialized();
   for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
     mirror::ArtMethod* method = klass->GetDirectMethod(i);
@@ -73,7 +75,14 @@
         }
       } else {  // !uninstall
         if (!interpreter_stubs_installed_ || method->IsNative()) {
-          new_code = GetQuickInstrumentationEntryPoint();
+          // Do not overwrite resolution trampoline. When the trampoline initializes the method's
+          // class, all its static methods' code will be set to the instrumentation entry point.
+          // For more details, see ClassLinker::FixupStaticTrampolines.
+          if (is_initialized || !method->IsStatic() || method->IsConstructor()) {
+            new_code = GetQuickInstrumentationEntryPoint();
+          } else {
+            new_code = GetResolutionTrampoline(class_linker);
+          }
         } else {
           new_code = GetCompiledCodeToInterpreterBridge();
         }
@@ -391,12 +400,62 @@
   }
 }
 
+static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg) {
+  thread->ResetQuickAllocEntryPointsForThread();
+}
+
+void Instrumentation::InstrumentQuickAllocEntryPoints() {
+  // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racy and this code
+  //       should be guarded by a lock.
+  DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_.load(), 0);
+  const bool enable_instrumentation =
+      quick_alloc_entry_points_instrumentation_counter_.fetch_add(1) == 0;
+  if (enable_instrumentation) {
+    // Instrumentation wasn't enabled so enable it.
+    SetQuickAllocEntryPointsInstrumented(true);
+    ResetQuickAllocEntryPoints();
+  }
+}
+
+void Instrumentation::UninstrumentQuickAllocEntryPoints() {
+  // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racy and this code
+  //       should be guarded by a lock.
+  DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_.load(), 0);
+  const bool disable_instrumentation =
+      quick_alloc_entry_points_instrumentation_counter_.fetch_sub(1) == 1;
+  if (disable_instrumentation) {
+    SetQuickAllocEntryPointsInstrumented(false);
+    ResetQuickAllocEntryPoints();
+  }
+}
+
+void Instrumentation::ResetQuickAllocEntryPoints() {
+  Runtime* runtime = Runtime::Current();
+  if (runtime->IsStarted()) {
+    ThreadList* tl = runtime->GetThreadList();
+    Thread* self = Thread::Current();
+    tl->SuspendAll();
+    {
+      MutexLock mu(self, *Locks::thread_list_lock_);
+      tl->ForEach(ResetQuickAllocEntryPointsForThread, NULL);
+    }
+    tl->ResumeAll();
+  }
+}
+
 void Instrumentation::UpdateMethodsCode(mirror::ArtMethod* method, const void* code) const {
   if (LIKELY(!instrumentation_stubs_installed_)) {
     method->SetEntryPointFromCompiledCode(code);
   } else {
     if (!interpreter_stubs_installed_ || method->IsNative()) {
-      method->SetEntryPointFromCompiledCode(GetQuickInstrumentationEntryPoint());
+      // Do not overwrite resolution trampoline. When the trampoline initializes the method's
+      // class, all its static methods' code will be set to the instrumentation entry point.
+      // For more details, see ClassLinker::FixupStaticTrampolines.
+      if (code == GetResolutionTrampoline(Runtime::Current()->GetClassLinker())) {
+        method->SetEntryPointFromCompiledCode(code);
+      } else {
+        method->SetEntryPointFromCompiledCode(GetQuickInstrumentationEntryPoint());
+      }
     } else {
       method->SetEntryPointFromCompiledCode(GetCompiledCodeToInterpreterBridge());
     }
@@ -449,7 +508,7 @@
                                         uint32_t dex_pc) const {
   if (have_method_unwind_listeners_) {
     for (InstrumentationListener* listener : method_unwind_listeners_) {
-      listener->MethodUnwind(thread, method, dex_pc);
+      listener->MethodUnwind(thread, this_object, method, dex_pc);
     }
   }
 }
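
Instrument/UninstrumentQuickAllocEntryPoints above implement a reference count: the first client
to request instrumented allocation entry points swaps them in, the last one out swaps them back,
and every thread's entry-point table is reset under suspension. The same pattern in miniature,
using std::atomic in place of ART's AtomicInteger (all names are illustrative):

    #include <atomic>

    std::atomic<int> g_counter{0};

    void EnableFeature() { /* swap in the instrumented entry points */ }
    void DisableFeature() { /* restore the default entry points */ }

    void Acquire() {
      if (g_counter.fetch_add(1) == 0) {
        EnableFeature();  // first user: flip the global state
      }
    }

    void Release() {
      if (g_counter.fetch_sub(1) == 1) {
        DisableFeature();  // last user: restore the default state
      }
    }

As the TODOs in the diff note, the transition test and the toggle are not atomic together, so two
racing callers can observe stale state; a lock around the pair would close that window.
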
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 7a0aaf7..72a646e 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_INSTRUMENTATION_H_
 #define ART_RUNTIME_INSTRUMENTATION_H_
 
+#include "atomic_integer.h"
 #include "base/macros.h"
 #include "locks.h"
 
@@ -36,8 +37,6 @@
 
 namespace instrumentation {
 
-const bool kVerboseInstrumentation = false;
-
 // Interpreter handler tables.
 enum InterpreterHandlerTable {
   kMainHandlerTable = 0,          // Main handler table: no suspend check, no instrumentation.
@@ -68,8 +67,9 @@
 
   // Call-back for when a method is popped due to an exception throw. A method will either cause a
   // MethodExited call-back or a MethodUnwind call-back when its activation is removed.
-  virtual void MethodUnwind(Thread* thread, const mirror::ArtMethod* method,
-                            uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+  virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
+                            const mirror::ArtMethod* method, uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Call-back for when the dex pc moves in a method.
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
@@ -104,7 +104,8 @@
       have_method_entry_listeners_(false), have_method_exit_listeners_(false),
       have_method_unwind_listeners_(false), have_dex_pc_listeners_(false),
       have_exception_caught_listeners_(false),
-      interpreter_handler_table_(kMainHandlerTable) {}
+      interpreter_handler_table_(kMainHandlerTable),
+      quick_alloc_entry_points_instrumentation_counter_(0) {}
 
   // Add a listener to be notified of the masked together set of instrumentation events. This
   // suspends the runtime to install stubs. You are expected to hold the mutator lock as a proxy
@@ -123,6 +124,10 @@
     return interpreter_handler_table_;
   }
 
+  void InstrumentQuickAllocEntryPoints() LOCKS_EXCLUDED(Locks::thread_list_lock_);
+  void UninstrumentQuickAllocEntryPoints() LOCKS_EXCLUDED(Locks::thread_list_lock_);
+  void ResetQuickAllocEntryPoints();
+
   // Update the code of a method respecting any installed stubs.
   void UpdateMethodsCode(mirror::ArtMethod* method, const void* code) const;
 
@@ -289,9 +294,14 @@
   std::list<InstrumentationListener*> dex_pc_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> exception_caught_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
-  // Current interpreter handler table. This is updated each time the thread state flags are modified.
+  // Current interpreter handler table. This is updated each time the thread state flags are
+  // modified.
   InterpreterHandlerTable interpreter_handler_table_;
 
+  // Greater than 0 if the quick alloc entry points are instrumented.
+  // TODO: The accesses and changes to this are racy and should be guarded by a lock.
+  AtomicInteger quick_alloc_entry_points_instrumentation_counter_;
+
   DISALLOW_COPY_AND_ASSIGN(Instrumentation);
 };
 
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 8f9e072..a829e97 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -48,7 +48,7 @@
   MutexLock mu(Thread::Current(), intern_table_lock_);
   if (!only_dirty || is_dirty_) {
     for (auto& strong_intern : strong_interns_) {
-      strong_intern.second = reinterpret_cast<mirror::String*>(visitor(strong_intern.second, arg));
+      strong_intern.second = down_cast<mirror::String*>(visitor(strong_intern.second, arg));
       DCHECK(strong_intern.second != nullptr);
     }
 
@@ -59,8 +59,7 @@
   // Note: we deliberately don't visit the weak_interns_ table and the immutable image roots.
 }
 
-mirror::String* InternTable::Lookup(Table& table, mirror::String* s,
-                                    uint32_t hash_code) {
+mirror::String* InternTable::Lookup(Table& table, mirror::String* s, uint32_t hash_code) {
   intern_table_lock_.AssertHeld(Thread::Current());
   for (auto it = table.find(hash_code), end = table.end(); it != end; ++it) {
     mirror::String* existing_string = it->second;
@@ -71,8 +70,7 @@
   return NULL;
 }
 
-mirror::String* InternTable::Insert(Table& table, mirror::String* s,
-                                    uint32_t hash_code) {
+mirror::String* InternTable::Insert(Table& table, mirror::String* s, uint32_t hash_code) {
   intern_table_lock_.AssertHeld(Thread::Current());
   table.insert(std::make_pair(hash_code, s));
   return s;
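
Lookup() above scans every entry filed under the string's hash code and compares for real
equality, since distinct strings can collide on a hash. The same multimap idiom in a
self-contained form (std::unordered_multimap used purely for illustration):

    #include <cstdint>
    #include <string>
    #include <unordered_map>

    using Table = std::unordered_multimap<uint32_t, std::string*>;

    // Returns an existing entry equal to *s, or nullptr. Hash collisions are
    // resolved by comparing the actual contents, not just the key.
    std::string* Lookup(Table& table, const std::string* s, uint32_t hash_code) {
      auto range = table.equal_range(hash_code);
      for (auto it = range.first; it != range.second; ++it) {
        if (*it->second == *s) {
          return it->second;
        }
      }
      return nullptr;
    }
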
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index d7555dd..9938478 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -430,8 +430,8 @@
   if (method->IsStatic()) {
     Class* declaringClass = method->GetDeclaringClass();
     if (UNLIKELY(!declaringClass->IsInitializing())) {
-      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(declaringClass,
-                                                                            true, true))) {
+      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(declaringClass, true,
+                                                                            true))) {
         DCHECK(Thread::Current()->IsExceptionPending());
         self->PopShadowFrame();
         return;
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 19f55d2..c9756ac 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -29,7 +29,7 @@
                                   size_t dest_reg, size_t src_reg) {
   // If both register locations contain the same value, the register probably holds a reference.
   int32_t src_value = shadow_frame.GetVReg(src_reg);
-  mirror::Object* o = shadow_frame.GetVRegReference(src_reg);
+  mirror::Object* o = shadow_frame.GetVRegReference<false>(src_reg);
   if (src_value == reinterpret_cast<int32_t>(o)) {
     new_shadow_frame.SetVRegReference(dest_reg, o);
   } else {
@@ -193,7 +193,7 @@
     }
     return false;
   }
-  Object* newArray = Array::Alloc(self, arrayClass, length);
+  Object* newArray = Array::Alloc<true>(self, arrayClass, length);
   if (UNLIKELY(newArray == NULL)) {
     DCHECK(self->IsExceptionPending());
     return false;
@@ -233,7 +233,8 @@
   std::string name(PrettyMethod(shadow_frame->GetMethod()));
   if (name == "java.lang.Class java.lang.Class.forName(java.lang.String)") {
     std::string descriptor(DotToDescriptor(shadow_frame->GetVRegReference(arg_offset)->AsString()->ToModifiedUtf8().c_str()));
-    ClassLoader* class_loader = NULL;  // shadow_frame.GetMethod()->GetDeclaringClass()->GetClassLoader();
+
+    SirtRef<ClassLoader> class_loader(self, nullptr);  // shadow_frame.GetMethod()->GetDeclaringClass()->GetClassLoader();
     Class* found = Runtime::Current()->GetClassLinker()->FindClass(descriptor.c_str(),
                                                                    class_loader);
     CHECK(found != NULL) << "Class.forName failed in un-started runtime for class: "
@@ -278,7 +279,7 @@
     // TODO: getDeclaredField calls GetType once the field is found to ensure a
     //       NoClassDefFoundError is thrown if the field's type cannot be resolved.
     Class* jlr_Field = self->DecodeJObject(WellKnownClasses::java_lang_reflect_Field)->AsClass();
-    SirtRef<Object> field(self, jlr_Field->AllocObject(self));
+    SirtRef<Object> field(self, jlr_Field->AllocNonMovableObject(self));
     CHECK(field.get() != NULL);
     ArtMethod* c = jlr_Field->FindDeclaredDirectMethod("<init>", "(Ljava/lang/reflect/ArtField;)V");
     uint32_t args[1];
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index aa6bcd6..99c85bd 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -509,8 +509,9 @@
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(NEW_INSTANCE) {
-    Object* obj = AllocObjectFromCodeInstrumented(inst->VRegB_21c(), shadow_frame.GetMethod(),
-                                                  self, do_access_check);
+    Object* obj = AllocObjectFromCode<do_access_check, true>(
+        inst->VRegB_21c(), shadow_frame.GetMethod(), self,
+        Runtime::Current()->GetHeap()->GetCurrentAllocator());
     if (UNLIKELY(obj == NULL)) {
       HANDLE_PENDING_EXCEPTION();
     } else {
@@ -522,8 +523,9 @@
 
   HANDLE_INSTRUCTION_START(NEW_ARRAY) {
     int32_t length = shadow_frame.GetVReg(inst->VRegB_22c(inst_data));
-    Object* obj = AllocArrayFromCodeInstrumented(inst->VRegC_22c(), shadow_frame.GetMethod(),
-                                                 length, self, do_access_check);
+    Object* obj = AllocArrayFromCode<do_access_check, true>(
+        inst->VRegC_22c(), shadow_frame.GetMethod(), length, self,
+        Runtime::Current()->GetHeap()->GetCurrentAllocator());
     if (UNLIKELY(obj == NULL)) {
       HANDLE_PENDING_EXCEPTION();
     } else {
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index bd0d87e..675095f 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -422,8 +422,9 @@
       }
       case Instruction::NEW_INSTANCE: {
         PREAMBLE();
-        Object* obj = AllocObjectFromCodeInstrumented(inst->VRegB_21c(), shadow_frame.GetMethod(),
-                                                      self, do_access_check);
+        Object* obj = AllocObjectFromCode<do_access_check, true>(
+            inst->VRegB_21c(), shadow_frame.GetMethod(), self,
+            Runtime::Current()->GetHeap()->GetCurrentAllocator());
         if (UNLIKELY(obj == NULL)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
@@ -435,8 +436,9 @@
       case Instruction::NEW_ARRAY: {
         PREAMBLE();
         int32_t length = shadow_frame.GetVReg(inst->VRegB_22c(inst_data));
-        Object* obj = AllocArrayFromCodeInstrumented(inst->VRegC_22c(), shadow_frame.GetMethod(),
-                                                     length, self, do_access_check);
+        Object* obj = AllocArrayFromCode<do_access_check, true>(
+            inst->VRegC_22c(), shadow_frame.GetMethod(), length, self,
+            Runtime::Current()->GetHeap()->GetCurrentAllocator());
         if (UNLIKELY(obj == NULL)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
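
In both interpreter implementations above, NEW_INSTANCE and NEW_ARRAY now route through
AllocObjectFromCode/AllocArrayFromCode templates whose bool parameters bake the access-check and
instrumentation decisions in at compile time, while the allocator is fetched from the heap at
runtime so the interpreter tracks whatever collector is current. The shape of that pattern,
reduced to a sketch (every name here is an illustrative stand-in):

    #include <cstddef>
    #include <cstdlib>

    enum AllocatorType { kAllocatorBumpPointer, kAllocatorRosAlloc };

    template <bool kDoAccessCheck, bool kInstrumented>
    void* AllocFromCode(std::size_t size, AllocatorType allocator) {
      if (kDoAccessCheck) {
        // Folded away at compile time in the verified-code instantiation.
      }
      (void)allocator;  // the real code dispatches on this to pick a fast path
      void* obj = std::malloc(size);
      if (kInstrumented && obj != nullptr) {
        // Only the instrumented instantiation pays for listener callbacks.
      }
      return obj;
    }

    // Call sites pick the instantiation statically and the allocator dynamically:
    //   AllocFromCode<true, true>(size, GetCurrentAllocator());
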
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index a1657d0..fd78bf2 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -31,6 +31,7 @@
 struct iovec;
 
 namespace art {
+  union JValue;
 namespace mirror {
   class ArtMethod;
 }  // namespace mirror
@@ -185,8 +186,11 @@
    * issuing a MethodEntry on a native method.
    *
    * "eventFlags" indicates the types of events that have occurred.
+   *
+   * "returnValue" is non-null for MethodExit events only.
    */
-  bool PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags)
+  bool PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags,
+                         const JValue* returnValue)
      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 345549d..b05b49d 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -521,7 +521,7 @@
      * The JDWP thread has told us (and possibly all other threads) to
      * resume.  See if it has left anything in our DebugInvokeReq mailbox.
      */
-    if (!pReq->invoke_needed_) {
+    if (!pReq->invoke_needed) {
       /*LOGD("SuspendByPolicy: no invoke needed");*/
       break;
     }
@@ -535,12 +535,12 @@
     pReq->error = ERR_NONE;
 
     /* clear this before signaling */
-    pReq->invoke_needed_ = false;
+    pReq->invoke_needed = false;
 
     VLOG(jdwp) << "invoke complete, signaling and self-suspending";
     Thread* self = Thread::Current();
-    MutexLock mu(self, pReq->lock_);
-    pReq->cond_.Signal(self);
+    MutexLock mu(self, pReq->lock);
+    pReq->cond.Signal(self);
   }
 }
 
@@ -570,7 +570,7 @@
  */
 bool JdwpState::InvokeInProgress() {
   DebugInvokeReq* pReq = Dbg::GetInvokeReq();
-  return pReq->invoke_needed_;
+  return pReq->invoke_needed;
 }
 
 /*
@@ -719,7 +719,8 @@
  *  - Single-step to a line with a breakpoint.  Should get a single
  *    event message with both events in it.
  */
-bool JdwpState::PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags) {
+bool JdwpState::PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags,
+                                  const JValue* returnValue) {
   ModBasket basket;
   basket.pLoc = pLoc;
   basket.classId = pLoc->class_id;
@@ -771,9 +772,7 @@
     }
     if ((eventFlags & Dbg::kMethodExit) != 0) {
       FindMatchingEvents(EK_METHOD_EXIT, &basket, match_list, &match_count);
-
-      // TODO: match EK_METHOD_EXIT_WITH_RETURN_VALUE too; we need to include the 'value', though.
-      // FindMatchingEvents(EK_METHOD_EXIT_WITH_RETURN_VALUE, &basket, match_list, &match_count);
+      FindMatchingEvents(EK_METHOD_EXIT_WITH_RETURN_VALUE, &basket, match_list, &match_count);
     }
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
@@ -792,6 +791,9 @@
         expandBufAdd4BE(pReq, match_list[i]->requestId);
         expandBufAdd8BE(pReq, basket.threadId);
         expandBufAddLocation(pReq, *pLoc);
+        if (match_list[i]->eventKind == EK_METHOD_EXIT_WITH_RETURN_VALUE) {
+          Dbg::OutputMethodReturnValue(pLoc->method_id, returnValue, pReq);
+        }
       }
     }
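
The renamed DebugInvokeReq fields (invoke_needed, lock, cond) implement a classic mailbox: the
resumed app thread performs the debugger-requested invoke, clears the flag, then signals the
waiting JDWP thread before re-suspending itself. The same handshake in portable C++, offered as
an analogy rather than ART's own Mutex/ConditionVariable API:

    #include <condition_variable>
    #include <mutex>

    struct InvokeReq {
      bool invoke_needed = false;
      std::mutex lock;
      std::condition_variable cond;
    };

    // Runs on the app thread once SuspendByPolicy resumes it.
    void ServiceInvoke(InvokeReq& req) {
      if (!req.invoke_needed) {
        return;  // nothing in the mailbox
      }
      // ... perform the requested method invocation here ...
      std::lock_guard<std::mutex> mu(req.lock);
      req.invoke_needed = false;  // clear before signaling
      req.cond.notify_one();      // wake the JDWP thread
    }
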
 
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index ec717c1..466edeb 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -22,6 +22,7 @@
 #include <utility>
 #include <vector>
 
+#include "atomic_integer.h"
 #include "base/logging.h"
 #include "base/mutex.h"
 #include "base/stl_util.h"
@@ -292,8 +293,8 @@
   Class* field_type;
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   if (sig[1] != '\0') {
-    ClassLoader* cl = GetClassLoader(soa);
-    field_type = class_linker->FindClass(sig, cl);
+    SirtRef<mirror::ClassLoader> class_loader(soa.Self(), GetClassLoader(soa));
+    field_type = class_linker->FindClass(sig, class_loader);
   } else {
     field_type = class_linker->FindPrimitiveClass(*sig);
   }
@@ -646,8 +647,8 @@
     ScopedObjectAccess soa(env);
     Class* c = NULL;
     if (runtime->IsStarted()) {
-      ClassLoader* cl = GetClassLoader(soa);
-      c = class_linker->FindClass(descriptor.c_str(), cl);
+      SirtRef<mirror::ClassLoader> class_loader(soa.Self(), GetClassLoader(soa));
+      c = class_linker->FindClass(descriptor.c_str(), class_loader);
     } else {
       c = class_linker->FindSystemClass(descriptor.c_str());
     }
@@ -2002,14 +2003,22 @@
     String* s = soa.Decode<String*>(java_string);
     CharArray* chars = s->GetCharArray();
     PinPrimitiveArray(soa, chars);
-    if (is_copy != NULL) {
-      *is_copy = JNI_FALSE;
+    if (is_copy != nullptr) {
+      *is_copy = JNI_TRUE;
     }
-    return chars->GetData() + s->GetOffset();
+    int32_t char_count = s->GetLength();
+    int32_t offset = s->GetOffset();
+    jchar* bytes = new jchar[char_count + 1];
+    for (int32_t i = 0; i < char_count; i++) {
+      bytes[i] = chars->Get(i + offset);
+    }
+    bytes[char_count] = '\0';
+    return bytes;
   }
 
-  static void ReleaseStringChars(JNIEnv* env, jstring java_string, const jchar*) {
+  static void ReleaseStringChars(JNIEnv* env, jstring java_string, const jchar* chars) {
     CHECK_NON_NULL_ARGUMENT(ReleaseStringChars, java_string);
+    delete[] chars;
     ScopedObjectAccess soa(env);
     UnpinPrimitiveArray(soa, soa.Decode<String*>(java_string)->GetCharArray());
   }
@@ -2120,8 +2129,8 @@
 
     // Find the class.
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    Class* array_class = class_linker->FindClass(descriptor.c_str(),
-                                                 element_class->GetClassLoader());
+    SirtRef<mirror::ClassLoader> class_loader(soa.Self(), element_class->GetClassLoader());
+    Class* array_class = class_linker->FindClass(descriptor.c_str(), class_loader);
     if (array_class == NULL) {
       return NULL;
     }
@@ -2146,16 +2155,23 @@
     CHECK_NON_NULL_ARGUMENT(GetPrimitiveArrayCritical, java_array);
     ScopedObjectAccess soa(env);
     Array* array = soa.Decode<Array*>(java_array);
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    if (heap->IsMovableObject(array)) {
+      heap->IncrementDisableGC(soa.Self());
+      // Re-decode in case the object moved while IncrementDisableGC waited for the GC.
+      array = soa.Decode<Array*>(java_array);
+    }
     PinPrimitiveArray(soa, array);
-    if (is_copy != NULL) {
+    if (is_copy != nullptr) {
       *is_copy = JNI_FALSE;
     }
-    return array->GetRawData(array->GetClass()->GetComponentSize());
+    void* address = array->GetRawData(array->GetClass()->GetComponentSize());
+    return address;
   }
 
-  static void ReleasePrimitiveArrayCritical(JNIEnv* env, jarray array, void*, jint mode) {
+  static void ReleasePrimitiveArrayCritical(JNIEnv* env, jarray array, void* elements, jint mode) {
     CHECK_NON_NULL_ARGUMENT(ReleasePrimitiveArrayCritical, array);
-    ReleasePrimitiveArray(env, array, mode);
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
   static jboolean* GetBooleanArrayElements(JNIEnv* env, jbooleanArray array, jboolean* is_copy) {
@@ -2206,36 +2222,40 @@
     return GetPrimitiveArray<jshortArray, jshort*, ShortArray>(soa, array, is_copy);
   }
 
-  static void ReleaseBooleanArrayElements(JNIEnv* env, jbooleanArray array, jboolean*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseBooleanArrayElements(JNIEnv* env, jbooleanArray array, jboolean* elements,
+                                          jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
-  static void ReleaseByteArrayElements(JNIEnv* env, jbyteArray array, jbyte*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseByteArrayElements(JNIEnv* env, jbyteArray array, jbyte* elements, jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
-  static void ReleaseCharArrayElements(JNIEnv* env, jcharArray array, jchar*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseCharArrayElements(JNIEnv* env, jcharArray array, jchar* elements, jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
-  static void ReleaseDoubleArrayElements(JNIEnv* env, jdoubleArray array, jdouble*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseDoubleArrayElements(JNIEnv* env, jdoubleArray array, jdouble* elements,
+                                         jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
-  static void ReleaseFloatArrayElements(JNIEnv* env, jfloatArray array, jfloat*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseFloatArrayElements(JNIEnv* env, jfloatArray array, jfloat* elements,
+                                        jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
-  static void ReleaseIntArrayElements(JNIEnv* env, jintArray array, jint*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseIntArrayElements(JNIEnv* env, jintArray array, jint* elements, jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
-  static void ReleaseLongArrayElements(JNIEnv* env, jlongArray array, jlong*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseLongArrayElements(JNIEnv* env, jlongArray array, jlong* elements, jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
-  static void ReleaseShortArrayElements(JNIEnv* env, jshortArray array, jshort*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseShortArrayElements(JNIEnv* env, jshortArray array, jshort* elements,
+                                        jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
   static void GetBooleanArrayRegion(JNIEnv* env, jbooleanArray array, jsize start, jsize length,
@@ -2551,19 +2571,49 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ArtArrayT* array = soa.Decode<ArtArrayT*>(java_array);
     PinPrimitiveArray(soa, array);
-    if (is_copy != NULL) {
-      *is_copy = JNI_FALSE;
+    // Only make a copy if necessary.
+    if (Runtime::Current()->GetHeap()->IsMovableObject(array)) {
+      if (is_copy != nullptr) {
+        *is_copy = JNI_TRUE;
+      }
+      const size_t component_size = array->GetClass()->GetComponentSize();
+      size_t size = array->GetLength() * component_size;
+      void* data = new uint64_t[RoundUp(size, 8) / 8];
+      memcpy(data, array->GetData(), size);
+      return reinterpret_cast<CArrayT>(data);
+    } else {
+      if (is_copy != nullptr) {
+        *is_copy = JNI_FALSE;
+      }
+      return reinterpret_cast<CArrayT>(array->GetData());
     }
-    return array->GetData();
   }
 
-  template <typename ArrayT>
-  static void ReleasePrimitiveArray(JNIEnv* env, ArrayT java_array, jint mode) {
-    if (mode != JNI_COMMIT) {
-      ScopedObjectAccess soa(env);
-      Array* array = soa.Decode<Array*>(java_array);
-      UnpinPrimitiveArray(soa, array);
+  template <typename ArrayT, typename ElementT>
+  static void ReleasePrimitiveArray(JNIEnv* env, ArrayT java_array, ElementT* elements, jint mode) {
+    ScopedObjectAccess soa(env);
+    Array* array = soa.Decode<Array*>(java_array);
+    size_t component_size = array->GetClass()->GetComponentSize();
+    void* array_data = array->GetRawData(component_size);
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    bool is_copy = array_data != reinterpret_cast<void*>(elements);
+    size_t bytes = array->GetLength() * component_size;
+    VLOG(heap) << "Release primitive array " << env << " array_data " << array_data
+               << " elements " << reinterpret_cast<void*>(elements);
+    if (!is_copy && heap->IsMovableObject(array)) {
+      heap->DecrementDisableGC(soa.Self());
     }
+    // Don't need to copy if we had a direct pointer.
+    if (mode != JNI_ABORT && is_copy) {
+      memcpy(array_data, elements, bytes);
+    }
+    if (mode != JNI_COMMIT) {
+      if (is_copy) {
+        delete[] reinterpret_cast<uint64_t*>(elements);
+      }
+    }
+    // TODO: Do we always unpin primitive array?
+    UnpinPrimitiveArray(soa, array);
   }
 
   template <typename JavaArrayT, typename JavaT, typename ArrayT>
@@ -2854,6 +2904,18 @@
 JNIEnvExt::~JNIEnvExt() {
 }
 
+jobject JNIEnvExt::NewLocalRef(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if (obj == nullptr) {
+    return nullptr;
+  }
+  return reinterpret_cast<jobject>(locals.Add(local_ref_cookie, obj));
+}
+
+void JNIEnvExt::DeleteLocalRef(jobject obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if (obj != nullptr) {
+    locals.Remove(local_ref_cookie, reinterpret_cast<IndirectRef>(obj));
+  }
+}
+
 void JNIEnvExt::SetCheckJniEnabled(bool enabled) {
   check_jni = enabled;
   functions = enabled ? GetCheckJniNativeInterface() : &gJniNativeInterface;
@@ -3199,7 +3261,7 @@
     // the comments in the JNI FindClass function.)
     typedef int (*JNI_OnLoadFn)(JavaVM*, void*);
     JNI_OnLoadFn jni_on_load = reinterpret_cast<JNI_OnLoadFn>(sym);
-    ClassLoader* old_class_loader = self->GetClassLoaderOverride();
+    SirtRef<ClassLoader> old_class_loader(self, self->GetClassLoaderOverride());
     self->SetClassLoaderOverride(class_loader);
 
     int version = 0;
@@ -3209,7 +3271,7 @@
       version = (*jni_on_load)(this, NULL);
     }
 
-    self->SetClassLoaderOverride(old_class_loader);
+    self->SetClassLoaderOverride(old_class_loader.get());
 
     if (version == JNI_ERR) {
       StringAppendF(detail, "JNI_ERR returned from JNI_OnLoad in \"%s\"", path.c_str());
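
The Get/Release changes above make the 'elements' argument load-bearing: with a moving collector
the Get calls may hand back a copy (is_copy == JNI_TRUE), and the matching Release call is what
writes modifications back and frees the buffer. Correct caller-side usage is plain JNI, nothing
ART-specific:

    #include <jni.h>

    void DoubleAll(JNIEnv* env, jintArray array) {
      jboolean is_copy;
      jint* data = env->GetIntArrayElements(array, &is_copy);
      if (data == nullptr) {
        return;  // an OutOfMemoryError is pending
      }
      jsize n = env->GetArrayLength(array);
      for (jsize i = 0; i < n; ++i) {
        data[i] *= 2;
      }
      // Mode 0: copy changes back (if 'data' was a copy) and release the buffer.
      // Never cache 'data' past this call; with a moving GC it may go stale.
      env->ReleaseIntArrayElements(array, data, 0);
    }
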
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 888d5e5..96f7ae0 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -162,6 +162,9 @@
     return Offset(OFFSETOF_MEMBER(JNIEnvExt, self));
   }
 
+  jobject NewLocalRef(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void DeleteLocalRef(jobject obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   Thread* const self;
   JavaVMExt* vm;
 
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index c389580..26b1836 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -86,19 +86,19 @@
     const char* class_name = is_static ? "StaticLeafMethods" : "NonStaticLeafMethods";
     jobject jclass_loader(LoadDex(class_name));
     Thread* self = Thread::Current();
+    SirtRef<mirror::ClassLoader> null_class_loader(self, nullptr);
     SirtRef<mirror::ClassLoader>
         class_loader(self,
                      ScopedObjectAccessUnchecked(self).Decode<mirror::ClassLoader*>(jclass_loader));
     if (is_static) {
-      CompileDirectMethod(class_loader.get(), class_name, method_name, method_signature);
+      CompileDirectMethod(class_loader, class_name, method_name, method_signature);
     } else {
-      CompileVirtualMethod(NULL, "java.lang.Class", "isFinalizable", "()Z");
-      CompileDirectMethod(NULL, "java.lang.Object", "<init>", "()V");
-      CompileVirtualMethod(class_loader.get(), class_name, method_name, method_signature);
+      CompileVirtualMethod(null_class_loader, "java.lang.Class", "isFinalizable", "()Z");
+      CompileDirectMethod(null_class_loader, "java.lang.Object", "<init>", "()V");
+      CompileVirtualMethod(class_loader, class_name, method_name, method_signature);
     }
 
-    mirror::Class* c = class_linker_->FindClass(DotToDescriptor(class_name).c_str(),
-                                                class_loader.get());
+    mirror::Class* c = class_linker_->FindClass(DotToDescriptor(class_name).c_str(), class_loader);
     CHECK(c != NULL);
 
     method = is_static ? c->FindDirectMethod(method_name, method_signature)
@@ -1081,7 +1081,6 @@
   EXPECT_EQ(memcmp(&src_buf[0], xs, size * sizeof(scalar_type)), 0) \
     << # get_elements_fn " not equal"; \
-  env_->release_elements_fn(a, xs, 0); \
-  EXPECT_EQ(reinterpret_cast<uintptr_t>(v), reinterpret_cast<uintptr_t>(xs))
+  env_->release_elements_fn(a, xs, 0);
 
 TEST_F(JniInternalTest, BooleanArrays) {
   EXPECT_PRIMITIVE_ARRAY(NewBooleanArray, GetBooleanArrayRegion, SetBooleanArrayRegion,
@@ -1337,7 +1336,7 @@
 
   jboolean is_copy = JNI_FALSE;
   chars = env_->GetStringChars(s, &is_copy);
-  EXPECT_EQ(JNI_FALSE, is_copy);
+  EXPECT_EQ(JNI_TRUE, is_copy);
   EXPECT_EQ(expected[0], chars[0]);
   EXPECT_EQ(expected[1], chars[1]);
   EXPECT_EQ(expected[2], chars[2]);
@@ -1361,7 +1360,8 @@
 
   jboolean is_copy = JNI_FALSE;
   chars = env_->GetStringCritical(s, &is_copy);
-  EXPECT_EQ(JNI_FALSE, is_copy);
+  // TODO: Fix GetStringCritical to use the same mechanism as GetPrimitiveArrayElementsCritical.
+  EXPECT_EQ(JNI_TRUE, is_copy);
   EXPECT_EQ(expected[0], chars[0]);
   EXPECT_EQ(expected[1], chars[1]);
   EXPECT_EQ(expected[2], chars[2]);
@@ -1669,9 +1669,9 @@
   jobject jclass_loader = LoadDex("Main");
   SirtRef<mirror::ClassLoader>
       class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(jclass_loader));
-  CompileDirectMethod(class_loader.get(), "Main", "main", "([Ljava/lang/String;)V");
+  CompileDirectMethod(class_loader, "Main", "main", "([Ljava/lang/String;)V");
 
-  mirror::Class* klass = class_linker_->FindClass("LMain;", class_loader.get());
+  mirror::Class* klass = class_linker_->FindClass("LMain;", class_loader);
   ASSERT_TRUE(klass != NULL);
 
   mirror::ArtMethod* method = klass->FindDirectMethod("main", "([Ljava/lang/String;)V");
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
index efd3d9d..aea10c2 100644
--- a/runtime/lock_word-inl.h
+++ b/runtime/lock_word-inl.h
@@ -36,6 +36,11 @@
   return reinterpret_cast<Monitor*>(value_ << kStateSize);
 }
 
+inline size_t LockWord::ForwardingAddress() const {
+  DCHECK_EQ(GetState(), kForwardingAddress);
+  return static_cast<size_t>(value_ << kStateSize);
+}
+
 inline LockWord::LockWord() : value_(0) {
   DCHECK_EQ(GetState(), kUnlocked);
 }
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 1882ae6..d24a3bb 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -21,6 +21,7 @@
 #include <stdint.h>
 
 #include "base/logging.h"
+#include "utils.h"
 
 namespace art {
 namespace mirror {
@@ -73,6 +74,7 @@
     kStateThinOrUnlocked = 0,
     kStateFat = 1,
     kStateHash = 2,
+    kStateForwardingAddress = 3,
 
     // When the state is kHashCode, the non-state bits hold the hashcode.
     kHashShift = 0,
@@ -86,6 +88,11 @@
                      (kStateThinOrUnlocked << kStateShift));
   }
 
+  static LockWord FromForwardingAddress(size_t target) {
+    DCHECK(IsAligned<1 << kStateSize>(target));
+    return LockWord((target >> kStateSize) | (kStateForwardingAddress << kStateShift));
+  }
+
   static LockWord FromHashCode(uint32_t hash_code) {
     CHECK_LE(hash_code, static_cast<uint32_t>(kHashMask));
     return LockWord((hash_code << kHashShift) | (kStateHash << kStateShift));
@@ -96,19 +103,25 @@
     kThinLocked,  // Single uncontended owner.
     kFatLocked,   // See associated monitor.
     kHashCode,    // Lock word contains an identity hash.
+    kForwardingAddress,  // Lock word contains the forwarding address of an object.
   };
 
   LockState GetState() const {
-    uint32_t internal_state = (value_ >> kStateShift) & kStateMask;
-    if (value_ == 0) {
+    if (UNLIKELY(value_ == 0)) {
       return kUnlocked;
-    } else if (internal_state == kStateThinOrUnlocked) {
-      return kThinLocked;
-    } else if (internal_state == kStateHash) {
-      return kHashCode;
     } else {
-      DCHECK_EQ(internal_state, static_cast<uint32_t>(kStateFat));
-      return kFatLocked;
+      uint32_t internal_state = (value_ >> kStateShift) & kStateMask;
+      switch (internal_state) {
+        case kStateThinOrUnlocked:
+          return kThinLocked;
+        case kStateHash:
+          return kHashCode;
+        case kStateForwardingAddress:
+          return kForwardingAddress;
+        default:
+          DCHECK_EQ(internal_state, static_cast<uint32_t>(kStateFat));
+          return kFatLocked;
+      }
     }
   }
 
@@ -121,6 +134,9 @@
   // Return the Monitor encoded in a fat lock.
   Monitor* FatLockMonitor() const;
 
+  // Return the forwarding address stored in the monitor.
+  size_t ForwardingAddress() const;
+
   // Default constructor with no lock ownership.
   LockWord();
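
The new kStateForwardingAddress state reuses the lock word to record where an object moved: the
address is shifted right by the width of the state field (hence the alignment DCHECK) and the
state tag is packed into the top bits. A self-contained sketch of that encoding, assuming a
32-bit word with a 2-bit state in the high bits (the real widths live in the enum above):

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kStateSize = 2;                 // bits reserved for the state tag
    constexpr uint32_t kStateShift = 32 - kStateSize;  // state lives in the top bits
    constexpr uint32_t kStateForwardingAddress = 3;

    uint32_t EncodeForwardingAddress(uint32_t target) {
      assert((target & ((1u << kStateSize) - 1)) == 0);  // must be 4-byte aligned
      return (target >> kStateSize) | (kStateForwardingAddress << kStateShift);
    }

    uint32_t DecodeForwardingAddress(uint32_t word) {
      return word << kStateSize;  // the state tag falls off the top
    }
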
 
diff --git a/runtime/locks.h b/runtime/locks.h
index 2262218..2308e95 100644
--- a/runtime/locks.h
+++ b/runtime/locks.h
@@ -37,6 +37,9 @@
   kThreadSuspendCountLock,
   kAbortLock,
   kJdwpSocketLock,
+  kRosAllocGlobalLock,
+  kRosAllocBracketLock,
+  kRosAllocBulkFreeLock,
   kAllocSpaceLock,
   kMarkSweepMarkStackLock,
   kDefaultMutexLevel,
diff --git a/runtime/mapping_table.h b/runtime/mapping_table.h
index 2162008..c468c1e 100644
--- a/runtime/mapping_table.h
+++ b/runtime/mapping_table.h
@@ -30,7 +30,7 @@
 
   uint32_t TotalSize() const PURE {
     const uint8_t* table = encoded_table_;
-    if (table == NULL) {
+    if (table == nullptr) {
       return 0;
     } else {
       return DecodeUnsignedLeb128(&table);
@@ -39,7 +39,7 @@
 
   uint32_t DexToPcSize() const PURE {
     const uint8_t* table = encoded_table_;
-    if (table == NULL) {
+    if (table == nullptr) {
       return 0;
     } else {
       uint32_t total_size = DecodeUnsignedLeb128(&table);
@@ -50,9 +50,11 @@
 
   const uint8_t* FirstDexToPcPtr() const {
     const uint8_t* table = encoded_table_;
-    if (table != NULL) {
-      DecodeUnsignedLeb128(&table);  // Total_size, unused.
+    if (table != nullptr) {
+      uint32_t total_size = DecodeUnsignedLeb128(&table);
       uint32_t pc_to_dex_size = DecodeUnsignedLeb128(&table);
+      // We must have dex to pc entries or else the loop will go beyond the end of the table.
+      DCHECK_GT(total_size, pc_to_dex_size);
       for (uint32_t i = 0; i < pc_to_dex_size; ++i) {
         DecodeUnsignedLeb128(&table);  // Move ptr past native PC.
         DecodeUnsignedLeb128(&table);  // Move ptr past dex PC.
@@ -64,13 +66,15 @@
   class DexToPcIterator {
    public:
     DexToPcIterator(const MappingTable* table, uint32_t element) :
-        table_(table), element_(element), end_(table_->DexToPcSize()), encoded_table_ptr_(NULL),
+        table_(table), element_(element), end_(table_->DexToPcSize()), encoded_table_ptr_(nullptr),
         native_pc_offset_(0), dex_pc_(0) {
-      if (element == 0) {
-        encoded_table_ptr_ = table_->FirstDexToPcPtr();
-        native_pc_offset_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
-        dex_pc_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
-      } else {
+      if (element == 0) {  // An iterator wanted from the start.
+        if (end_ > 0) {
+          encoded_table_ptr_ = table_->FirstDexToPcPtr();
+          native_pc_offset_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
+          dex_pc_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
+        }
+      } else {  // An iterator wanted from the end.
         DCHECK_EQ(table_->DexToPcSize(), element);
       }
     }
@@ -100,7 +104,7 @@
     const MappingTable* const table_;  // The original table.
     uint32_t element_;  // A value in the range 0 to end_.
     const uint32_t end_;  // Equal to table_->DexToPcSize().
-    const uint8_t* encoded_table_ptr_;  // Either NULL or points to encoded data after this entry.
+    const uint8_t* encoded_table_ptr_;  // Either null or points to encoded data after this entry.
     uint32_t native_pc_offset_;  // The current value of native pc offset.
     uint32_t dex_pc_;  // The current value of dex pc.
   };
@@ -116,7 +120,7 @@
 
   uint32_t PcToDexSize() const PURE {
     const uint8_t* table = encoded_table_;
-    if (table == NULL) {
+    if (table == nullptr) {
       return 0;
     } else {
       DecodeUnsignedLeb128(&table);  // Total_size, unused.
@@ -127,7 +131,7 @@
 
   const uint8_t* FirstPcToDexPtr() const {
     const uint8_t* table = encoded_table_;
-    if (table != NULL) {
+    if (table != nullptr) {
       DecodeUnsignedLeb128(&table);  // Total_size, unused.
       DecodeUnsignedLeb128(&table);  // PC to Dex size, unused.
     }
@@ -137,13 +141,15 @@
   class PcToDexIterator {
    public:
     PcToDexIterator(const MappingTable* table, uint32_t element) :
-        table_(table), element_(element), end_(table_->PcToDexSize()), encoded_table_ptr_(NULL),
+        table_(table), element_(element), end_(table_->PcToDexSize()), encoded_table_ptr_(nullptr),
         native_pc_offset_(0), dex_pc_(0) {
-      if (element == 0) {
-        encoded_table_ptr_ = table_->FirstPcToDexPtr();
-        native_pc_offset_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
-        dex_pc_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
-      } else {
+      if (element == 0) {  // An iterator wanted from the start.
+        if (end_ > 0) {
+          encoded_table_ptr_ = table_->FirstPcToDexPtr();
+          native_pc_offset_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
+          dex_pc_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
+        }
+      } else {  // An iterator wanted from the end.
         DCHECK_EQ(table_->PcToDexSize(), element);
       }
     }
@@ -173,7 +179,7 @@
     const MappingTable* const table_;  // The original table.
     uint32_t element_;  // A value in the range 0 to PcToDexSize.
     const uint32_t end_;  // Equal to table_->PcToDexSize().
-    const uint8_t* encoded_table_ptr_;  // Either NULL or points to encoded data after this entry.
+    const uint8_t* encoded_table_ptr_;  // Either null or points to encoded data after this entry.
     uint32_t native_pc_offset_;  // The current value of native pc offset.
     uint32_t dex_pc_;  // The current value of dex pc.
   };
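
The table behind these iterators is a flat stream of unsigned LEB128 values: the total size, then
the PC-to-dex entry count, then the entry pairs, which is why iteration must decode sequentially
and why an empty table (end_ == 0) must never touch FirstDexToPcPtr(). For reference, a minimal
unsigned LEB128 decoder equivalent in spirit to the DecodeUnsignedLeb128 used above:

    #include <cstdint>

    // Reads one unsigned LEB128 value and advances *data past it. Each byte
    // contributes 7 payload bits; the high bit marks continuation.
    uint32_t DecodeUnsignedLeb128(const uint8_t** data) {
      const uint8_t* ptr = *data;
      uint32_t result = 0;
      int shift = 0;
      uint8_t byte;
      do {
        byte = *ptr++;
        result |= static_cast<uint32_t>(byte & 0x7f) << shift;
        shift += 7;
      } while ((byte & 0x80) != 0);
      *data = ptr;
      return result;
    }
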
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 3afb606..39e838f 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -133,12 +133,13 @@
                                               fd,
                                               page_aligned_offset));
   if (actual == MAP_FAILED) {
+    std::string strerr(strerror(errno));
     std::string maps;
     ReadFileToString("/proc/self/maps", &maps);
-    *error_msg = StringPrintf("mmap(%p, %zd, %x, %x, %d, %lld) of file '%s' failed\n%s",
+    *error_msg = StringPrintf("mmap(%p, %zd, %x, %x, %d, %lld) of file '%s' failed: %s\n%s",
                               page_aligned_addr, page_aligned_byte_count, prot, flags, fd,
-                              static_cast<int64_t>(page_aligned_offset),
-                              filename, maps.c_str());
+                              static_cast<int64_t>(page_aligned_offset), filename, strerr.c_str(),
+                              maps.c_str());
     return NULL;
   }
   return new MemMap("file", actual + page_offset, byte_count, actual, page_aligned_byte_count,
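
The mem_map.cc fix captures strerror(errno) into a std::string before ReadFileToString runs,
because that call can itself clobber errno. The general pattern when decorating a failing syscall
with context:

    #include <cerrno>
    #include <cstring>
    #include <string>
    #include <sys/mman.h>

    std::string MmapOrError(void* addr, size_t len, int prot, int flags, int fd, off_t off) {
      void* actual = mmap(addr, len, prot, flags, fd, off);
      if (actual == MAP_FAILED) {
        // Save the reason first: any later call (logging, file reads) may reset errno.
        std::string reason(strerror(errno));
        return "mmap failed: " + reason;
      }
      return std::string();  // success
    }
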
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index c60e714..a754b69 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -58,44 +58,54 @@
   return size;
 }
 
-static inline Array* SetArrayLength(Array* array, size_t length) {
-  if (LIKELY(array != NULL)) {
+// Used for setting the array length in the allocation code path to ensure it is guarded by a CAS.
+class SetLengthVisitor {
+ public:
+  explicit SetLengthVisitor(int32_t length) : length_(length) {
+  }
+
+  void operator()(mirror::Object* obj) const {
+    mirror::Array* array = obj->AsArray();
     DCHECK(array->IsArrayInstance());
-    array->SetLength(length);
+    array->SetLength(length_);
   }
-  return array;
-}
 
-inline Array* Array::AllocInstrumented(Thread* self, Class* array_class, int32_t component_count,
-                                       size_t component_size) {
+ private:
+  const int32_t length_;
+};
+
+template <bool kIsInstrumented>
+inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
+                           size_t component_size, gc::AllocatorType allocator_type) {
   size_t size = ComputeArraySize(self, array_class, component_count, component_size);
   if (UNLIKELY(size == 0)) {
-    return NULL;
+    return nullptr;
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  Array* array = down_cast<Array*>(heap->AllocObjectInstrumented(self, array_class, size));
-  return SetArrayLength(array, component_count);
+  SetLengthVisitor visitor(component_count);
+  return down_cast<Array*>(
+      heap->AllocObjectWithAllocator<kIsInstrumented>(self, array_class, size, allocator_type,
+                                                      visitor));
 }
 
-inline Array* Array::AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count,
-                                         size_t component_size) {
-  size_t size = ComputeArraySize(self, array_class, component_count, component_size);
-  if (UNLIKELY(size == 0)) {
-    return NULL;
-  }
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  Array* array = down_cast<Array*>(heap->AllocObjectUninstrumented(self, array_class, size));
-  return SetArrayLength(array, component_count);
-}
-
-inline Array* Array::AllocInstrumented(Thread* self, Class* array_class, int32_t component_count) {
+template <bool kIsInstrumented>
+inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
+                           gc::AllocatorType allocator_type) {
   DCHECK(array_class->IsArrayClass());
-  return AllocInstrumented(self, array_class, component_count, array_class->GetComponentSize());
+  return Alloc<kIsInstrumented>(self, array_class, component_count, array_class->GetComponentSize(),
+                                allocator_type);
+}
+
+template <bool kIsInstrumented>
+inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count) {
+  return Alloc<kIsInstrumented>(self, array_class, component_count,
+                                Runtime::Current()->GetHeap()->GetCurrentAllocator());
+}
 
-inline Array* Array::AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count) {
-  DCHECK(array_class->IsArrayClass());
-  return AllocUninstrumented(self, array_class, component_count, array_class->GetComponentSize());
+template <bool kIsInstrumented>
+inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
+                           size_t component_size) {
+  return Alloc<kIsInstrumented>(self, array_class, component_count, component_size,
+                                Runtime::Current()->GetHeap()->GetCurrentAllocator());
 }
 
 }  // namespace mirror
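
SetLengthVisitor above exists because, once the collector can move or concurrently scan objects,
the array length must be written inside the allocation itself, before the object becomes visible
to other threads or to the GC. Passing a callable into the allocator achieves that without a
special-cased array path. The idiom in outline (the allocator and visitor here are illustrative
stand-ins, not ART's heap API):

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>
    #include <cstring>

    // Runs a post-allocation initializer before the object is published.
    template <typename Visitor>
    void* AllocWithVisitor(std::size_t size, const Visitor& visitor) {
      void* obj = std::calloc(1, size);  // stand-in for the heap's fast path
      if (obj != nullptr) {
        visitor(obj);  // e.g. store the array length while the object is still private
      }
      return obj;
    }

    struct SetLength {
      int32_t length;
      void operator()(void* obj) const {
        // Sketch: pretend the length field sits right after a one-word header.
        std::memcpy(static_cast<char*>(obj) + sizeof(void*), &length, sizeof(length));
      }
    };

    // Usage: void* arr = AllocWithVisitor(size, SetLength{count});
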
diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc
index 020085d..00b88db 100644
--- a/runtime/mirror/array.cc
+++ b/runtime/mirror/array.cc
@@ -41,15 +41,15 @@
 // Recursively create an array with multiple dimensions.  Elements may be
 // Objects or primitive types.
 static Array* RecursiveCreateMultiArray(Thread* self, Class* array_class, int current_dimension,
-                                        IntArray* dimensions)
+                                        SirtRef<mirror::IntArray>& dimensions)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   int32_t array_length = dimensions->Get(current_dimension);
-  SirtRef<Array> new_array(self, Array::Alloc(self, array_class, array_length));
+  SirtRef<Array> new_array(self, Array::Alloc<true>(self, array_class, array_length));
   if (UNLIKELY(new_array.get() == NULL)) {
     CHECK(self->IsExceptionPending());
     return NULL;
   }
-  if ((current_dimension + 1) < dimensions->GetLength()) {
+  if (current_dimension + 1 < dimensions->GetLength()) {
     // Create a new sub-array in every element of the array.
     for (int32_t i = 0; i < array_length; i++) {
       Array* sub_array = RecursiveCreateMultiArray(self, array_class->GetComponentType(),
@@ -87,13 +87,15 @@
 
   // Find/generate the array class.
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  Class* array_class = class_linker->FindClass(descriptor.c_str(), element_class->GetClassLoader());
+  SirtRef<mirror::ClassLoader> class_loader(self, element_class->GetClassLoader());
+  Class* array_class = class_linker->FindClass(descriptor.c_str(), class_loader);
   if (UNLIKELY(array_class == NULL)) {
     CHECK(self->IsExceptionPending());
     return NULL;
   }
   // create the array
-  Array* new_array = RecursiveCreateMultiArray(self, array_class, 0, dimensions);
+  SirtRef<mirror::IntArray> sirt_dimensions(self, dimensions);
+  Array* new_array = RecursiveCreateMultiArray(self, array_class, 0, sirt_dimensions);
   if (UNLIKELY(new_array == NULL)) {
     CHECK(self->IsExceptionPending());
     return NULL;
@@ -112,7 +114,7 @@
 template<typename T>
 PrimitiveArray<T>* PrimitiveArray<T>::Alloc(Thread* self, size_t length) {
   DCHECK(array_class_ != NULL);
-  Array* raw_array = Array::Alloc(self, array_class_, length, sizeof(T));
+  Array* raw_array = Array::Alloc<true>(self, array_class_, length, sizeof(T));
   return down_cast<PrimitiveArray<T>*>(raw_array);
 }
 
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 570dcaa..a332f97 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -18,33 +18,32 @@
 #define ART_RUNTIME_MIRROR_ARRAY_H_
 
 #include "object.h"
+#include "gc/heap.h"
 
 namespace art {
 namespace mirror {
 
 class MANAGED Array : public Object {
  public:
-  // A convenience for code that doesn't know the component size,
-  // and doesn't want to have to work it out itself.
-  static Array* Alloc(Thread* self, Class* array_class, int32_t component_count)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return AllocInstrumented(self, array_class, component_count);
-  }
-  static Array* AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static Array* AllocInstrumented(Thread* self, Class* array_class, int32_t component_count)
+  // A convenience for code that doesn't know the component size, and doesn't want to have to work
+  // it out itself.
+  template <bool kIsInstrumented>
+  static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
+                      gc::AllocatorType allocator_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  template <bool kIsInstrumented>
+  static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
+                      size_t component_size, gc::AllocatorType allocator_type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <bool kIsInstrumented>
+  static Array* Alloc(Thread* self, Class* array_class, int32_t component_count)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <bool kIsInstrumented>
   static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
                       size_t component_size)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return AllocInstrumented(self, array_class, component_count, component_size);
-  }
-  static Array* AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count,
-                                    size_t component_size)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static Array* AllocInstrumented(Thread* self, Class* array_class, int32_t component_count,
-                                  size_t component_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Array* CreateMultiArray(Thread* self, Class* element_class, IntArray* dimensions)
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 7f3a302..3a28974 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -248,7 +248,7 @@
   if (method->IsDirect()) {
     return method;
   }
-  if (method->GetDeclaringClass()->IsInterface()) {
+  if (method->GetDeclaringClass()->IsInterface() && !method->IsMiranda()) {
     return FindVirtualMethodForInterface(method);
   }
   return FindVirtualMethodForVirtual(method);
@@ -357,14 +357,20 @@
   DCHECK_GE(this->object_size_, sizeof(Object));
 }
 
-inline Object* Class::AllocObjectInstrumented(Thread* self) {
+template <bool kIsInstrumented>
+inline Object* Class::Alloc(Thread* self, gc::AllocatorType allocator_type) {
   CheckObjectAlloc();
-  return Runtime::Current()->GetHeap()->AllocObjectInstrumented(self, this, this->object_size_);
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  return heap->AllocObjectWithAllocator<kIsInstrumented>(self, this, this->object_size_,
+                                                         allocator_type);
 }
 
-inline Object* Class::AllocObjectUninstrumented(Thread* self) {
-  CheckObjectAlloc();
-  return Runtime::Current()->GetHeap()->AllocObjectUninstrumented(self, this, this->object_size_);
+inline Object* Class::AllocObject(Thread* self) {
+  return Alloc<true>(self, Runtime::Current()->GetHeap()->GetCurrentAllocator());
+}
+
+inline Object* Class::AllocNonMovableObject(Thread* self) {
+  return Alloc<true>(self, Runtime::Current()->GetHeap()->GetCurrentNonMovingAllocator());
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index f3cb54a..cdc5ab2 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -52,7 +52,8 @@
 
 void Class::SetStatus(Status new_status, Thread* self) {
   Status old_status = GetStatus();
-  bool class_linker_initialized = Runtime::Current()->GetClassLinker() != nullptr;
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  bool class_linker_initialized = class_linker != nullptr && class_linker->IsInitialized();
   if (LIKELY(class_linker_initialized)) {
     if (UNLIKELY(new_status <= old_status && new_status != kStatusError)) {
       LOG(FATAL) << "Unexpected change back of class status for " << PrettyClass(this) << " "
@@ -588,7 +589,6 @@
 ArtField* Class::FindStaticField(const StringPiece& name, const StringPiece& type) {
   // Is the field in this class (or its interfaces), or any of its
   // superclasses (or their interfaces)?
-  ClassHelper kh;
   for (Class* k = this; k != NULL; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredStaticField(name, type);
@@ -596,7 +596,7 @@
       return f;
     }
     // Is this field in any of this class' interfaces?
-    kh.ChangeClass(k);
+    ClassHelper kh(k);
     for (uint32_t i = 0; i < kh.NumDirectInterfaces(); ++i) {
       Class* interface = kh.GetDirectInterface(i);
       f = interface->FindStaticField(name, type);
@@ -609,7 +609,6 @@
 }
 
 ArtField* Class::FindStaticField(const DexCache* dex_cache, uint32_t dex_field_idx) {
-  ClassHelper kh;
   for (Class* k = this; k != NULL; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredStaticField(dex_cache, dex_field_idx);
@@ -617,7 +616,7 @@
       return f;
     }
     // Is this field in any of this class' interfaces?
-    kh.ChangeClass(k);
+    ClassHelper kh(k);
     for (uint32_t i = 0; i < kh.NumDirectInterfaces(); ++i) {
       Class* interface = kh.GetDirectInterface(i);
       f = interface->FindStaticField(dex_cache, dex_field_idx);
@@ -631,7 +630,6 @@
 
 ArtField* Class::FindField(const StringPiece& name, const StringPiece& type) {
   // Find a field using the JLS field resolution order
-  ClassHelper kh;
   for (Class* k = this; k != NULL; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredInstanceField(name, type);
@@ -643,7 +641,7 @@
       return f;
     }
     // Is this field in any of this class' interfaces?
-    kh.ChangeClass(k);
+    ClassHelper kh(k);
     for (uint32_t i = 0; i < kh.NumDirectInterfaces(); ++i) {
       Class* interface = kh.GetDirectInterface(i);
       f = interface->FindStaticField(name, type);
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index ed1aad3..5f64bb4 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_MIRROR_CLASS_H_
 #define ART_RUNTIME_MIRROR_CLASS_H_
 
+#include "gc/heap.h"
 #include "modifiers.h"
 #include "object.h"
 #include "primitive.h"
@@ -377,12 +378,14 @@
   }
 
   // Creates a raw object instance but does not invoke the default constructor.
-  Object* AllocObject(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return AllocObjectInstrumented(self);
-  }
+  template <bool kIsInstrumented>
+  ALWAYS_INLINE Object* Alloc(Thread* self, gc::AllocatorType allocator_type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  Object* AllocObjectUninstrumented(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  Object* AllocObjectInstrumented(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Object* AllocObject(Thread* self)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Object* AllocNonMovableObject(Thread* self)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsVariableSize() const {
     // Classes and arrays vary in size, and so the object_size_ field cannot
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index bd187c1..008a173 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -39,49 +39,50 @@
 namespace art {
 namespace mirror {
 
-Object* Object::Clone(Thread* self) {
-  Class* c = GetClass();
-  DCHECK(!c->IsClassClass());
-
-  // Object::SizeOf gets the right size even if we're an array.
-  // Using c->AllocObject() here would be wrong.
-  size_t num_bytes = SizeOf();
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  SirtRef<Object> copy(self, heap->AllocObject(self, c, num_bytes));
-  if (copy.get() == NULL) {
-    return NULL;
-  }
-
+static Object* CopyObject(Thread* self, mirror::Object* dest, mirror::Object* src, size_t num_bytes)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Copy instance data.  We assume memcpy copies by words.
   // TODO: expose and use move32.
-  byte* src_bytes = reinterpret_cast<byte*>(this);
-  byte* dst_bytes = reinterpret_cast<byte*>(copy.get());
+  byte* src_bytes = reinterpret_cast<byte*>(src);
+  byte* dst_bytes = reinterpret_cast<byte*>(dest);
   size_t offset = sizeof(Object);
   memcpy(dst_bytes + offset, src_bytes + offset, num_bytes - offset);
-
+  gc::Heap* heap = Runtime::Current()->GetHeap();
   // Perform write barriers on copied object references.
+  Class* c = src->GetClass();
   if (c->IsArrayClass()) {
     if (!c->GetComponentType()->IsPrimitive()) {
-      const ObjectArray<Object>* array = copy->AsObjectArray<Object>();
-      heap->WriteBarrierArray(copy.get(), 0, array->GetLength());
+      const ObjectArray<Object>* array = dest->AsObjectArray<Object>();
+      heap->WriteBarrierArray(dest, 0, array->GetLength());
     }
   } else {
-    for (const Class* klass = c; klass != NULL; klass = klass->GetSuperClass()) {
-      size_t num_reference_fields = klass->NumReferenceInstanceFields();
-      for (size_t i = 0; i < num_reference_fields; ++i) {
-        ArtField* field = klass->GetInstanceField(i);
-        MemberOffset field_offset = field->GetOffset();
-        const Object* ref = copy->GetFieldObject<const Object*>(field_offset, false);
-        heap->WriteBarrierField(copy.get(), field_offset, ref);
-      }
-    }
+    heap->WriteBarrierEveryFieldOf(dest);
   }
-
   if (c->IsFinalizable()) {
-    heap->AddFinalizerReference(Thread::Current(), copy.get());
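+    // AddFinalizerReference can allocate and trigger a moving GC, so hold dest in a SirtRef
+    // and return the (possibly updated) reference.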
+    SirtRef<Object> sirt_dest(self, dest);
+    heap->AddFinalizerReference(self, dest);
+    return sirt_dest.get();
   }
+  return dest;
+}
 
-  return copy.get();
+Object* Object::Clone(Thread* self) {
+  CHECK(!IsClass()) << "Can't clone classes.";
+  // Object::SizeOf gets the right size even if we're an array. Using c->AllocObject() here would
+  // be wrong.
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  size_t num_bytes = SizeOf();
+  SirtRef<Object> this_object(self, this);
+  Object* copy;
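+  // Preserve movability: the clone of a non-movable object must itself be non-movable.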
+  if (heap->IsMovableObject(this)) {
+    copy = heap->AllocObject<true>(self, GetClass(), num_bytes);
+  } else {
+    copy = heap->AllocNonMovableObject<true>(self, GetClass(), num_bytes);
+  }
+  if (LIKELY(copy != nullptr)) {
+    return CopyObject(self, copy, this_object.get(), num_bytes);
+  }
+  return copy;
 }
 
 int32_t Object::GenerateIdentityHashCode() {
@@ -96,8 +97,9 @@
 }
 
 int32_t Object::IdentityHashCode() const {
+  mirror::Object* current_this = const_cast<mirror::Object*>(this);
   while (true) {
-    LockWord lw = GetLockWord();
+    LockWord lw = current_this->GetLockWord();
     switch (lw.GetState()) {
       case LockWord::kUnlocked: {
         // Try to compare and swap in a new hash, if we succeed we will return the hash on the next
@@ -112,7 +114,10 @@
       case LockWord::kThinLocked: {
         // Inflate the thin lock to a monitor and stick the hash code inside of the monitor.
         Thread* self = Thread::Current();
-        Monitor::InflateThinLocked(self, const_cast<Object*>(this), lw, GenerateIdentityHashCode());
+        SirtRef<mirror::Object> sirt_this(self, current_this);
+        Monitor::InflateThinLocked(self, sirt_this, lw, GenerateIdentityHashCode());
+        // A GC may have occurred when we switched to kBlocked.
+        current_this = sirt_this.get();
         break;
       }
       case LockWord::kFatLocked: {
@@ -124,6 +129,10 @@
       case LockWord::kHashCode: {
         return lw.GetHashCode();
       }
+      default: {
+        LOG(FATAL) << "Invalid state during hashcode " << lw.GetState();
+        break;
+      }
     }
   }
   LOG(FATAL) << "Unreachable";
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index e8ea3f2..0fb2039 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -30,6 +30,7 @@
 class Monitor;
 struct ObjectOffsets;
 class Thread;
+template <typename T> class SirtRef;
 
 namespace mirror {
 
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index abc88a3..be49b42 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -23,22 +23,32 @@
 #include "mirror/art_field.h"
 #include "mirror/class.h"
 #include "runtime.h"
+#include "sirt_ref.h"
 #include "thread.h"
 
 namespace art {
 namespace mirror {
 
 template<class T>
-inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self, Class* object_array_class, int32_t length) {
-  Array* array = Array::Alloc(self, object_array_class, length, sizeof(Object*));
-  if (UNLIKELY(array == NULL)) {
-    return NULL;
+inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self, Class* object_array_class,
+                                             int32_t length, gc::AllocatorType allocator_type) {
+  Array* array = Array::Alloc<true>(self, object_array_class, length, sizeof(Object*),
+                                    allocator_type);
+  if (UNLIKELY(array == nullptr)) {
+    return nullptr;
   } else {
     return array->AsObjectArray<T>();
   }
 }
 
 template<class T>
+inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self, Class* object_array_class,
+                                             int32_t length) {
+  return Alloc(self, object_array_class, length,
+               Runtime::Current()->GetHeap()->GetCurrentAllocator());
+}
+
+template<class T>
 inline T* ObjectArray<T>::Get(int32_t i) const {
   if (UNLIKELY(!IsValidIndex(i))) {
     return NULL;
@@ -134,9 +144,14 @@
 
 template<class T>
 inline ObjectArray<T>* ObjectArray<T>::CopyOf(Thread* self, int32_t new_length) {
-  ObjectArray<T>* new_array = Alloc(self, GetClass(), new_length);
-  if (LIKELY(new_array != NULL)) {
-    Copy(this, 0, new_array, 0, std::min(GetLength(), new_length));
+  // We may get copied by a compacting GC.
+  SirtRef<ObjectArray<T> > sirt_this(self, this);
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  gc::AllocatorType allocator_type = heap->IsMovableObject(this) ? heap->GetCurrentAllocator() :
+      heap->GetCurrentNonMovingAllocator();
+  ObjectArray<T>* new_array = Alloc(self, GetClass(), new_length, allocator_type);
+  if (LIKELY(new_array != nullptr)) {
+    Copy(sirt_this.get(), 0, new_array, 0, std::min(sirt_this->GetLength(), new_length));
   }
   return new_array;
 }
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index 09ff519..5da8845 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_MIRROR_OBJECT_ARRAY_H_
 
 #include "array.h"
+#include "gc/heap.h"
 
 namespace art {
 namespace mirror {
@@ -25,6 +26,10 @@
 template<class T>
 class MANAGED ObjectArray : public Array {
  public:
+  static ObjectArray<T>* Alloc(Thread* self, Class* object_array_class, int32_t length,
+                               gc::AllocatorType allocator_type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static ObjectArray<T>* Alloc(Thread* self, Class* object_array_class, int32_t length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index d0d1ee4..8272ff8 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -144,15 +144,15 @@
 TEST_F(ObjectTest, AllocArray) {
   ScopedObjectAccess soa(Thread::Current());
   Class* c = class_linker_->FindSystemClass("[I");
-  SirtRef<Array> a(soa.Self(), Array::Alloc(soa.Self(), c, 1));
+  SirtRef<Array> a(soa.Self(), Array::Alloc<true>(soa.Self(), c, 1));
   ASSERT_TRUE(c == a->GetClass());
 
   c = class_linker_->FindSystemClass("[Ljava/lang/Object;");
-  a.reset(Array::Alloc(soa.Self(), c, 1));
+  a.reset(Array::Alloc<true>(soa.Self(), c, 1));
   ASSERT_TRUE(c == a->GetClass());
 
   c = class_linker_->FindSystemClass("[[Ljava/lang/Object;");
-  a.reset(Array::Alloc(soa.Self(), c, 1));
+  a.reset(Array::Alloc<true>(soa.Self(), c, 1));
   ASSERT_TRUE(c == a->GetClass());
 }
 
@@ -221,7 +221,8 @@
       java_lang_dex_file_->GetIndexForStringId(*string_id));
   ASSERT_TRUE(type_id != NULL);
   uint32_t type_idx = java_lang_dex_file_->GetIndexForTypeId(*type_id);
-  Object* array = CheckAndAllocArrayFromCode(type_idx, sort, 3, Thread::Current(), false);
+  Object* array = CheckAndAllocArrayFromCode(type_idx, sort, 3, Thread::Current(), false,
+                                             Runtime::Current()->GetHeap()->GetCurrentAllocator());
   EXPECT_TRUE(array->IsArrayInstance());
   EXPECT_EQ(3, array->AsArray()->GetLength());
   EXPECT_TRUE(array->GetClass()->IsArrayClass());
@@ -269,8 +270,9 @@
   const DexFile* dex_file = Runtime::Current()->GetCompileTimeClassPath(class_loader)[0];
   CHECK(dex_file != NULL);
 
+  SirtRef<mirror::ClassLoader> loader(soa.Self(), soa.Decode<ClassLoader*>(class_loader));
   Class* klass =
-      class_linker_->FindClass("LStaticsFromCode;", soa.Decode<ClassLoader*>(class_loader));
+      class_linker_->FindClass("LStaticsFromCode;", loader);
   ArtMethod* clinit = klass->FindClassInitializer();
   const DexFile::StringId* klass_string_id = dex_file->FindStringId("LStaticsFromCode;");
   ASSERT_TRUE(klass_string_id != NULL);
@@ -392,6 +394,7 @@
 }
 
 TEST_F(ObjectTest, DescriptorCompare) {
+  // Two class loaders conflict in compile_time_class_paths_.
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* linker = class_linker_;
 
@@ -400,9 +403,9 @@
   SirtRef<ClassLoader> class_loader_1(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader_1));
   SirtRef<ClassLoader> class_loader_2(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader_2));
 
-  Class* klass1 = linker->FindClass("LProtoCompare;", class_loader_1.get());
+  Class* klass1 = linker->FindClass("LProtoCompare;", class_loader_1);
   ASSERT_TRUE(klass1 != NULL);
-  Class* klass2 = linker->FindClass("LProtoCompare2;", class_loader_2.get());
+  Class* klass2 = linker->FindClass("LProtoCompare2;", class_loader_2);
   ASSERT_TRUE(klass2 != NULL);
 
   ArtMethod* m1_1 = klass1->GetVirtualMethod(0);
@@ -468,8 +471,8 @@
   jobject jclass_loader = LoadDex("XandY");
   SirtRef<ClassLoader> class_loader(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader));
 
-  Class* X = class_linker_->FindClass("LX;", class_loader.get());
-  Class* Y = class_linker_->FindClass("LY;", class_loader.get());
+  Class* X = class_linker_->FindClass("LX;", class_loader);
+  Class* Y = class_linker_->FindClass("LY;", class_loader);
   ASSERT_TRUE(X != NULL);
   ASSERT_TRUE(Y != NULL);
 
@@ -501,8 +504,8 @@
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("XandY");
   SirtRef<ClassLoader> class_loader(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader));
-  Class* X = class_linker_->FindClass("LX;", class_loader.get());
-  Class* Y = class_linker_->FindClass("LY;", class_loader.get());
+  Class* X = class_linker_->FindClass("LX;", class_loader);
+  Class* Y = class_linker_->FindClass("LY;", class_loader);
 
   EXPECT_TRUE(X->IsAssignableFrom(X));
   EXPECT_TRUE(X->IsAssignableFrom(Y));
@@ -538,17 +541,17 @@
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("XandY");
   SirtRef<ClassLoader> class_loader(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader));
-  Class* X = class_linker_->FindClass("LX;", class_loader.get());
-  Class* Y = class_linker_->FindClass("LY;", class_loader.get());
+  Class* X = class_linker_->FindClass("LX;", class_loader);
+  Class* Y = class_linker_->FindClass("LY;", class_loader);
   ASSERT_TRUE(X != NULL);
   ASSERT_TRUE(Y != NULL);
 
-  Class* YA = class_linker_->FindClass("[LY;", class_loader.get());
-  Class* YAA = class_linker_->FindClass("[[LY;", class_loader.get());
+  Class* YA = class_linker_->FindClass("[LY;", class_loader);
+  Class* YAA = class_linker_->FindClass("[[LY;", class_loader);
   ASSERT_TRUE(YA != NULL);
   ASSERT_TRUE(YAA != NULL);
 
-  Class* XAA = class_linker_->FindClass("[[LX;", class_loader.get());
+  Class* XAA = class_linker_->FindClass("[[LX;", class_loader);
   ASSERT_TRUE(XAA != NULL);
 
   Class* O = class_linker_->FindSystemClass("Ljava/lang/Object;");
diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc
index 9d76c6b..32a50fe 100644
--- a/runtime/mirror/stack_trace_element.cc
+++ b/runtime/mirror/stack_trace_element.cc
@@ -39,19 +39,19 @@
 }
 
 StackTraceElement* StackTraceElement::Alloc(Thread* self,
-                                            String* declaring_class,
-                                            String* method_name,
-                                            String* file_name,
+                                            SirtRef<String>& declaring_class,
+                                            SirtRef<String>& method_name,
+                                            SirtRef<String>& file_name,
                                             int32_t line_number) {
   StackTraceElement* trace =
       down_cast<StackTraceElement*>(GetStackTraceElement()->AllocObject(self));
   if (LIKELY(trace != NULL)) {
     trace->SetFieldObject(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, declaring_class_),
-                          const_cast<String*>(declaring_class), false);
+                          declaring_class.get(), false);
     trace->SetFieldObject(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, method_name_),
-                          const_cast<String*>(method_name), false);
+                          method_name.get(), false);
     trace->SetFieldObject(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, file_name_),
-                          const_cast<String*>(file_name), false);
+                          file_name.get(), false);
     trace->SetField32(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, line_number_),
                       line_number, false);
   }
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index a9751f9..2af5128 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_MIRROR_STACK_TRACE_ELEMENT_H_
 
 #include "object.h"
+#include "sirt_ref.h"
 
 namespace art {
 
@@ -49,9 +50,9 @@
   }
 
   static StackTraceElement* Alloc(Thread* self,
-                                  String* declaring_class,
-                                  String* method_name,
-                                  String* file_name,
+                                  SirtRef<String>& declaring_class,
+                                  SirtRef<String>& method_name,
+                                  SirtRef<String>& file_name,
                                   int32_t line_number)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 9c93f17..b372fe7 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -123,8 +123,8 @@
                                int32_t hash_code) {
   CHECK(utf16_data_in != NULL || utf16_length == 0);
   String* string = Alloc(self, GetJavaLangString(), utf16_length);
-  if (string == NULL) {
-    return NULL;
+  if (UNLIKELY(string == nullptr)) {
+    return nullptr;
   }
   // TODO: use 16-bit wide memset variant
   CharArray* array = const_cast<CharArray*>(string->GetCharArray());
@@ -143,8 +143,8 @@
 }
 
 String* String::AllocFromModifiedUtf8(Thread* self, const char* utf) {
-  if (utf == NULL) {
-    return NULL;
+  if (UNLIKELY(utf == nullptr)) {
+    return nullptr;
   }
   size_t char_count = CountModifiedUtf8Chars(utf);
   return AllocFromModifiedUtf8(self, char_count, utf);
@@ -153,8 +153,8 @@
 String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
                                       const char* utf8_data_in) {
   String* string = Alloc(self, GetJavaLangString(), utf16_length);
-  if (string == NULL) {
-    return NULL;
+  if (UNLIKELY(string == nullptr)) {
+    return nullptr;
   }
   uint16_t* utf16_data_out =
       const_cast<uint16_t*>(string->GetCharArray()->GetData());
@@ -164,22 +164,21 @@
 }
 
 String* String::Alloc(Thread* self, Class* java_lang_String, int32_t utf16_length) {
-  SirtRef<CharArray> array(self, CharArray::Alloc(self, utf16_length));
-  if (array.get() == NULL) {
-    return NULL;
+  CharArray* array = CharArray::Alloc(self, utf16_length);
+  if (UNLIKELY(array == nullptr)) {
+    return nullptr;
   }
-  return Alloc(self, java_lang_String, array.get());
+  return Alloc(self, java_lang_String, array);
 }
 
 String* String::Alloc(Thread* self, Class* java_lang_String, CharArray* array) {
   // Hold reference in case AllocObject causes GC.
   SirtRef<CharArray> array_ref(self, array);
   String* string = down_cast<String*>(java_lang_String->AllocObject(self));
-  if (string == NULL) {
-    return NULL;
+  if (LIKELY(string != nullptr)) {
+    string->SetArray(array_ref.get());
+    string->SetCount(array_ref->GetLength());
   }
-  string->SetArray(array);
-  string->SetCount(array->GetLength());
   return string;
 }
 
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 01d8f31..7520c4d 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -117,10 +117,8 @@
 
  private:
   void SetHashCode(int32_t new_hash_code) {
-    DCHECK_EQ(0u,
-              GetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), false));
-    SetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_),
-               new_hash_code, false);
+    DCHECK_EQ(0u, GetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), false));
+    SetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), new_hash_code, false);
   }
 
   void SetCount(int32_t new_count) {
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 2abfd3d..af93a56 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -82,6 +82,7 @@
 Monitor::Monitor(Thread* owner, mirror::Object* obj, int32_t hash_code)
     : monitor_lock_("a monitor lock", kMonitorLock),
       monitor_contenders_("monitor contenders", monitor_lock_),
+      num_waiters_(0),
       owner_(owner),
       lock_count_(0),
       obj_(obj),
@@ -128,6 +129,10 @@
       LOG(FATAL) << "Inflating unlocked lock word";
       break;
     }
+    default: {
+      LOG(FATAL) << "Invalid monitor state " << lw.GetState();
+      return false;
+    }
   }
   LockWord fat(this);
   // Publish the updated lock word, which may race with other threads.
@@ -140,8 +145,7 @@
 }
 
 Monitor::~Monitor() {
-  CHECK(obj_ != NULL);
-  CHECK_EQ(obj_->GetLockWord().GetState(), LockWord::kFatLocked);
+  // Deflated monitors have a null object.
 }
 
 /*
@@ -222,7 +226,9 @@
       ScopedThreadStateChange tsc(self, kBlocked);  // Change to blocked and give up mutator_lock_.
       MutexLock mu2(self, monitor_lock_);  // Reacquire monitor_lock_ without mutator_lock_ for Wait.
       if (owner_ != NULL) {  // Did the owner_ give the lock up?
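+        // Track the waiter count so Deflate() can tell that the monitor is still in use.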
+        ++num_waiters_;
         monitor_contenders_.Wait(self);  // Still contended so wait.
+        --num_waiters_;
         // Woken from contention.
         if (log_contention) {
           uint64_t wait_ms = MilliTime() - wait_start_ms;
@@ -559,6 +565,43 @@
   }
 }
 
+bool Monitor::Deflate(Thread* self, mirror::Object* obj) {
+  DCHECK(obj != nullptr);
+  LockWord lw(obj->GetLockWord());
+  // If the lock isn't an inflated monitor, then we don't need to deflate anything.
+  if (lw.GetState() == LockWord::kFatLocked) {
+    Monitor* monitor = lw.FatLockMonitor();
+    CHECK(monitor != nullptr);
+    MutexLock mu(self, monitor->monitor_lock_);
+    Thread* owner = monitor->owner_;
+    if (owner != nullptr) {
+      // Can't deflate if we are locked and have a hash code.
+      if (monitor->HasHashCode()) {
+        return false;
+      }
+      // Can't deflate if our lock count is too high.
+      if (monitor->lock_count_ > LockWord::kThinLockMaxCount) {
+        return false;
+      }
+      // Can't deflate if any threads are waiting on the condition variable.
+      if (monitor->num_waiters_ > 0) {
+        return false;
+      }
+      // Deflate to a thin lock.
+      obj->SetLockWord(LockWord::FromThinLockId(owner->GetTid(), monitor->lock_count_));
+    } else if (monitor->HasHashCode()) {
+      obj->SetLockWord(LockWord::FromHashCode(monitor->GetHashCode()));
+    } else {
+      // No lock and no hash, just put an empty lock word inside the object.
+      obj->SetLockWord(LockWord());
+    }
+    // The monitor is deflated; null out its object so that we know to delete it during the
+    // next GC.
+    monitor->obj_ = nullptr;
+  }
+  return true;
+}
+
 /*
  * Changes the shape of a monitor from thin to fat, preserving the internal lock state. The calling
  * thread must own the lock or the owner must be suspended. There's a race with other threads
@@ -577,13 +620,13 @@
   }
 }
 
-void Monitor::InflateThinLocked(Thread* self, mirror::Object* obj, LockWord lock_word,
+void Monitor::InflateThinLocked(Thread* self, SirtRef<mirror::Object>& obj, LockWord lock_word,
                                 uint32_t hash_code) {
   DCHECK_EQ(lock_word.GetState(), LockWord::kThinLocked);
   uint32_t owner_thread_id = lock_word.ThinLockOwner();
   if (owner_thread_id == self->GetThreadId()) {
     // We own the monitor, we can easily inflate it.
-    Inflate(self, self, obj, hash_code);
+    Inflate(self, self, obj.get(), hash_code);
   } else {
     ThreadList* thread_list = Runtime::Current()->GetThreadList();
     // Suspend the owner, inflate. First change to blocked and give up mutator_lock_.
@@ -598,7 +641,7 @@
         if (lock_word.GetState() == LockWord::kThinLocked &&
             lock_word.ThinLockOwner() == owner_thread_id) {
           // Go ahead and inflate the lock.
-          Inflate(self, owner, obj, hash_code);
+          Inflate(self, owner, obj.get(), hash_code);
         }
         thread_list->Resume(owner, false);
       }
@@ -611,12 +654,13 @@
   DCHECK(obj != NULL);
   uint32_t thread_id = self->GetThreadId();
   size_t contention_count = 0;
+  SirtRef<mirror::Object> sirt_obj(self, obj);
   while (true) {
-    LockWord lock_word = obj->GetLockWord();
+    LockWord lock_word = sirt_obj->GetLockWord();
     switch (lock_word.GetState()) {
       case LockWord::kUnlocked: {
         LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0));
-        if (obj->CasLockWord(lock_word, thin_locked)) {
+        if (sirt_obj->CasLockWord(lock_word, thin_locked)) {
           return;  // Success!
         }
         continue;  // Go again.
@@ -628,11 +672,11 @@
           uint32_t new_count = lock_word.ThinLockCount() + 1;
           if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) {
             LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count));
-            obj->SetLockWord(thin_locked);
+            sirt_obj->SetLockWord(thin_locked);
             return;  // Success!
           } else {
             // We'd overflow the recursion count, so inflate the monitor.
-            InflateThinLocked(self, obj, lock_word, 0);
+            InflateThinLocked(self, sirt_obj, lock_word, 0);
           }
         } else {
           // Contention.
@@ -642,7 +686,7 @@
             NanoSleep(1000);  // Sleep for 1us and re-attempt.
           } else {
             contention_count = 0;
-            InflateThinLocked(self, obj, lock_word, 0);
+            InflateThinLocked(self, sirt_obj, lock_word, 0);
           }
         }
         continue;  // Start from the beginning.
@@ -654,9 +698,13 @@
       }
       case LockWord::kHashCode: {
         // Inflate with the existing hashcode.
-        Inflate(self, nullptr, obj, lock_word.GetHashCode());
+        Inflate(self, nullptr, sirt_obj.get(), lock_word.GetHashCode());
         break;
       }
+      default: {
+        LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
+        return;
+      }
     }
   }
 }
@@ -666,11 +714,12 @@
   DCHECK(obj != NULL);
 
   LockWord lock_word = obj->GetLockWord();
+  SirtRef<mirror::Object> sirt_obj(self, obj);
   switch (lock_word.GetState()) {
     case LockWord::kHashCode:
       // Fall-through.
     case LockWord::kUnlocked:
-      FailedUnlock(obj, self, NULL, NULL);
+      FailedUnlock(sirt_obj.get(), self, NULL, NULL);
       return false;  // Failure.
     case LockWord::kThinLocked: {
       uint32_t thread_id = self->GetThreadId();
@@ -679,16 +728,16 @@
         // TODO: there's a race here with the owner dying while we unlock.
         Thread* owner =
             Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner());
-        FailedUnlock(obj, self, owner, NULL);
+        FailedUnlock(sirt_obj.get(), self, owner, NULL);
         return false;  // Failure.
       } else {
         // We own the lock, decrease the recursion count.
         if (lock_word.ThinLockCount() != 0) {
           uint32_t new_count = lock_word.ThinLockCount() - 1;
           LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count));
-          obj->SetLockWord(thin_locked);
+          sirt_obj->SetLockWord(thin_locked);
         } else {
-          obj->SetLockWord(LockWord());
+          sirt_obj->SetLockWord(LockWord());
         }
         return true;  // Success!
       }
@@ -697,9 +746,10 @@
       Monitor* mon = lock_word.FatLockMonitor();
       return mon->Unlock(self);
     }
-    default:
-      LOG(FATAL) << "Unreachable";
+    default: {
+      LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
       return false;
+    }
   }
 }
 
@@ -733,6 +783,10 @@
     }
     case LockWord::kFatLocked:
       break;  // Already set for a wait.
+    default: {
+      LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
+      return;
+    }
   }
   Monitor* mon = lock_word.FatLockMonitor();
   mon->Wait(self, ms, ns, interruptShouldThrow, why);
@@ -769,6 +823,10 @@
       }
       return;  // Success.
     }
+    default: {
+      LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
+      return;
+    }
   }
 }
 
@@ -787,9 +845,10 @@
       Monitor* mon = lock_word.FatLockMonitor();
       return mon->GetOwnerThreadId();
     }
-    default:
+    default: {
       LOG(FATAL) << "Unreachable";
       return ThreadList::kInvalidThreadId;
+    }
   }
 }
 
@@ -1011,7 +1070,8 @@
   for (auto it = list_.begin(); it != list_.end(); ) {
     Monitor* m = *it;
     mirror::Object* obj = m->GetObject();
-    mirror::Object* new_obj = visitor(obj, arg);
+    // The object of a monitor can be null if we have deflated it.
+    mirror::Object* new_obj = obj != nullptr ? visitor(obj, arg) : nullptr;
     if (new_obj == nullptr) {
       VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object "
                     << m->GetObject();
@@ -1031,6 +1091,8 @@
   switch (lock_word.GetState()) {
     case LockWord::kUnlocked:
       // Fall-through.
+    case LockWord::kForwardingAddress:
+      // Fall-through.
     case LockWord::kHashCode:
       break;
     case LockWord::kThinLocked:
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 09cfafa..bfd8545 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -27,6 +27,7 @@
 #include "atomic_integer.h"
 #include "base/mutex.h"
 #include "root_visitor.h"
+#include "sirt_ref.h"
 #include "thread_state.h"
 
 namespace art {
@@ -107,9 +108,12 @@
     return hash_code_.load() != 0;
   }
 
-  static void InflateThinLocked(Thread* self, mirror::Object* obj, LockWord lock_word,
+  static void InflateThinLocked(Thread* self, SirtRef<mirror::Object>& obj, LockWord lock_word,
                                 uint32_t hash_code) NO_THREAD_SAFETY_ANALYSIS;
 
+  static bool Deflate(Thread* self, mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  private:
   explicit Monitor(Thread* owner, mirror::Object* obj, int32_t hash_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -170,6 +174,9 @@
   Mutex monitor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ConditionVariable monitor_contenders_ GUARDED_BY(monitor_lock_);
 
+  // Number of threads waiting on the condition variable.
+  size_t num_waiters_ GUARDED_BY(monitor_lock_);
+
   // Which thread currently owns the lock?
   Thread* volatile owner_ GUARDED_BY(monitor_lock_);
 
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index ab5eab3..c9e0e83 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -161,7 +161,7 @@
   ScopedObjectAccess soa(env);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   class_linker->RegisterDexFile(*dex_file);
-  mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(javaLoader);
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(javaLoader));
   mirror::Class* result = class_linker->DefineClass(descriptor.c_str(), class_loader, *dex_file,
                                                     *dex_class_def);
   VLOG(class_linker) << "DexFile_defineClassNative returning " << result;
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 96c3e78..67c4505 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -20,6 +20,7 @@
 #include "class_linker.h"
 #include "common_throws.h"
 #include "debugger.h"
+#include "gc/space/bump_pointer_space.h"
 #include "gc/space/dlmalloc_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
@@ -247,7 +248,7 @@
 // /proc/<pid>/smaps.
 static void VMDebug_getHeapSpaceStats(JNIEnv* env, jclass, jlongArray data) {
   jlong* arr = reinterpret_cast<jlong*>(env->GetPrimitiveArrayCritical(data, 0));
-  if (arr == NULL || env->GetArrayLength(data) < 9) {
+  if (arr == nullptr || env->GetArrayLength(data) < 9) {
     return;
   }
 
@@ -257,29 +258,26 @@
   size_t zygoteUsed = 0;
   size_t largeObjectsSize = 0;
   size_t largeObjectsUsed = 0;
-
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  const std::vector<gc::space::ContinuousSpace*>& continuous_spaces = heap->GetContinuousSpaces();
-  const std::vector<gc::space::DiscontinuousSpace*>& discontinuous_spaces = heap->GetDiscontinuousSpaces();
-  typedef std::vector<gc::space::ContinuousSpace*>::const_iterator It;
-  for (It it = continuous_spaces.begin(), end = continuous_spaces.end(); it != end; ++it) {
-    gc::space::ContinuousSpace* space = *it;
+  for (gc::space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
     if (space->IsImageSpace()) {
       // Currently don't include the image space.
     } else if (space->IsZygoteSpace()) {
-      gc::space::DlMallocSpace* dlmalloc_space = space->AsDlMallocSpace();
-      zygoteSize += dlmalloc_space->GetFootprint();
-      zygoteUsed += dlmalloc_space->GetBytesAllocated();
-    } else {
-      // This is the alloc space.
-      gc::space::DlMallocSpace* dlmalloc_space = space->AsDlMallocSpace();
-      allocSize += dlmalloc_space->GetFootprint();
-      allocUsed += dlmalloc_space->GetBytesAllocated();
+      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+      zygoteSize += malloc_space->GetFootprint();
+      zygoteUsed += malloc_space->GetBytesAllocated();
+    } else if (space->IsMallocSpace()) {
+      // This is a malloc space.
+      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+      allocSize += malloc_space->GetFootprint();
+      allocUsed += malloc_space->GetBytesAllocated();
+    } else if (space->IsBumpPointerSpace()) {
+      gc::space::BumpPointerSpace* bump_pointer_space = space->AsBumpPointerSpace();
+      allocSize += bump_pointer_space->Size();
+      allocUsed += bump_pointer_space->GetBytesAllocated();
     }
   }
-  typedef std::vector<gc::space::DiscontinuousSpace*>::const_iterator It2;
-  for (It2 it = discontinuous_spaces.begin(), end = discontinuous_spaces.end(); it != end; ++it) {
-    gc::space::DiscontinuousSpace* space = *it;
+  for (gc::space::DiscontinuousSpace* space : heap->GetDiscontinuousSpaces()) {
     if (space->IsLargeObjectSpace()) {
       largeObjectsSize += space->AsLargeObjectSpace()->GetBytesAllocated();
       largeObjectsUsed += largeObjectsSize;
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index aef000c..fd3d91e 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -53,18 +53,9 @@
 static void VMRuntime_disableJitCompilation(JNIEnv*, jobject) {
 }
 
-static jobject VMRuntime_newNonMovableArray(JNIEnv* env,
-                                            jobject,
-                                            jclass javaElementClass,
+static jobject VMRuntime_newNonMovableArray(JNIEnv* env, jobject, jclass javaElementClass,
                                             jint length) {
   ScopedFastNativeObjectAccess soa(env);
-#ifdef MOVING_GARBAGE_COLLECTOR
-  // TODO: right now, we don't have a copying collector, so there's no need
-  // to do anything special here, but we ought to pass the non-movability
-  // through to the allocator.
-  UNIMPLEMENTED(FATAL);
-#endif
-
   mirror::Class* element_class = soa.Decode<mirror::Class*>(javaElementClass);
   if (element_class == NULL) {
     ThrowNullPointerException(NULL, "element class == null");
@@ -74,13 +65,14 @@
     ThrowNegativeArraySizeException(length);
     return NULL;
   }
-
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   std::string descriptor;
   descriptor += "[";
   descriptor += ClassHelper(element_class).GetDescriptor();
-  mirror::Class* array_class = class_linker->FindClass(descriptor.c_str(), NULL);
-  mirror::Array* result = mirror::Array::Alloc(soa.Self(), array_class, length);
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
+  mirror::Class* array_class = class_linker->FindClass(descriptor.c_str(), class_loader);
+  mirror::Array* result = mirror::Array::Alloc<true>(soa.Self(), array_class, length,
+                                                     Runtime::Current()->GetHeap()->GetCurrentNonMovingAllocator());
   return soa.AddLocalReference<jobject>(result);
 }
 
@@ -94,7 +86,10 @@
     ThrowIllegalArgumentException(NULL, "not an array");
     return 0;
   }
-  // TODO: we should also check that this is a non-movable array.
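+  // A raw address is only safe to expose for non-movable arrays; a moving GC could relocate
+  // the object and leave the caller with a dangling pointer.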
+  if (Runtime::Current()->GetHeap()->IsMovableObject(array)) {
+    ThrowRuntimeException("Trying to get address of movable array object");
+    return 0;
+  }
   return reinterpret_cast<uintptr_t>(array->GetRawData(array->GetClass()->GetComponentSize()));
 }
 
@@ -172,28 +167,7 @@
 }
 
 static void VMRuntime_trimHeap(JNIEnv*, jobject) {
-  uint64_t start_ns = NanoTime();
-
-  // Trim the managed heap.
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  float managed_utilization = (static_cast<float>(heap->GetBytesAllocated()) /
-                               heap->GetTotalMemory());
-  size_t managed_reclaimed = heap->Trim();
-
-  uint64_t gc_heap_end_ns = NanoTime();
-
-  // Trim the native heap.
-  dlmalloc_trim(0);
-  size_t native_reclaimed = 0;
-  dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
-
-  uint64_t end_ns = NanoTime();
-
-  LOG(INFO) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
-      << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
-      << PrettyDuration(end_ns - gc_heap_end_ns) << ", advised=" << PrettySize(native_reclaimed)
-      << ") heaps. Managed heap utilization of " << static_cast<int>(100 * managed_utilization)
-      << "%.";
+  Runtime::Current()->GetHeap()->Trim();
 }
 
 static void VMRuntime_concurrentGC(JNIEnv* env, jobject) {
@@ -212,7 +186,7 @@
 }
 
 // Based on ClassLinker::ResolveString.
-static void PreloadDexCachesResolveString(mirror::DexCache* dex_cache,
+static void PreloadDexCachesResolveString(SirtRef<mirror::DexCache>& dex_cache,
                                           uint32_t string_idx,
                                           StringTable& strings)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -260,7 +234,7 @@
 }
 
 // Based on ClassLinker::ResolveField.
-static void PreloadDexCachesResolveField(mirror::DexCache* dex_cache,
+static void PreloadDexCachesResolveField(SirtRef<mirror::DexCache>& dex_cache,
                                          uint32_t field_idx,
                                          bool is_static)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -275,9 +249,9 @@
     return;
   }
   if (is_static) {
-    field = klass->FindStaticField(dex_cache, field_idx);
+    field = klass->FindStaticField(dex_cache.get(), field_idx);
   } else {
-    field = klass->FindInstanceField(dex_cache, field_idx);
+    field = klass->FindInstanceField(dex_cache.get(), field_idx);
   }
   if (field == NULL) {
     return;
@@ -287,7 +261,7 @@
 }
 
 // Based on ClassLinker::ResolveMethod.
-static void PreloadDexCachesResolveMethod(mirror::DexCache* dex_cache,
+static void PreloadDexCachesResolveMethod(SirtRef<mirror::DexCache>& dex_cache,
                                           uint32_t method_idx,
                                           InvokeType invoke_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -304,14 +278,14 @@
   switch (invoke_type) {
     case kDirect:
     case kStatic:
-      method = klass->FindDirectMethod(dex_cache, method_idx);
+      method = klass->FindDirectMethod(dex_cache.get(), method_idx);
       break;
     case kInterface:
-      method = klass->FindInterfaceMethod(dex_cache, method_idx);
+      method = klass->FindInterfaceMethod(dex_cache.get(), method_idx);
       break;
     case kSuper:
     case kVirtual:
-      method = klass->FindVirtualMethod(dex_cache, method_idx);
+      method = klass->FindVirtualMethod(dex_cache.get(), method_idx);
       break;
     default:
       LOG(FATAL) << "Unreachable - invocation type: " << invoke_type;
@@ -430,6 +404,7 @@
 
   Runtime* runtime = Runtime::Current();
   ClassLinker* linker = runtime->GetClassLinker();
+  Thread* self = ThreadForEnv(env);
 
   // We use a std::map to avoid heap-allocating StringObjects to look up in gDvm.literalStrings
   StringTable strings;
@@ -441,7 +416,7 @@
   for (size_t i = 0; i< boot_class_path.size(); i++) {
     const DexFile* dex_file = boot_class_path[i];
     CHECK(dex_file != NULL);
-    mirror::DexCache* dex_cache = linker->FindDexCache(*dex_file);
+    SirtRef<mirror::DexCache> dex_cache(self, linker->FindDexCache(*dex_file));
 
     if (kPreloadDexCachesStrings) {
       for (size_t i = 0; i < dex_cache->NumStrings(); i++) {
@@ -451,7 +426,7 @@
 
     if (kPreloadDexCachesTypes) {
       for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
-        PreloadDexCachesResolveType(dex_cache, i);
+        PreloadDexCachesResolveType(dex_cache.get(), i);
       }
     }
 
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 3591611..3389107 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -61,7 +61,8 @@
   }
 
   std::string descriptor(DotToDescriptor(name.c_str()));
-  mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(javaLoader);
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
+                                            soa.Decode<mirror::ClassLoader*>(javaLoader));
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   mirror::Class* c = class_linker->FindClass(descriptor.c_str(), class_loader);
   if (c == NULL) {
diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc
index a2d6b18..52cdb59 100644
--- a/runtime/native/java_lang_reflect_Array.cc
+++ b/runtime/native/java_lang_reflect_Array.cc
@@ -52,13 +52,14 @@
   descriptor += ClassHelper(element_class).GetDescriptor();
 
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  mirror::Class* array_class = class_linker->FindClass(descriptor.c_str(), element_class->GetClassLoader());
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), element_class->GetClassLoader());
+  mirror::Class* array_class = class_linker->FindClass(descriptor.c_str(), class_loader);
   if (UNLIKELY(array_class == NULL)) {
     CHECK(soa.Self()->IsExceptionPending());
     return NULL;
   }
   DCHECK(array_class->IsArrayClass());
-  mirror::Array* new_array = mirror::Array::Alloc(soa.Self(), array_class, length);
+  mirror::Array* new_array = mirror::Array::Alloc<true>(soa.Self(), array_class, length);
   return soa.AddLocalReference<jobject>(new_array);
 }
 
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index aa72755..04dfcb5 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -56,7 +56,7 @@
     return NULL;
   }
 
-  mirror::Object* receiver = c->AllocObject(soa.Self());
+  mirror::Object* receiver = c->AllocNonMovableObject(soa.Self());
   if (receiver == NULL) {
     return NULL;
   }
diff --git a/runtime/native/java_lang_reflect_Proxy.cc b/runtime/native/java_lang_reflect_Proxy.cc
index a92823a..809369a 100644
--- a/runtime/native/java_lang_reflect_Proxy.cc
+++ b/runtime/native/java_lang_reflect_Proxy.cc
@@ -23,20 +23,12 @@
 
 namespace art {
 
-static jclass Proxy_generateProxy(JNIEnv* env, jclass, jstring javaName,
-                                  jobjectArray javaInterfaces, jobject javaLoader,
-                                  jobjectArray javaMethods, jobjectArray javaThrows) {
+static jclass Proxy_generateProxy(JNIEnv* env, jclass, jstring name, jobjectArray interfaces,
+                                  jobject loader, jobjectArray methods, jobjectArray throws) {
   ScopedObjectAccess soa(env);
-  mirror::String* name = soa.Decode<mirror::String*>(javaName);
-  mirror::ObjectArray<mirror::Class>* interfaces =
-      soa.Decode<mirror::ObjectArray<mirror::Class>*>(javaInterfaces);
-  mirror::ClassLoader* loader = soa.Decode<mirror::ClassLoader*>(javaLoader);
-  mirror::ObjectArray<mirror::ArtMethod>* methods =
-      soa.Decode<mirror::ObjectArray<mirror::ArtMethod>*>(javaMethods);
-  mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >* throws =
-      soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >*>(javaThrows);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  mirror::Class* result = class_linker->CreateProxyClass(name, interfaces, loader, methods, throws);
+  mirror::Class* result = class_linker->CreateProxyClass(soa, name, interfaces, loader, methods,
+                                                         throws);
   return soa.AddLocalReference<jclass>(result);
 }
 
diff --git a/runtime/native/scoped_fast_native_object_access.h b/runtime/native/scoped_fast_native_object_access.h
index d941ec3..1658d96 100644
--- a/runtime/native/scoped_fast_native_object_access.h
+++ b/runtime/native/scoped_fast_native_object_access.h
@@ -63,10 +63,6 @@
     Locks::mutator_lock_->AssertSharedHeld(Self());
     // Don't work with raw objects in non-runnable states.
     DCHECK_EQ(Self()->GetState(), kRunnable);
-#ifdef MOVING_GARBAGE_COLLECTOR
-    // TODO: we should make these unique weak globals if Field instances can ever move.
-    UNIMPLEMENTED(WARNING);
-#endif
     return reinterpret_cast<mirror::ArtField*>(fid);
   }
 
@@ -83,6 +79,10 @@
       return NULL;
     }
 
+    if (kIsDebugBuild) {
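+      // Verify the object before handing out a local reference so invalid pointers are caught early.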
+      Runtime::Current()->GetHeap()->VerifyObject(obj);
+    }
+
     DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000);
 
     IndirectReferenceTable& locals = Env()->locals;
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index f724776..e37510c 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -67,12 +67,9 @@
 
 class ClassHelper {
  public:
-  ClassHelper(const mirror::Class* c = NULL, ClassLinker* l = NULL)
+  explicit ClassHelper(const mirror::Class* c)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : class_linker_(l),
-        dex_cache_(NULL),
-        dex_file_(NULL),
-        interface_type_list_(NULL),
+      : interface_type_list_(NULL),
         klass_(NULL) {
     if (c != NULL) {
       ChangeClass(c);
@@ -82,13 +79,9 @@
   void ChangeClass(const mirror::Class* new_c)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK(new_c != NULL) << "klass_=" << klass_;  // Log what we were changing from, if anything.
-    CHECK(new_c->IsClass()) << "new_c=" << new_c;
-    if (dex_cache_ != NULL) {
-      mirror::DexCache* new_c_dex_cache = new_c->GetDexCache();
-      if (new_c_dex_cache != dex_cache_) {
-        dex_cache_ = new_c_dex_cache;
-        dex_file_ = NULL;
-      }
+    if (!new_c->IsClass()) {
+      LOG(FATAL) << "new_c=" << new_c << " cc " << new_c->GetClass() << " ccc "
+          << ((new_c->GetClass() != nullptr) ? new_c->GetClass()->GetClass() : NULL);
     }
     klass_ = new_c;
     interface_type_list_ = NULL;
@@ -201,20 +194,11 @@
   }
 
   const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (dex_file_ == NULL) {
-      dex_file_ = GetDexCache()->GetDexFile();
-    }
-    return *dex_file_;
+    return *GetDexCache()->GetDexFile();
   }
 
   mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::DexCache* result = dex_cache_;
-    if (result == NULL) {
-      DCHECK(klass_ != NULL);
-      result = klass_->GetDexCache();
-      dex_cache_ = result;
-    }
-    return result;
+    return klass_->GetDexCache();
   }
 
  private:
@@ -231,18 +215,10 @@
     return result;
   }
 
-  ClassLinker* GetClassLinker() {
-    ClassLinker* result = class_linker_;
-    if (result == NULL) {
-      result = Runtime::Current()->GetClassLinker();
-      class_linker_ = result;
-    }
-    return result;
+  ClassLinker* GetClassLinker() ALWAYS_INLINE {
+    return Runtime::Current()->GetClassLinker();
   }
 
-  ClassLinker* class_linker_;
-  mirror::DexCache* dex_cache_;
-  const DexFile* dex_file_;
   const DexFile::TypeList* interface_type_list_;
   const mirror::Class* klass_;
   std::string descriptor_;
@@ -252,20 +228,11 @@
 
 class FieldHelper {
  public:
-  FieldHelper() : class_linker_(NULL), dex_cache_(NULL), dex_file_(NULL), field_(NULL) {}
-  explicit FieldHelper(const mirror::ArtField* f) : class_linker_(NULL), dex_cache_(NULL), dex_file_(NULL), field_(f) {}
-  FieldHelper(const mirror::ArtField* f, ClassLinker* l)
-      : class_linker_(l), dex_cache_(NULL), dex_file_(NULL), field_(f) {}
+  FieldHelper() : field_(NULL) {}
+  explicit FieldHelper(const mirror::ArtField* f) : field_(f) {}
 
   void ChangeField(const mirror::ArtField* new_f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(new_f != NULL);
-    if (dex_cache_ != NULL) {
-      mirror::DexCache* new_f_dex_cache = new_f->GetDeclaringClass()->GetDexCache();
-      if (new_f_dex_cache != dex_cache_) {
-        dex_cache_ = new_f_dex_cache;
-        dex_file_ = NULL;
-      }
-    }
     field_ = new_f;
   }
 
@@ -343,31 +310,14 @@
 
  private:
   mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::DexCache* result = dex_cache_;
-    if (result == NULL) {
-      result = field_->GetDeclaringClass()->GetDexCache();
-      dex_cache_ = result;
-    }
-    return result;
+    return field_->GetDeclaringClass()->GetDexCache();
   }
-  ClassLinker* GetClassLinker() {
-    ClassLinker* result = class_linker_;
-    if (result == NULL) {
-      result = Runtime::Current()->GetClassLinker();
-      class_linker_ = result;
-    }
-    return result;
+  ClassLinker* GetClassLinker() ALWAYS_INLINE {
+    return Runtime::Current()->GetClassLinker();
   }
   const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (dex_file_ == NULL) {
-      dex_file_ = GetDexCache()->GetDexFile();
-    }
-    return *dex_file_;
+    return *GetDexCache()->GetDexFile();
   }
-
-  ClassLinker* class_linker_;
-  mirror::DexCache* dex_cache_;
-  const DexFile* dex_file_;
   const mirror::ArtField* field_;
   std::string declaring_class_descriptor_;
 
@@ -377,38 +327,17 @@
 class MethodHelper {
  public:
   MethodHelper()
-     : class_linker_(NULL), dex_cache_(NULL), dex_file_(NULL), method_(NULL), shorty_(NULL),
+     : method_(NULL), shorty_(NULL),
        shorty_len_(0) {}
 
   explicit MethodHelper(const mirror::ArtMethod* m)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : class_linker_(NULL), dex_cache_(NULL), dex_file_(NULL), method_(NULL), shorty_(NULL),
-        shorty_len_(0) {
-    SetMethod(m);
-  }
-
-  MethodHelper(const mirror::ArtMethod* m, ClassLinker* l)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : class_linker_(l), dex_cache_(NULL), dex_file_(NULL), method_(NULL), shorty_(NULL),
-        shorty_len_(0) {
+      : method_(NULL), shorty_(NULL), shorty_len_(0) {
     SetMethod(m);
   }
 
   void ChangeMethod(mirror::ArtMethod* new_m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(new_m != NULL);
-    if (dex_cache_ != NULL) {
-      mirror::Class* klass = new_m->GetDeclaringClass();
-      if (klass->IsProxyClass()) {
-        dex_cache_ = NULL;
-        dex_file_ = NULL;
-      } else {
-        mirror::DexCache* new_m_dex_cache = klass->GetDexCache();
-        if (new_m_dex_cache != dex_cache_) {
-          dex_cache_ = new_m_dex_cache;
-          dex_file_ = NULL;
-        }
-      }
-    }
     SetMethod(new_m);
     shorty_ = NULL;
   }
@@ -444,7 +373,8 @@
     const DexFile& dex_file = GetDexFile();
     uint32_t dex_method_idx = method_->GetDexMethodIndex();
     const DexFile::MethodId& method_id = dex_file.GetMethodId(dex_method_idx);
-    return GetClassLinker()->ResolveString(dex_file, method_id.name_idx_, GetDexCache());
+    SirtRef<mirror::DexCache> dex_cache(Thread::Current(), GetDexCache());
+    return GetClassLinker()->ResolveString(dex_file, method_id.name_idx_, dex_cache);
   }
 
   const char* GetShorty() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -622,28 +552,18 @@
   }
 
   const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile* result = dex_file_;
-    if (result == NULL) {
-      const mirror::DexCache* dex_cache = GetDexCache();
-      result = dex_file_ = dex_cache->GetDexFile();
-    }
-    return *result;
+    return *GetDexCache()->GetDexFile();
   }
 
   mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::DexCache* result = dex_cache_;
-    if (result == NULL) {
-      mirror::Class* klass = method_->GetDeclaringClass();
-      result = klass->GetDexCache();
-      dex_cache_ = result;
-    }
-    return result;
+    return method_->GetDeclaringClass()->GetDexCache();
   }
 
   mirror::String* ResolveString(uint32_t string_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::String* s = method_->GetDexCacheStrings()->Get(string_idx);
     if (UNLIKELY(s == NULL)) {
-      s = GetClassLinker()->ResolveString(GetDexFile(), string_idx, GetDexCache());
+      SirtRef<mirror::DexCache> dex_cache(Thread::Current(), GetDexCache());
+      s = GetClassLinker()->ResolveString(GetDexFile(), string_idx, dex_cache);
     }
     return s;
   }
@@ -705,18 +625,10 @@
     method_ = method;
   }
 
-  ClassLinker* GetClassLinker() {
-    ClassLinker* result = class_linker_;
-    if (result == NULL) {
-      result = Runtime::Current()->GetClassLinker();
-      class_linker_ = result;
-    }
-    return result;
+  ClassLinker* GetClassLinker() ALWAYS_INLINE {
+    return Runtime::Current()->GetClassLinker();
   }
 
-  ClassLinker* class_linker_;
-  mirror::DexCache* dex_cache_;
-  const DexFile* dex_file_;
   const mirror::ArtMethod* method_;
   const char* shorty_;
   uint32_t shorty_len_;
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index e95fdb9..6f65bff 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -233,7 +233,7 @@
 
 void ReferenceTable::VisitRoots(RootVisitor* visitor, void* arg) {
   for (auto& ref : entries_) {
-    ref = visitor(const_cast<mirror::Object*>(ref), arg);
+    ref = visitor(ref, arg);
   }
 }
 
diff --git a/runtime/root_visitor.h b/runtime/root_visitor.h
index a2d898b..d52f351 100644
--- a/runtime/root_visitor.h
+++ b/runtime/root_visitor.h
@@ -23,11 +23,13 @@
 }  // namespace mirror
 class StackVisitor;
 
+// Returns the new address of the object; returns root unchanged if it has not moved.
 typedef mirror::Object* (RootVisitor)(mirror::Object* root, void* arg)
     __attribute__((warn_unused_result));
 typedef void (VerifyRootVisitor)(const mirror::Object* root, void* arg, size_t vreg,
                                  const StackVisitor* visitor);
 typedef bool (IsMarkedTester)(const mirror::Object* object, void* arg);
+typedef void (ObjectVisitorCallback)(mirror::Object* obj, void* arg);
 
 }  // namespace art
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 34cf45b..6bd2560 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -102,14 +102,20 @@
       use_compile_time_class_path_(false),
       main_thread_group_(NULL),
       system_thread_group_(NULL),
-      system_class_loader_(NULL),
-      quick_alloc_entry_points_instrumentation_counter_(0) {
+      system_class_loader_(NULL) {
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
     callee_save_methods_[i] = NULL;
   }
 }
 
 Runtime::~Runtime() {
+  if (dump_gc_performance_on_shutdown_) {
+    // This can't be called from the Heap destructor below because it
+    // could call RosAlloc::InspectAll(), which needs the thread_list
+    // to still be alive.
+    heap_->DumpGcPerformanceInfo(LOG(INFO));
+  }
+
   Thread* self = Thread::Current();
   {
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
@@ -122,9 +128,14 @@
   Trace::Shutdown();
 
   // Make sure to let the GC complete if it is running.
-  heap_->WaitForConcurrentGcToComplete(self);
+  heap_->WaitForGcToComplete(self);
   heap_->DeleteThreadPool();
 
+  // For RosAlloc, revoke thread-local runs. Note that tests
+  // (common_test.h) repeatedly allocate and delete Runtime objects.
+  heap_->RevokeAllThreadLocalBuffers();
+
   // Make sure our internal threads are dead before we start tearing down things they're using.
   Dbg::StopJdwp();
   delete signal_catcher_;
@@ -352,6 +363,8 @@
   parsed->parallel_gc_threads_ = sysconf(_SC_NPROCESSORS_CONF) - 1;
   // Only the main GC thread, no workers.
   parsed->conc_gc_threads_ = 0;
+  // Default is CMS, which is Sticky + Partial + Full CMS GC.
+  parsed->collector_type_ = gc::kCollectorTypeCMS;
   parsed->stack_size_ = 0;  // 0 means default.
   parsed->max_spins_before_thin_lock_inflation_ = Monitor::kDefaultMaxSpinsBeforeThinLockInflation;
   parsed->low_memory_mode_ = false;
@@ -359,11 +372,11 @@
   parsed->is_compiler_ = false;
   parsed->is_zygote_ = false;
   parsed->interpreter_only_ = false;
-  parsed->is_concurrent_gc_enabled_ = true;
   parsed->is_explicit_gc_disabled_ = false;
 
   parsed->long_pause_log_threshold_ = gc::Heap::kDefaultLongPauseLogThreshold;
   parsed->long_gc_log_threshold_ = gc::Heap::kDefaultLongGCLogThreshold;
+  parsed->dump_gc_performance_on_shutdown_ = false;
   parsed->ignore_max_footprint_ = false;
 
   parsed->lock_profiling_threshold_ = 0;
@@ -524,6 +537,8 @@
     } else if (option == "-XX:LongGCLogThreshold") {
           parsed->long_gc_log_threshold_ =
               ParseMemoryOption(option.substr(strlen("-XX:LongGCLogThreshold")).c_str(), 1024);
+    } else if (option == "-XX:DumpGCPerformanceOnShutdown") {
+      parsed->dump_gc_performance_on_shutdown_ = true;
     } else if (option == "-XX:IgnoreMaxFootprint") {
       parsed->ignore_max_footprint_ = true;
     } else if (option == "-XX:LowMemoryMode") {
@@ -542,10 +557,12 @@
       std::vector<std::string> gc_options;
       Split(option.substr(strlen("-Xgc:")), ',', gc_options);
       for (size_t i = 0; i < gc_options.size(); ++i) {
-        if (gc_options[i] == "noconcurrent") {
-          parsed->is_concurrent_gc_enabled_ = false;
-        } else if (gc_options[i] == "concurrent") {
-          parsed->is_concurrent_gc_enabled_ = true;
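+        // e.g. "-Xgc:MS", "-Xgc:CMS", or "-Xgc:SS"; unknown values are ignored with a warning.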
+        if (gc_options[i] == "MS" || gc_options[i] == "nonconcurrent") {
+          parsed->collector_type_ = gc::kCollectorTypeMS;
+        } else if (gc_options[i] == "CMS" || gc_options[i] == "concurrent") {
+          parsed->collector_type_ = gc::kCollectorTypeCMS;
+        } else if (gc_options[i] == "SS") {
+          parsed->collector_type_ = gc::kCollectorTypeSS;
         } else {
           LOG(WARNING) << "Ignoring unknown -Xgc option: " << gc_options[i];
         }
@@ -822,6 +839,11 @@
   }
 }
 
+bool Runtime::IsShuttingDown(Thread* self) {
+  MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+  return IsShuttingDownLocked();
+}
+
 void Runtime::StartDaemonThreads() {
   VLOG(startup) << "Runtime::StartDaemonThreads entering";
 
@@ -862,7 +884,6 @@
 
   is_compiler_ = options->is_compiler_;
   is_zygote_ = options->is_zygote_;
-  is_concurrent_gc_enabled_ = options->is_concurrent_gc_enabled_;
   is_explicit_gc_disabled_ = options->is_explicit_gc_disabled_;
 
   compiler_filter_ = options->compiler_filter_;
@@ -898,7 +919,7 @@
                        options->heap_target_utilization_,
                        options->heap_maximum_size_,
                        options->image_,
-                       options->is_concurrent_gc_enabled_,
+                       options->collector_type_,
                        options->parallel_gc_threads_,
                        options->conc_gc_threads_,
                        options->low_memory_mode_,
@@ -906,6 +927,8 @@
                        options->long_gc_log_threshold_,
                        options->ignore_max_footprint_);
 
+  dump_gc_performance_on_shutdown_ = options->dump_gc_performance_on_shutdown_;
+
   BlockSignals();
   InitPlatformSignalHandlers();
 
@@ -927,12 +950,13 @@
   GetHeap()->EnableObjectValidation();
 
   CHECK_GE(GetHeap()->GetContinuousSpaces().size(), 1U);
-  if (GetHeap()->GetContinuousSpaces()[0]->IsImageSpace()) {
-    class_linker_ = ClassLinker::CreateFromImage(intern_table_);
+  class_linker_ = new ClassLinker(intern_table_);
+  if (GetHeap()->HasImageSpace()) {
+    class_linker_->InitFromImage();
   } else {
     CHECK(options->boot_class_path_ != NULL);
     CHECK_NE(options->boot_class_path_->size(), 0U);
-    class_linker_ = ClassLinker::CreateFromCompiler(*options->boot_class_path_, intern_table_);
+    class_linker_->InitFromCompiler(*options->boot_class_path_);
   }
   CHECK(class_linker_ != NULL);
   verifier::MethodVerifier::Init();
@@ -1077,9 +1101,9 @@
     GetStats()->Clear(~0);
     // TODO: wouldn't it make more sense to clear _all_ threads' stats?
     Thread::Current()->GetStats()->Clear(~0);
-    InstrumentQuickAllocEntryPoints();
+    GetInstrumentation()->InstrumentQuickAllocEntryPoints();
   } else {
-    UninstrumentQuickAllocEntryPoints();
+    GetInstrumentation()->UninstrumentQuickAllocEntryPoints();
   }
   stats_enabled_ = new_state;
 }
@@ -1175,16 +1199,20 @@
         visitor(pre_allocated_OutOfMemoryError_, arg));
     DCHECK(pre_allocated_OutOfMemoryError_ != nullptr);
   }
-  resolution_method_ = reinterpret_cast<mirror::ArtMethod*>(visitor(resolution_method_, arg));
+  resolution_method_ = down_cast<mirror::ArtMethod*>(visitor(resolution_method_, arg));
   DCHECK(resolution_method_ != nullptr);
-  imt_conflict_method_ = reinterpret_cast<mirror::ArtMethod*>(visitor(imt_conflict_method_, arg));
-  DCHECK(imt_conflict_method_ != nullptr);
-  default_imt_ = reinterpret_cast<mirror::ObjectArray<mirror::ArtMethod>*>(visitor(default_imt_, arg));
-  DCHECK(default_imt_ != nullptr);
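+  // These roots may not have been created yet, so only visit the ones that are present.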
+  if (HasImtConflictMethod()) {
+    imt_conflict_method_ = down_cast<mirror::ArtMethod*>(visitor(imt_conflict_method_, arg));
+  }
+  if (HasDefaultImt()) {
+    default_imt_ = down_cast<mirror::ObjectArray<mirror::ArtMethod>*>(visitor(default_imt_, arg));
+  }
+
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-    callee_save_methods_[i] = reinterpret_cast<mirror::ArtMethod*>(
-        visitor(callee_save_methods_[i], arg));
-    DCHECK(callee_save_methods_[i] != nullptr);
+    if (callee_save_methods_[i] != nullptr) {
+      callee_save_methods_[i] = down_cast<mirror::ArtMethod*>(
+          visitor(callee_save_methods_[i], arg));
+    }
   }
 }
 
@@ -1202,49 +1230,45 @@
   Thread* self = Thread::Current();
   SirtRef<mirror::ObjectArray<mirror::ArtMethod> > imtable(self, cl->AllocArtMethodArray(self, 64));
   mirror::ArtMethod* imt_conflict_method = Runtime::Current()->GetImtConflictMethod();
-  for (size_t i = 0; i < 64; i++) {
+  for (size_t i = 0; i < static_cast<size_t>(imtable->GetLength()); i++) {
     imtable->Set(i, imt_conflict_method);
   }
   return imtable.get();
 }
 
 mirror::ArtMethod* Runtime::CreateImtConflictMethod() {
-  mirror::Class* method_class = mirror::ArtMethod::GetJavaLangReflectArtMethod();
   Thread* self = Thread::Current();
-  SirtRef<mirror::ArtMethod>
-      method(self, down_cast<mirror::ArtMethod*>(method_class->AllocObject(self)));
-  method->SetDeclaringClass(method_class);
+  Runtime* r = Runtime::Current();
+  ClassLinker* cl = r->GetClassLinker();
+  SirtRef<mirror::ArtMethod> method(self, cl->AllocArtMethod(self));
+  method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
   // TODO: use a special method for imt conflict method saves
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
   // When compiling, the code pointer will get set later when the image is loaded.
-  Runtime* r = Runtime::Current();
-  ClassLinker* cl = r->GetClassLinker();
   method->SetEntryPointFromCompiledCode(r->IsCompiler() ? NULL : GetImtConflictTrampoline(cl));
   return method.get();
 }
 
 mirror::ArtMethod* Runtime::CreateResolutionMethod() {
-  mirror::Class* method_class = mirror::ArtMethod::GetJavaLangReflectArtMethod();
   Thread* self = Thread::Current();
-  SirtRef<mirror::ArtMethod>
-      method(self, down_cast<mirror::ArtMethod*>(method_class->AllocObject(self)));
-  method->SetDeclaringClass(method_class);
+  Runtime* r = Runtime::Current();
+  ClassLinker* cl = r->GetClassLinker();
+  SirtRef<mirror::ArtMethod> method(self, cl->AllocArtMethod(self));
+  method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
   // TODO: use a special method for resolution method saves
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
   // When compiling, the code pointer will get set later when the image is loaded.
-  Runtime* r = Runtime::Current();
-  ClassLinker* cl = r->GetClassLinker();
   method->SetEntryPointFromCompiledCode(r->IsCompiler() ? NULL : GetResolutionTrampoline(cl));
   return method.get();
 }
 
 mirror::ArtMethod* Runtime::CreateCalleeSaveMethod(InstructionSet instruction_set,
-                                                        CalleeSaveType type) {
-  mirror::Class* method_class = mirror::ArtMethod::GetJavaLangReflectArtMethod();
+                                                   CalleeSaveType type) {
   Thread* self = Thread::Current();
-  SirtRef<mirror::ArtMethod>
-      method(self, down_cast<mirror::ArtMethod*>(method_class->AllocObject(self)));
-  method->SetDeclaringClass(method_class);
+  Runtime* r = Runtime::Current();
+  ClassLinker* cl = r->GetClassLinker();
+  SirtRef<mirror::ArtMethod> method(self, cl->AllocArtMethod(self));
+  method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
   // TODO: use a special method for callee saves
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
   method->SetEntryPointFromCompiledCode(NULL);
@@ -1336,46 +1360,4 @@
   compile_time_class_paths_.Put(class_loader, class_path);
 }
 
-static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg) {
-  thread->ResetQuickAllocEntryPointsForThread();
-}
-
-void SetQuickAllocEntryPointsInstrumented(bool instrumented);
-
-void Runtime::InstrumentQuickAllocEntryPoints() {
-  ThreadList* tl = thread_list_;
-  Thread* self = Thread::Current();
-  tl->SuspendAll();
-  {
-    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-    MutexLock mu2(self, *Locks::thread_list_lock_);
-    DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_, 0);
-    int old_counter = quick_alloc_entry_points_instrumentation_counter_++;
-    if (old_counter == 0) {
-      // If it was disabled, enable it.
-      SetQuickAllocEntryPointsInstrumented(true);
-      tl->ForEach(ResetQuickAllocEntryPointsForThread, NULL);
-    }
-  }
-  tl->ResumeAll();
-}
-
-void Runtime::UninstrumentQuickAllocEntryPoints() {
-  ThreadList* tl = thread_list_;
-  Thread* self = Thread::Current();
-  tl->SuspendAll();
-  {
-    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-    MutexLock mu2(self, *Locks::thread_list_lock_);
-    DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_, 0);
-    int new_counter = --quick_alloc_entry_points_instrumentation_counter_;
-    if (new_counter == 0) {
-      // Disable it if the counter becomes zero.
-      SetQuickAllocEntryPointsInstrumented(false);
-      tl->ForEach(ResetQuickAllocEntryPointsForThread, NULL);
-    }
-  }
-  tl->ResumeAll();
-}
-
 }  // namespace art
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 0ce2642..e6951d9 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -27,6 +27,7 @@
 
 #include "base/macros.h"
 #include "base/stringpiece.h"
+#include "gc/collector_type.h"
 #include "gc/heap.h"
 #include "globals.h"
 #include "instruction_set.h"
@@ -99,10 +100,10 @@
     bool is_compiler_;
     bool is_zygote_;
     bool interpreter_only_;
-    bool is_concurrent_gc_enabled_;
     bool is_explicit_gc_disabled_;
     size_t long_pause_log_threshold_;
     size_t long_gc_log_threshold_;
+    bool dump_gc_performance_on_shutdown_;
     bool ignore_max_footprint_;
     size_t heap_initial_size_;
     size_t heap_maximum_size_;
@@ -112,6 +113,7 @@
     double heap_target_utilization_;
     size_t parallel_gc_threads_;
     size_t conc_gc_threads_;
+    gc::CollectorType collector_type_;
     size_t stack_size_;
     size_t max_spins_before_thin_lock_inflation_;
     bool low_memory_mode_;
@@ -149,10 +151,6 @@
     return is_zygote_;
   }
 
-  bool IsConcurrentGcEnabled() const {
-    return is_concurrent_gc_enabled_;
-  }
-
   bool IsExplicitGcDisabled() const {
     return is_explicit_gc_disabled_;
   }
@@ -203,7 +201,8 @@
   // Starts a runtime, which may cause threads to be started and code to run.
   bool Start() UNLOCK_FUNCTION(Locks::mutator_lock_);
 
-  bool IsShuttingDown() const EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_) {
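+  // Acquires runtime_shutdown_lock_; use IsShuttingDownLocked() if the lock is already held.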
+  bool IsShuttingDown(Thread* self);
+  bool IsShuttingDownLocked() const EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_) {
     return shutting_down_;
   }
 
@@ -442,9 +441,6 @@
   const std::vector<const DexFile*>& GetCompileTimeClassPath(jobject class_loader);
   void SetCompileTimeClassPath(jobject class_loader, std::vector<const DexFile*>& class_path);
 
-  void InstrumentQuickAllocEntryPoints();
-  void UninstrumentQuickAllocEntryPoints();
-
  private:
   static void InitPlatformSignalHandlers();
 
@@ -567,7 +563,8 @@
   // As returned by ClassLoader.getSystemClassLoader().
   jobject system_class_loader_;
 
-  int quick_alloc_entry_points_instrumentation_counter_;
+  // If true, then we dump the GC cumulative timings on shutdown.
+  bool dump_gc_performance_on_shutdown_;
 
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index c39cdb2..1ca6c4e 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -34,9 +34,8 @@
     if (UNLIKELY(self_ == NULL)) {
       // Value chosen arbitrarily and won't be used in the destructor since thread_ == NULL.
       old_thread_state_ = kTerminated;
-      MutexLock mu(NULL, *Locks::runtime_shutdown_lock_);
       Runtime* runtime = Runtime::Current();
-      CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDown());
+      CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDown(self_));
     } else {
       bool runnable_transition;
       DCHECK_EQ(self, Thread::Current());
@@ -63,9 +62,8 @@
   ~ScopedThreadStateChange() LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE {
     if (UNLIKELY(self_ == NULL)) {
       if (!expected_has_no_thread_) {
-        MutexLock mu(NULL, *Locks::runtime_shutdown_lock_);
         Runtime* runtime = Runtime::Current();
-        bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown();
+        bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown(nullptr);
         CHECK(shutting_down);
       }
     } else {
@@ -167,6 +165,10 @@
       return NULL;
     }
 
+    if (kIsDebugBuild) {
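+      // Debug-only sanity check of obj before creating a local reference to it.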
+      Runtime::Current()->GetHeap()->VerifyObject(obj);
+    }
+
     DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000);
 
     IndirectReferenceTable& locals = Env()->locals;
@@ -185,7 +187,6 @@
       }
     }
 #endif
-
     if (Vm()->work_around_app_jni_bugs) {
       // Hand out direct pointers to support broken old apps.
       return reinterpret_cast<T>(obj);
@@ -206,10 +207,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
-#ifdef MOVING_GARBAGE_COLLECTOR
-    // TODO: we should make these unique weak globals if Field instances can ever move.
-    UNIMPLEMENTED(WARNING);
-#endif
+    CHECK(!kMovingFields);
     return reinterpret_cast<mirror::ArtField*>(fid);
   }
 
@@ -217,9 +215,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
-#ifdef MOVING_GARBAGE_COLLECTOR
-    UNIMPLEMENTED(WARNING);
-#endif
+    CHECK(!kMovingFields);
     return reinterpret_cast<jfieldID>(field);
   }
 
@@ -227,10 +223,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
-#ifdef MOVING_GARBAGE_COLLECTOR
-    // TODO: we should make these unique weak globals if Method instances can ever move.
-    UNIMPLEMENTED(WARNING);
-#endif
+    CHECK(!kMovingMethods);
     return reinterpret_cast<mirror::ArtMethod*>(mid);
   }
 
@@ -238,9 +231,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
-#ifdef MOVING_GARBAGE_COLLECTOR
-    UNIMPLEMENTED(WARNING);
-#endif
+    CHECK(!kMovingMethods);
     return reinterpret_cast<jmethodID>(method);
   }
 
diff --git a/runtime/sirt_ref.h b/runtime/sirt_ref.h
index a1f8a66..56d81ec 100644
--- a/runtime/sirt_ref.h
+++ b/runtime/sirt_ref.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_SIRT_REF_H_
 #define ART_RUNTIME_SIRT_REF_H_
 
+#include "base/casts.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "thread.h"
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 5d3a9a5..a505383 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -22,12 +22,17 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "object_utils.h"
+#include "runtime.h"
 #include "thread_list.h"
 #include "throw_location.h"
 #include "vmap_table.h"
 
 namespace art {
 
+bool ShadowFrame::VerifyReference(const mirror::Object* val) const {
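+  // A reference is valid iff it does not point into the temp space used by a moving GC.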
+  return !Runtime::Current()->GetHeap()->IsInTempSpace(val);
+}
+
 mirror::Object* ShadowFrame::GetThisObject() const {
   mirror::ArtMethod* m = GetMethod();
   if (m->IsStatic()) {
diff --git a/runtime/stack.h b/runtime/stack.h
index a4b93bc..3d6b06a 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -150,10 +150,15 @@
     return *reinterpret_cast<unaligned_double*>(vreg);
   }
 
+  template <bool kChecked = false>
   mirror::Object* GetVRegReference(size_t i) const {
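+    // With kChecked, CHECK that the reference does not point into protected space.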
     DCHECK_LT(i, NumberOfVRegs());
     if (HasReferenceArray()) {
       mirror::Object* ref = References()[i];
+      if (kChecked) {
+        CHECK(VerifyReference(ref)) << "VReg " << i << "(" << ref
+                                    << ") is in protected space, reference array " << true;
+      }
       // If the vreg reference is not equal to the vreg then the vreg reference is stale.
       if (reinterpret_cast<uint32_t>(ref) != vregs_[i]) {
         return nullptr;
@@ -161,7 +166,12 @@
       return ref;
     } else {
       const uint32_t* vreg = &vregs_[i];
-      return *reinterpret_cast<mirror::Object* const*>(vreg);
+      mirror::Object* ref = *reinterpret_cast<mirror::Object* const*>(vreg);
+      if (kChecked) {
+        CHECK(VerifyReference(ref)) << "VReg " << i
+            << "(" << ref << ") is in protected space, reference array " << false;
+      }
+      return ref;
     }
   }
 
@@ -174,12 +184,22 @@
     DCHECK_LT(i, NumberOfVRegs());
     uint32_t* vreg = &vregs_[i];
     *reinterpret_cast<int32_t*>(vreg) = val;
+    // This is needed for moving collectors, since they may update a vreg whose value
+    // happens to agree with the corresponding entry in the reference array.
+    if (kMovingCollector && HasReferenceArray()) {
+      References()[i] = nullptr;
+    }
   }
 
   void SetVRegFloat(size_t i, float val) {
     DCHECK_LT(i, NumberOfVRegs());
     uint32_t* vreg = &vregs_[i];
     *reinterpret_cast<float*>(vreg) = val;
+    // This is needed for moving collectors, since they may update a vreg whose value
+    // happens to agree with the corresponding entry in the reference array.
+    if (kMovingCollector && HasReferenceArray()) {
+      References()[i] = nullptr;
+    }
   }
 
   void SetVRegLong(size_t i, int64_t val) {
@@ -188,6 +208,12 @@
     // Alignment attribute required for GCC 4.8
     typedef int64_t unaligned_int64 __attribute__ ((aligned (4)));
     *reinterpret_cast<unaligned_int64*>(vreg) = val;
+    // This is needed for moving collectors, since they may update a vreg whose value
+    // happens to agree with the corresponding entry in the reference array.
+    if (kMovingCollector && HasReferenceArray()) {
+      References()[i] = nullptr;
+      References()[i + 1] = nullptr;
+    }
   }
 
   void SetVRegDouble(size_t i, double val) {
@@ -196,10 +222,18 @@
     // Alignment attribute required for GCC 4.8
     typedef double unaligned_double __attribute__ ((aligned (4)));
     *reinterpret_cast<unaligned_double*>(vreg) = val;
+    // This is needed for moving collectors, since they may update a vreg whose value
+    // happens to agree with the corresponding entry in the reference array.
+    if (kMovingCollector && HasReferenceArray()) {
+      References()[i] = nullptr;
+      References()[i + 1] = nullptr;
+    }
   }
 
   void SetVRegReference(size_t i, mirror::Object* val) {
     DCHECK_LT(i, NumberOfVRegs());
+    DCHECK(!kMovingCollector || VerifyReference(val))
+        << "VReg " << i << "(" << val << ") is in protected space";
     uint32_t* vreg = &vregs_[i];
     *reinterpret_cast<mirror::Object**>(vreg) = val;
     if (HasReferenceArray()) {
@@ -280,6 +314,8 @@
     return reinterpret_cast<mirror::Object* const*>(vreg_end);
   }
 
+  bool VerifyReference(const mirror::Object* val) const;
+
   mirror::Object** References() {
     return const_cast<mirror::Object**>(const_cast<const ShadowFrame*>(this)->References());
   }
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 9751076..1add507 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -152,7 +152,7 @@
     MutexLock mu(NULL, *Locks::runtime_shutdown_lock_);
     // Check that if we got here we cannot be shutting down (as shutdown should never have started
     // while threads are being born).
-    CHECK(!runtime->IsShuttingDown());
+    CHECK(!runtime->IsShuttingDownLocked());
     self->Init(runtime->GetThreadList(), runtime->GetJavaVM());
     Runtime::Current()->EndThreadBirth();
   }
@@ -241,7 +241,7 @@
   bool thread_start_during_shutdown = false;
   {
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-    if (runtime->IsShuttingDown()) {
+    if (runtime->IsShuttingDownLocked()) {
       thread_start_during_shutdown = true;
     } else {
       runtime->StartThreadBirth();
@@ -328,7 +328,7 @@
   }
   {
     MutexLock mu(NULL, *Locks::runtime_shutdown_lock_);
-    if (runtime->IsShuttingDown()) {
+    if (runtime->IsShuttingDownLocked()) {
       LOG(ERROR) << "Thread attaching while runtime is shutting down: " << thread_name;
       return NULL;
     } else {
@@ -917,6 +917,7 @@
       throwing_OutOfMemoryError_(false),
       debug_suspend_count_(0),
       debug_invoke_req_(new DebugInvokeReq),
+      single_step_control_(new SingleStepControl),
       deoptimization_shadow_frame_(NULL),
       instrumentation_stack_(new std::deque<instrumentation::InstrumentationStackFrame>),
       name_(new std::string(kThreadNameDuringStartup)),
@@ -930,6 +931,7 @@
   state_and_flags_.as_struct.flags = 0;
   state_and_flags_.as_struct.state = kNative;
   memset(&held_mutexes_[0], 0, sizeof(held_mutexes_));
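+  // Start with no thread-local rosalloc runs assigned to this thread.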
+  memset(rosalloc_runs_, 0, sizeof(rosalloc_runs_));
 }
 
 bool Thread::IsStillStarting() const {
@@ -1018,10 +1020,13 @@
   }
 
   delete debug_invoke_req_;
+  delete single_step_control_;
   delete instrumentation_stack_;
   delete name_;
   delete stack_trace_sample_;
 
+  Runtime::Current()->GetHeap()->RevokeThreadLocalBuffers(this);
+
   TearDownAlternateSignalStack();
 }
 
@@ -1352,13 +1357,12 @@
     *stack_depth = depth;
   }
 
-  MethodHelper mh;
   for (int32_t i = 0; i < depth; ++i) {
     mirror::ObjectArray<mirror::Object>* method_trace =
           soa.Decode<mirror::ObjectArray<mirror::Object>*>(internal);
     // Prepare parameters for StackTraceElement(String cls, String method, String file, int line)
     mirror::ArtMethod* method = down_cast<mirror::ArtMethod*>(method_trace->Get(i));
-    mh.ChangeMethod(method);
+    MethodHelper mh(method);
     mirror::IntArray* pc_trace = down_cast<mirror::IntArray*>(method_trace->Get(depth));
     uint32_t dex_pc = pc_trace->Get(i);
     int32_t line_number = mh.GetLineNumFromDexPC(dex_pc);
@@ -1385,11 +1389,8 @@
     SirtRef<mirror::String> source_name_object(soa.Self(),
                                                mirror::String::AllocFromModifiedUtf8(soa.Self(),
                                                                                      source_file));
-    mirror::StackTraceElement* obj = mirror::StackTraceElement::Alloc(soa.Self(),
-                                                                      class_name_object.get(),
-                                                                      method_name_object.get(),
-                                                                      source_name_object.get(),
-                                                                      line_number);
+    mirror::StackTraceElement* obj = mirror::StackTraceElement::Alloc(
+        soa.Self(), class_name_object, method_name_object, source_name_object, line_number);
     if (obj == NULL) {
       return NULL;
     }
@@ -1437,8 +1438,10 @@
   if (throw_location.GetMethod() != NULL) {
     cl = throw_location.GetMethod()->GetDeclaringClass()->GetClassLoader();
   }
+  SirtRef<mirror::ClassLoader> class_loader(this, cl);
   SirtRef<mirror::Class>
-      exception_class(this, runtime->GetClassLinker()->FindClass(exception_class_descriptor, cl));
+      exception_class(this, runtime->GetClassLinker()->FindClass(exception_class_descriptor,
+                                                                 class_loader));
   if (UNLIKELY(exception_class.get() == NULL)) {
     CHECK(IsExceptionPending());
     LOG(ERROR) << "No exception class " << PrettyDescriptor(exception_class_descriptor);
@@ -1453,6 +1456,12 @@
   SirtRef<mirror::Throwable> exception(this,
                                 down_cast<mirror::Throwable*>(exception_class->AllocObject(this)));
 
+  // If we couldn't allocate the exception, throw the pre-allocated out of memory exception.
+  if (exception.get() == nullptr) {
+    SetException(throw_location, Runtime::Current()->GetPreAllocatedOutOfMemoryError());
+    return;
+  }
+
   // Choose an appropriate constructor and set up the arguments.
   const char* signature;
   SirtRef<mirror::String> msg_string(this, NULL);
@@ -1741,18 +1750,21 @@
     return true;  // Continue stack walk.
   }
 
-  bool HandleDeoptimization(mirror::ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool HandleDeoptimization(mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     MethodHelper mh(m);
     const DexFile::CodeItem* code_item = mh.GetCodeItem();
     CHECK(code_item != NULL);
-    uint16_t num_regs =  code_item->registers_size_;
+    uint16_t num_regs = code_item->registers_size_;
     uint32_t dex_pc = GetDexPc();
     const Instruction* inst = Instruction::At(code_item->insns_ + dex_pc);
     uint32_t new_dex_pc = dex_pc + inst->SizeInCodeUnits();
     ShadowFrame* new_frame = ShadowFrame::Create(num_regs, NULL, m, new_dex_pc);
-    verifier::MethodVerifier verifier(&mh.GetDexFile(), mh.GetDexCache(), mh.GetClassLoader(),
-                                      &mh.GetClassDef(), code_item,
-                                      m->GetDexMethodIndex(), m, m->GetAccessFlags(), false, true);
+    SirtRef<mirror::DexCache> dex_cache(self_, mh.GetDexCache());
+    SirtRef<mirror::ClassLoader> class_loader(self_, mh.GetClassLoader());
+    verifier::MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader,
+                                      &mh.GetClassDef(), code_item, m->GetDexMethodIndex(), m,
+                                      m->GetAccessFlags(), false, true);
     verifier.Verify();
     std::vector<int32_t> kinds = verifier.DescribeVRegs(dex_pc);
     for (uint16_t reg = 0; reg < num_regs; reg++) {
@@ -2088,6 +2100,13 @@
   void* const arg_;
 };
 
+void Thread::SetClassLoaderOverride(mirror::ClassLoader* class_loader_override) {
+  if (kIsDebugBuild) {
+    Runtime::Current()->GetHeap()->VerifyObject(class_loader_override);
+  }
+  class_loader_override_ = class_loader_override;
+}
+
 void Thread::VisitRoots(RootVisitor* visitor, void* arg) {
   if (opeer_ != nullptr) {
     opeer_ = visitor(opeer_, arg);
@@ -2115,10 +2134,9 @@
   for (instrumentation::InstrumentationStackFrame& frame : *GetInstrumentationStack()) {
     if (frame.this_object_ != nullptr) {
       frame.this_object_ = visitor(frame.this_object_, arg);
-      DCHECK(frame.this_object_ != nullptr);
     }
-    frame.method_ = reinterpret_cast<mirror::ArtMethod*>(visitor(frame.method_, arg));
     DCHECK(frame.method_ != nullptr);
+    frame.method_ = reinterpret_cast<mirror::ArtMethod*>(visitor(frame.method_, arg));
   }
 }
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 3aa1373..db2f7b4 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -68,6 +68,7 @@
 class ScopedObjectAccess;
 class ScopedObjectAccessUnchecked;
 class ShadowFrame;
+struct SingleStepControl;
 class Thread;
 class ThreadList;
 
@@ -177,34 +178,27 @@
       ALWAYS_INLINE;
 
   // Once called thread suspension will cause an assertion failure.
-#ifndef NDEBUG
   const char* StartAssertNoThreadSuspension(const char* cause) {
-    CHECK(cause != NULL);
-    const char* previous_cause = last_no_thread_suspension_cause_;
-    no_thread_suspension_++;
-    last_no_thread_suspension_cause_ = cause;
-    return previous_cause;
+    if (kIsDebugBuild) {
+      CHECK(cause != NULL);
+      const char* previous_cause = last_no_thread_suspension_cause_;
+      no_thread_suspension_++;
+      last_no_thread_suspension_cause_ = cause;
+      return previous_cause;
+    } else {
+      return nullptr;
+    }
   }
-#else
-  const char* StartAssertNoThreadSuspension(const char* cause) {
-    CHECK(cause != NULL);
-    return NULL;
-  }
-#endif
 
   // End region where no thread suspension is expected.
-#ifndef NDEBUG
   void EndAssertNoThreadSuspension(const char* old_cause) {
-    CHECK(old_cause != NULL || no_thread_suspension_ == 1);
-    CHECK_GT(no_thread_suspension_, 0U);
-    no_thread_suspension_--;
-    last_no_thread_suspension_cause_ = old_cause;
+    if (kIsDebugBuild) {
+      CHECK(old_cause != NULL || no_thread_suspension_ == 1);
+      CHECK_GT(no_thread_suspension_, 0U);
+      no_thread_suspension_--;
+      last_no_thread_suspension_cause_ = old_cause;
+    }
   }
-#else
-  void EndAssertNoThreadSuspension(const char*) {
-  }
-#endif
-
 
   void AssertThreadSuspensionIsAllowable(bool check_locks = true) const;
 
@@ -370,9 +364,7 @@
     return class_loader_override_;
   }
 
-  void SetClassLoaderOverride(mirror::ClassLoader* class_loader_override) {
-    class_loader_override_ = class_loader_override;
-  }
+  void SetClassLoaderOverride(mirror::ClassLoader* class_loader_override);
 
   // Create the internal representation of a stack trace, that is more time
   // and space efficient to compute than the StackTraceElement[]
@@ -522,6 +514,10 @@
     return debug_invoke_req_;
   }
 
+  SingleStepControl* GetSingleStepControl() const {
+    return single_step_control_;
+  }
+
   void SetDeoptimizationShadowFrame(ShadowFrame* sf);
   void SetDeoptimizationReturnValue(const JValue& ret_val);
 
@@ -755,6 +751,9 @@
   // JDWP invoke-during-breakpoint support.
   DebugInvokeReq* debug_invoke_req_;
 
+  // JDWP single-stepping support.
+  SingleStepControl* single_step_control_;
+
   // Shadow frame that is used temporarily during the deoptimization of a method.
   ShadowFrame* deoptimization_shadow_frame_;
   JValue deoptimization_return_value_;
@@ -799,6 +798,15 @@
 
   friend class ScopedThreadStateChange;
 
+ public:
+  // Thread-local rosalloc runs. There are 34 size brackets
+  // (RosAlloc::kNumOfSizeBrackets). We can't refer to the RosAlloc class
+  // here due to a circular header dependency, so instead we check that the
+  // two values match at RosAlloc initialization time.
+  static const size_t kRosAllocNumOfSizeBrackets = 34;
+  void* rosalloc_runs_[kRosAllocNumOfSizeBrackets];
+
   DISALLOW_COPY_AND_ASSIGN(Thread);
 };
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index ff1ed2a..dd3f11c 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -74,6 +74,15 @@
   return Locks::thread_list_lock_->GetExclusiveOwnerTid();
 }
 
+void ThreadList::DumpNativeStacks(std::ostream& os) {
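+  // Hold thread_list_lock_ while walking the list and dumping each thread's native stack.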
+  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+  for (const auto& thread : list_) {
+    os << "DUMPING THREAD " << thread->tid_ << "\n";
+    DumpNativeStack(os, thread->tid_, "\t", true);
+    os << "\n";
+  }
+}
+
 void ThreadList::DumpForSigQuit(std::ostream& os) {
   {
     MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
@@ -413,7 +422,7 @@
           return thread;
         }
         if (total_delay_us >= kTimeoutUs) {
-          ThreadSuspendByPeerWarning(self, ERROR, "Thread suspension timed out", peer);
+          ThreadSuspendByPeerWarning(self, FATAL, "Thread suspension timed out", peer);
           if (did_suspend_request) {
             thread->ModifySuspendCount(soa.Self(), -1, debug_suspension);
           }
@@ -477,7 +486,7 @@
           return thread;
         }
         if (total_delay_us >= kTimeoutUs) {
-          ThreadSuspendByThreadIdWarning(ERROR, "Thread suspension timed out", thread_id);
+          ThreadSuspendByThreadIdWarning(WARNING, "Thread suspension timed out", thread_id);
           if (did_suspend_request) {
             thread->ModifySuspendCount(soa.Self(), -1, debug_suspension);
           }
@@ -626,7 +635,7 @@
     {
       // No more threads can be born after we start to shutdown.
       MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-      CHECK(Runtime::Current()->IsShuttingDown());
+      CHECK(Runtime::Current()->IsShuttingDownLocked());
       CHECK_EQ(Runtime::Current()->NumberOfThreadsBeingBorn(), 0U);
     }
     all_threads_are_daemons = true;
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index b1b3e88..45994ae 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -124,6 +124,9 @@
     return list_;
   }
 
+  void DumpNativeStacks(std::ostream& os)
+      LOCKS_EXCLUDED(Locks::thread_list_lock_);
+
  private:
   uint32_t AllocThreadId(Thread* self);
   void ReleaseThreadId(Thread* self, uint32_t id) LOCKS_EXCLUDED(allocated_ids_lock_);
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index bb6c475..aca0561 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -28,12 +28,15 @@
 ThreadPoolWorker::ThreadPoolWorker(ThreadPool* thread_pool, const std::string& name,
                                    size_t stack_size)
     : thread_pool_(thread_pool),
-      name_(name),
-      stack_size_(stack_size) {
+      name_(name) {
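+  // Allocate the worker's stack as an explicit anonymous mapping and hand it to pthread below.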
+  std::string error_msg;
+  stack_.reset(MemMap::MapAnonymous(name.c_str(), nullptr, stack_size, PROT_READ | PROT_WRITE,
+                                    &error_msg));
+  CHECK(stack_.get() != nullptr) << error_msg;
   const char* reason = "new thread pool worker thread";
   pthread_attr_t attr;
   CHECK_PTHREAD_CALL(pthread_attr_init, (&attr), reason);
-  CHECK_PTHREAD_CALL(pthread_attr_setstacksize, (&attr, stack_size), reason);
+  CHECK_PTHREAD_CALL(pthread_attr_setstack, (&attr, stack_->Begin(), stack_->Size()), reason);
   CHECK_PTHREAD_CALL(pthread_create, (&pthread_, &attr, &Callback, this), reason);
   CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attr), reason);
 }
@@ -71,8 +74,9 @@
   }
 }
 
-ThreadPool::ThreadPool(size_t num_threads)
-  : task_queue_lock_("task queue lock"),
+ThreadPool::ThreadPool(const char* name, size_t num_threads)
+  : name_(name),
+    task_queue_lock_("task queue lock"),
     task_queue_condition_("task queue condition", task_queue_lock_),
     completion_condition_("task completion condition", task_queue_lock_),
     started_(false),
@@ -85,7 +89,7 @@
     max_active_workers_(num_threads) {
   Thread* self = Thread::Current();
   while (GetThreadCount() < num_threads) {
-    const std::string name = StringPrintf("Thread pool worker %zu", GetThreadCount());
+    const std::string name = StringPrintf("%s worker thread %zu", name_.c_str(), GetThreadCount());
     threads_.push_back(new ThreadPoolWorker(this, name, ThreadPoolWorker::kDefaultStackSize));
   }
   // Wait for all of the threads to attach.
@@ -270,8 +274,8 @@
 
 WorkStealingWorker::~WorkStealingWorker() {}
 
-WorkStealingThreadPool::WorkStealingThreadPool(size_t num_threads)
-    : ThreadPool(0),
+WorkStealingThreadPool::WorkStealingThreadPool(const char* name, size_t num_threads)
+    : ThreadPool(name, 0),
       work_steal_lock_("work stealing lock"),
       steal_index_(0) {
   while (GetThreadCount() < num_threads) {
diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h
index b9a97a1..e8f9afe 100644
--- a/runtime/thread_pool.h
+++ b/runtime/thread_pool.h
@@ -24,6 +24,7 @@
 #include "base/mutex.h"
 #include "closure.h"
 #include "locks.h"
+#include "mem_map.h"
 
 namespace art {
 
@@ -40,7 +41,8 @@
   static const size_t kDefaultStackSize = 1 * MB;
 
   size_t GetStackSize() const {
-    return stack_size_;
+    DCHECK(stack_.get() != nullptr);
+    return stack_->Size();
   }
 
   virtual ~ThreadPoolWorker();
@@ -52,7 +54,7 @@
 
   ThreadPool* const thread_pool_;
   const std::string name_;
-  const size_t stack_size_;
+  UniquePtr<MemMap> stack_;
   pthread_t pthread_;
 
  private:
@@ -77,7 +79,7 @@
   // after running it, it is the caller's responsibility.
   void AddTask(Thread* self, Task* task);
 
-  explicit ThreadPool(size_t num_threads);
+  explicit ThreadPool(const char* name, size_t num_threads);
   virtual ~ThreadPool();
 
   // Wait for all tasks currently on queue to get completed.
@@ -107,6 +109,7 @@
     return shutting_down_;
   }
 
+  const std::string name_;
   Mutex task_queue_lock_;
   ConditionVariable task_queue_condition_ GUARDED_BY(task_queue_lock_);
   ConditionVariable completion_condition_ GUARDED_BY(task_queue_lock_);
@@ -167,7 +170,7 @@
 
 class WorkStealingThreadPool : public ThreadPool {
  public:
-  explicit WorkStealingThreadPool(size_t num_threads);
+  explicit WorkStealingThreadPool(const char* name, size_t num_threads);
   virtual ~WorkStealingThreadPool();
 
  private:
diff --git a/runtime/thread_pool_test.cc b/runtime/thread_pool_test.cc
index 9b789d2..1b22361 100644
--- a/runtime/thread_pool_test.cc
+++ b/runtime/thread_pool_test.cc
@@ -59,7 +59,7 @@
 // Check that the thread pool actually runs tasks that you assign it.
 TEST_F(ThreadPoolTest, CheckRun) {
   Thread* self = Thread::Current();
-  ThreadPool thread_pool(num_threads);
+  ThreadPool thread_pool("Thread pool test thread pool", num_threads);
   AtomicInteger count(0);
   static const int32_t num_tasks = num_threads * 4;
   for (int32_t i = 0; i < num_tasks; ++i) {
@@ -74,7 +74,7 @@
 
 TEST_F(ThreadPoolTest, StopStart) {
   Thread* self = Thread::Current();
-  ThreadPool thread_pool(num_threads);
+  ThreadPool thread_pool("Thread pool test thread pool", num_threads);
   AtomicInteger count(0);
   static const int32_t num_tasks = num_threads * 4;
   for (int32_t i = 0; i < num_tasks; ++i) {
@@ -129,7 +129,7 @@
 // Test that adding new tasks from within a task works.
 TEST_F(ThreadPoolTest, RecursiveTest) {
   Thread* self = Thread::Current();
-  ThreadPool thread_pool(num_threads);
+  ThreadPool thread_pool("Thread pool test thread pool", num_threads);
   AtomicInteger count(0);
   static const int depth = 8;
   thread_pool.AddTask(self, new TreeTask(&thread_pool, &count, depth));
diff --git a/runtime/trace.cc b/runtime/trace.cc
index ec95a87..da2c80a 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -570,7 +570,8 @@
                       thread_clock_diff, wall_clock_diff);
 }
 
-void Trace::MethodUnwind(Thread* thread, const mirror::ArtMethod* method, uint32_t dex_pc) {
+void Trace::MethodUnwind(Thread* thread, mirror::Object* this_object,
+                         const mirror::ArtMethod* method, uint32_t dex_pc) {
   uint32_t thread_clock_diff = 0;
   uint32_t wall_clock_diff = 0;
   ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
diff --git a/runtime/trace.h b/runtime/trace.h
index ffcb36d..9be015a 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -79,7 +79,8 @@
                             const mirror::ArtMethod* method, uint32_t dex_pc,
                             const JValue& return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void MethodUnwind(Thread* thread, const mirror::ArtMethod* method, uint32_t dex_pc)
+  virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
+                            const mirror::ArtMethod* method, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
                           const mirror::ArtMethod* method, uint32_t new_dex_pc)
diff --git a/runtime/utils.h b/runtime/utils.h
index 6850e8b..4b39acd 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -122,7 +122,7 @@
 // For rounding integers.
 template<typename T>
 static inline T RoundDown(T x, int n) {
-  CHECK(IsPowerOfTwo(n));
+  DCHECK(IsPowerOfTwo(n));
   return (x & -n);
 }
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 9f98061..1e45c60 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -39,6 +39,7 @@
 #include "object_utils.h"
 #include "register_line-inl.h"
 #include "runtime.h"
+#include "scoped_thread_state_change.h"
 #include "verifier/dex_gc_map.h"
 
 namespace art {
@@ -113,17 +114,15 @@
     *error += dex_file.GetLocation();
     return kHardFailure;
   }
-  return VerifyClass(&dex_file,
-                     kh.GetDexCache(),
-                     klass->GetClassLoader(),
-                     class_def,
-                     allow_soft_failures,
-                     error);
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, kh.GetDexCache());
+  SirtRef<mirror::ClassLoader> class_loader(self, klass->GetClassLoader());
+  return VerifyClass(&dex_file, dex_cache, class_loader, class_def, allow_soft_failures, error);
 }
 
 MethodVerifier::FailureKind MethodVerifier::VerifyClass(const DexFile* dex_file,
-                                                        mirror::DexCache* dex_cache,
-                                                        mirror::ClassLoader* class_loader,
+                                                        SirtRef<mirror::DexCache>& dex_cache,
+                                                        SirtRef<mirror::ClassLoader>& class_loader,
                                                         const DexFile::ClassDef* class_def,
                                                         bool allow_soft_failures,
                                                         std::string* error) {
@@ -233,8 +232,8 @@
 
 MethodVerifier::FailureKind MethodVerifier::VerifyMethod(uint32_t method_idx,
                                                          const DexFile* dex_file,
-                                                         mirror::DexCache* dex_cache,
-                                                         mirror::ClassLoader* class_loader,
+                                                         SirtRef<mirror::DexCache>& dex_cache,
+                                                         SirtRef<mirror::ClassLoader>& class_loader,
                                                          const DexFile::ClassDef* class_def,
                                                          const DexFile::CodeItem* code_item,
                                                          mirror::ArtMethod* method,
@@ -243,8 +242,8 @@
   MethodVerifier::FailureKind result = kNoFailure;
   uint64_t start_ns = NanoTime();
 
-  MethodVerifier verifier_(dex_file, dex_cache, class_loader, class_def, code_item, method_idx,
-                           method, method_access_flags, true, allow_soft_failures);
+  MethodVerifier verifier_(dex_file, &dex_cache, &class_loader, class_def, code_item,
+                           method_idx, method, method_access_flags, true, allow_soft_failures);
   if (verifier_.Verify()) {
     // Verification completed, however failures may be pending that didn't cause the verification
     // to hard fail.
@@ -277,13 +276,14 @@
 }
 
 void MethodVerifier::VerifyMethodAndDump(std::ostream& os, uint32_t dex_method_idx,
-                                         const DexFile* dex_file, mirror::DexCache* dex_cache,
-                                         mirror::ClassLoader* class_loader,
+                                         const DexFile* dex_file,
+                                         SirtRef<mirror::DexCache>& dex_cache,
+                                         SirtRef<mirror::ClassLoader>& class_loader,
                                          const DexFile::ClassDef* class_def,
                                          const DexFile::CodeItem* code_item,
                                          mirror::ArtMethod* method,
                                          uint32_t method_access_flags) {
-  MethodVerifier verifier(dex_file, dex_cache, class_loader, class_def, code_item,
+  MethodVerifier verifier(dex_file, &dex_cache, &class_loader, class_def, code_item,
                           dex_method_idx, method, method_access_flags, true, true);
   verifier.Verify();
   verifier.DumpFailures(os);
@@ -291,13 +291,12 @@
   verifier.Dump(os);
 }
 
-MethodVerifier::MethodVerifier(const DexFile* dex_file, mirror::DexCache* dex_cache,
-                               mirror::ClassLoader* class_loader,
+MethodVerifier::MethodVerifier(const DexFile* dex_file, SirtRef<mirror::DexCache>* dex_cache,
+                               SirtRef<mirror::ClassLoader>* class_loader,
                                const DexFile::ClassDef* class_def,
-                               const DexFile::CodeItem* code_item,
-                               uint32_t dex_method_idx, mirror::ArtMethod* method,
-                               uint32_t method_access_flags, bool can_load_classes,
-                               bool allow_soft_failures)
+                               const DexFile::CodeItem* code_item, uint32_t dex_method_idx,
+                               mirror::ArtMethod* method, uint32_t method_access_flags,
+                               bool can_load_classes, bool allow_soft_failures)
     : reg_types_(can_load_classes),
       work_insn_idx_(-1),
       dex_method_idx_(dex_method_idx),
@@ -323,12 +322,19 @@
   DCHECK(class_def != nullptr);
 }
 
+MethodVerifier::~MethodVerifier() {
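+  // The verifier owns its failure messages; release them here.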
+  STLDeleteElements(&failure_messages_);
+}
+
 void MethodVerifier::FindLocksAtDexPc(mirror::ArtMethod* m, uint32_t dex_pc,
                                       std::vector<uint32_t>& monitor_enter_dex_pcs) {
   MethodHelper mh(m);
-  MethodVerifier verifier(&mh.GetDexFile(), mh.GetDexCache(), mh.GetClassLoader(),
-                          &mh.GetClassDef(), mh.GetCodeItem(), m->GetDexMethodIndex(),
-                          m, m->GetAccessFlags(), false, true);
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, mh.GetDexCache());
+  SirtRef<mirror::ClassLoader> class_loader(self, mh.GetClassLoader());
+  MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader, &mh.GetClassDef(),
+                          mh.GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), false,
+                          true);
   verifier.interesting_dex_pc_ = dex_pc;
   verifier.monitor_enter_dex_pcs_ = &monitor_enter_dex_pcs;
   verifier.FindLocksAtDexPc();
@@ -348,9 +354,12 @@
 mirror::ArtField* MethodVerifier::FindAccessedFieldAtDexPc(mirror::ArtMethod* m,
                                                         uint32_t dex_pc) {
   MethodHelper mh(m);
-  MethodVerifier verifier(&mh.GetDexFile(), mh.GetDexCache(), mh.GetClassLoader(),
-                          &mh.GetClassDef(), mh.GetCodeItem(), m->GetDexMethodIndex(),
-                          m, m->GetAccessFlags(), false, true);
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, mh.GetDexCache());
+  SirtRef<mirror::ClassLoader> class_loader(self, mh.GetClassLoader());
+  MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader, &mh.GetClassDef(),
+                          mh.GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), false,
+                          true);
   return verifier.FindAccessedFieldAtDexPc(dex_pc);
 }
 
@@ -374,11 +383,14 @@
 }
 
 mirror::ArtMethod* MethodVerifier::FindInvokedMethodAtDexPc(mirror::ArtMethod* m,
-                                                                 uint32_t dex_pc) {
+                                                            uint32_t dex_pc) {
   MethodHelper mh(m);
-  MethodVerifier verifier(&mh.GetDexFile(), mh.GetDexCache(), mh.GetClassLoader(),
-                          &mh.GetClassDef(), mh.GetCodeItem(), m->GetDexMethodIndex(),
-                          m, m->GetAccessFlags(), false, true);
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, mh.GetDexCache());
+  SirtRef<mirror::ClassLoader> class_loader(self, mh.GetClassLoader());
+  MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader, &mh.GetClassDef(),
+                          mh.GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), false,
+                          true);
   return verifier.FindInvokedMethodAtDexPc(dex_pc);
 }
 
@@ -589,7 +601,7 @@
       if (iterator.GetHandlerTypeIndex() != DexFile::kDexNoIndex16) {
         mirror::Class* exception_type = linker->ResolveType(*dex_file_,
                                                             iterator.GetHandlerTypeIndex(),
-                                                            dex_cache_, class_loader_);
+                                                            *dex_cache_, *class_loader_);
         if (exception_type == NULL) {
           DCHECK(Thread::Current()->IsExceptionPending());
           Thread::Current()->ClearException();
@@ -1017,26 +1029,6 @@
   return true;
 }
 
-static const std::vector<uint8_t>* CreateLengthPrefixedDexGcMap(
-    const std::vector<uint8_t>& gc_map) {
-  std::vector<uint8_t>* length_prefixed_gc_map = new std::vector<uint8_t>;
-  length_prefixed_gc_map->reserve(gc_map.size() + 4);
-  length_prefixed_gc_map->push_back((gc_map.size() & 0xff000000) >> 24);
-  length_prefixed_gc_map->push_back((gc_map.size() & 0x00ff0000) >> 16);
-  length_prefixed_gc_map->push_back((gc_map.size() & 0x0000ff00) >> 8);
-  length_prefixed_gc_map->push_back((gc_map.size() & 0x000000ff) >> 0);
-  length_prefixed_gc_map->insert(length_prefixed_gc_map->end(),
-                                 gc_map.begin(),
-                                 gc_map.end());
-  DCHECK_EQ(gc_map.size() + 4, length_prefixed_gc_map->size());
-  DCHECK_EQ(gc_map.size(),
-            static_cast<size_t>((length_prefixed_gc_map->at(0) << 24) |
-                                (length_prefixed_gc_map->at(1) << 16) |
-                                (length_prefixed_gc_map->at(2) << 8) |
-                                (length_prefixed_gc_map->at(3) << 0)));
-  return length_prefixed_gc_map;
-}
-
 bool MethodVerifier::VerifyCodeFlow() {
   uint16_t registers_size = code_item_->registers_size_;
   uint32_t insns_size = code_item_->insns_size_in_code_units_;
@@ -1076,16 +1068,15 @@
     bool compile = IsCandidateForCompilation(ref, method_access_flags_);
     if (compile) {
       /* Generate a register map and add it to the method. */
-      UniquePtr<const std::vector<uint8_t> > map(GenerateGcMap());
-      if (map.get() == NULL) {
+      const std::vector<uint8_t>* dex_gc_map = GenerateLengthPrefixedGcMap();
+      if (dex_gc_map == NULL) {
         DCHECK_NE(failures_.size(), 0U);
         return false;  // Not a real failure, but a failure to encode
       }
       if (kIsDebugBuild) {
-        VerifyGcMap(*map);
+        VerifyLengthPrefixedGcMap(*dex_gc_map);
       }
-      const std::vector<uint8_t>* dex_gc_map = CreateLengthPrefixedDexGcMap(*(map.get()));
-      verifier::MethodVerifier::SetDexGcMap(ref, *dex_gc_map);
+      verifier::MethodVerifier::SetDexGcMap(ref, dex_gc_map);
     }
 
     if (has_check_casts_) {
@@ -1107,10 +1098,8 @@
 
 std::ostream& MethodVerifier::DumpFailures(std::ostream& os) {
   DCHECK_EQ(failures_.size(), failure_messages_.size());
-  if (VLOG_IS_ON(verifier)) {
-      for (size_t i = 0; i < failures_.size(); ++i) {
-          os << failure_messages_[i]->str() << "\n";
-      }
+  for (size_t i = 0; i < failures_.size(); ++i) {
+    os << failure_messages_[i]->str() << "\n";
   }
   return os;
 }
@@ -1211,7 +1200,8 @@
         // it's effectively considered initialized the instant we reach here (in the sense that we
         // can return without doing anything or call virtual methods).
         {
-          const RegType& reg_type = reg_types_.FromDescriptor(class_loader_, descriptor, false);
+          const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor,
+                                                              false);
           reg_line->SetRegisterType(arg_start + cur_arg, reg_type);
         }
         break;
@@ -1853,7 +1843,8 @@
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid fill-array-data with array type "
                                             << array_type;
         } else {
-          const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_);
+          const RegType& component_type = reg_types_.GetComponentType(array_type,
+                                                                      class_loader_->get());
           DCHECK(!component_type.IsConflict());
           if (component_type.IsNonZeroReferenceTypes()) {
             Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid fill-array-data with component type "
@@ -2168,7 +2159,7 @@
         const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
         uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
         const char* descriptor = dex_file_->StringByTypeIdx(return_type_idx);
-        return_type = &reg_types_.FromDescriptor(class_loader_, descriptor, false);
+        return_type = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
       }
       if (!return_type->IsLowHalf()) {
         work_line_->SetResultRegisterType(*return_type);
@@ -2235,8 +2226,8 @@
          */
         work_line_->MarkRefsAsInitialized(this_type);
       }
-      const RegType& return_type = reg_types_.FromDescriptor(class_loader_, return_type_descriptor,
-                                                             false);
+      const RegType& return_type = reg_types_.FromDescriptor(class_loader_->get(),
+                                                             return_type_descriptor, false);
       if (!return_type.IsLowHalf()) {
         work_line_->SetResultRegisterType(return_type);
       } else {
@@ -2257,11 +2248,12 @@
           uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
           const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
           uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
-          descriptor =  dex_file_->StringByTypeIdx(return_type_idx);
+          descriptor = dex_file_->StringByTypeIdx(return_type_idx);
         } else {
           descriptor = MethodHelper(called_method).GetReturnTypeDescriptor();
         }
-        const RegType& return_type =  reg_types_.FromDescriptor(class_loader_, descriptor, false);
+        const RegType& return_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor,
+                                                               false);
         if (!return_type.IsLowHalf()) {
           work_line_->SetResultRegisterType(return_type);
         } else {
@@ -2318,7 +2310,8 @@
       } else {
         descriptor = MethodHelper(abs_method).GetReturnTypeDescriptor();
       }
-      const RegType& return_type = reg_types_.FromDescriptor(class_loader_, descriptor, false);
+      const RegType& return_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor,
+                                                             false);
       if (!return_type.IsLowHalf()) {
         work_line_->SetResultRegisterType(return_type);
       } else {
@@ -2584,7 +2577,8 @@
       mirror::ArtMethod* called_method = VerifyInvokeVirtualQuickArgs(inst, is_range);
       if (called_method != NULL) {
         const char* descriptor = MethodHelper(called_method).GetReturnTypeDescriptor();
-        const RegType& return_type = reg_types_.FromDescriptor(class_loader_, descriptor, false);
+        const RegType& return_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor,
+                                                               false);
         if (!return_type.IsLowHalf()) {
           work_line_->SetResultRegisterType(return_type);
         } else {
@@ -2850,18 +2844,18 @@
 const RegType& MethodVerifier::ResolveClassAndCheckAccess(uint32_t class_idx) {
   const char* descriptor = dex_file_->StringByTypeIdx(class_idx);
   const RegType& referrer = GetDeclaringClass();
-  mirror::Class* klass = dex_cache_->GetResolvedType(class_idx);
+  mirror::Class* klass = (*dex_cache_)->GetResolvedType(class_idx);
   const RegType& result =
       klass != NULL ? reg_types_.FromClass(descriptor, klass,
                                            klass->CannotBeAssignedFromOtherTypes())
-                    : reg_types_.FromDescriptor(class_loader_, descriptor, false);
+                    : reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
   if (result.IsConflict()) {
     Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "accessing broken descriptor '" << descriptor
         << "' in " << referrer;
     return result;
   }
   if (klass == NULL && !result.IsUnresolvedTypes()) {
-    dex_cache_->SetResolvedType(class_idx, result.GetClass());
+    (*dex_cache_)->SetResolvedType(class_idx, result.GetClass());
   }
   // Check if access is allowed. Unresolved types use xxxWithAccessCheck to
   // check at runtime if access is allowed and so pass here. If result is
@@ -2935,7 +2929,7 @@
   }
   mirror::Class* klass = klass_type.GetClass();
   const RegType& referrer = GetDeclaringClass();
-  mirror::ArtMethod* res_method = dex_cache_->GetResolvedMethod(dex_method_idx);
+  mirror::ArtMethod* res_method = (*dex_cache_)->GetResolvedMethod(dex_method_idx);
   if (res_method == NULL) {
     const char* name = dex_file_->GetMethodName(method_id);
     const Signature signature = dex_file_->GetMethodSignature(method_id);
@@ -2948,7 +2942,7 @@
       res_method = klass->FindVirtualMethod(name, signature);
     }
     if (res_method != NULL) {
-      dex_cache_->SetResolvedMethod(dex_method_idx, res_method);
+      (*dex_cache_)->SetResolvedMethod(dex_method_idx, res_method);
     } else {
       // If a virtual or interface method wasn't found with the expected type, look in
       // the direct methods. This can happen when the wrong invoke type is used or when
@@ -3112,7 +3106,7 @@
           << " missing signature component";
       return NULL;
     }
-    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_, descriptor, false);
+    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
     uint32_t get_reg = is_range ? inst->VRegC_3rc() + actual_args : arg[actual_args];
     if (reg_type.IsIntegralTypes()) {
       const RegType& src_type = work_line_->GetRegisterType(get_reg);
@@ -3136,8 +3130,7 @@
 }
 
 mirror::ArtMethod* MethodVerifier::GetQuickInvokedMethod(const Instruction* inst,
-                                                              RegisterLine* reg_line,
-                                                              bool is_range) {
+                                                         RegisterLine* reg_line, bool is_range) {
   DCHECK(inst->Opcode() == Instruction::INVOKE_VIRTUAL_QUICK ||
          inst->Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK);
   const RegType& actual_arg_type = reg_line->GetInvocationThis(inst, is_range);
@@ -3152,11 +3145,13 @@
   } else {
     const std::string& descriptor(actual_arg_type.GetDescriptor());
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    this_class = class_linker->FindClass(descriptor.c_str(), class_loader_);
+    this_class = class_linker->FindClass(descriptor.c_str(), *class_loader_);
     if (this_class == NULL) {
-      Thread::Current()->ClearException();
+      Thread* self = Thread::Current();
+      self->ClearException();
       // Look for a system class
-      this_class = class_linker->FindClass(descriptor.c_str(), NULL);
+      SirtRef<mirror::ClassLoader> null_class_loader(self, nullptr);
+      this_class = class_linker->FindClass(descriptor.c_str(), null_class_loader);
     }
   }
   if (this_class == NULL) {
@@ -3246,7 +3241,7 @@
                                         << " missing signature component";
       return NULL;
     }
-    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_, descriptor, false);
+    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
     uint32_t get_reg = is_range ? inst->VRegC_3rc() + actual_args : arg[actual_args];
     if (!work_line_->VerifyRegisterType(get_reg, reg_type)) {
       return res_method;
@@ -3290,7 +3285,7 @@
     } else {
       // Verify each register. If "arg_count" is bad, VerifyRegisterType() will run off the end of
       // the list and fail. It's legal, if silly, for arg_count to be zero.
-      const RegType& expected_type = reg_types_.GetComponentType(res_type, class_loader_);
+      const RegType& expected_type = reg_types_.GetComponentType(res_type, class_loader_->get());
       uint32_t arg_count = (is_range) ? inst->VRegA_3rc() : inst->VRegA_35c();
       uint32_t arg[5];
       if (!is_range) {
@@ -3332,7 +3327,7 @@
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "not array type " << array_type << " with aget";
     } else {
       /* verify the class */
-      const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_);
+      const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_->get());
       if (!component_type.IsReferenceTypes() && !is_primitive) {
         Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "primitive array type " << array_type
             << " source for aget-object";
@@ -3409,7 +3404,7 @@
     } else if (!array_type.IsArrayTypes()) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "not array type " << array_type << " with aput";
     } else {
-      const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_);
+      const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_->get());
       const uint32_t vregA = inst->VRegA_23x();
       if (is_primitive) {
         VerifyPrimitivePut(component_type, insn_type, vregA);
@@ -3441,10 +3436,9 @@
   if (klass_type.IsUnresolvedTypes()) {
     return NULL;  // Can't resolve Class so no more to do here, will do checking at runtime.
   }
-  mirror::ArtField* field = Runtime::Current()->GetClassLinker()->ResolveFieldJLS(*dex_file_,
-                                                                               field_idx,
-                                                                               dex_cache_,
-                                                                               class_loader_);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  mirror::ArtField* field = class_linker->ResolveFieldJLS(*dex_file_, field_idx, *dex_cache_,
+                                                          *class_loader_);
   if (field == NULL) {
     VLOG(verifier) << "Unable to resolve static field " << field_idx << " ("
               << dex_file_->GetFieldName(field_id) << ") in "
@@ -3460,9 +3454,8 @@
   } else if (!field->IsStatic()) {
     Fail(VERIFY_ERROR_CLASS_CHANGE) << "expected field " << PrettyField(field) << " to be static";
     return NULL;
-  } else {
-    return field;
   }
+  return field;
 }
 
 mirror::ArtField* MethodVerifier::GetInstanceField(const RegType& obj_type, int field_idx) {
@@ -3478,10 +3471,9 @@
   if (klass_type.IsUnresolvedTypes()) {
     return NULL;  // Can't resolve Class so no more to do here
   }
-  mirror::ArtField* field = Runtime::Current()->GetClassLinker()->ResolveFieldJLS(*dex_file_,
-                                                                               field_idx,
-                                                                               dex_cache_,
-                                                                               class_loader_);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  mirror::ArtField* field = class_linker->ResolveFieldJLS(*dex_file_, field_idx, *dex_cache_,
+                                                          *class_loader_);
   if (field == NULL) {
     VLOG(verifier) << "Unable to resolve instance field " << field_idx << " ("
               << dex_file_->GetFieldName(field_id) << ") in "
@@ -3550,8 +3542,7 @@
   if (field_type == nullptr) {
     const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
     const char* descriptor = dex_file_->GetFieldTypeDescriptor(field_id);
-    mirror::ClassLoader* loader = class_loader_;
-    field_type = &reg_types_.FromDescriptor(loader, descriptor, false);
+    field_type = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
   }
   const uint32_t vregA = (is_static) ? inst->VRegA_21c() : inst->VRegA_22c();
   if (is_primitive) {
@@ -3613,8 +3604,7 @@
   if (field_type == nullptr) {
     const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
     const char* descriptor = dex_file_->GetFieldTypeDescriptor(field_id);
-    mirror::ClassLoader* loader = class_loader_;
-    field_type = &reg_types_.FromDescriptor(loader, descriptor, false);
+    field_type = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
   }
   const uint32_t vregA = (is_static) ? inst->VRegA_21c() : inst->VRegA_22c();
   if (is_primitive) {
@@ -3671,11 +3661,13 @@
     // We need to resolve the class from its descriptor.
     const std::string& descriptor(object_type.GetDescriptor());
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    object_class = class_linker->FindClass(descriptor.c_str(), class_loader_);
+    Thread* self = Thread::Current();
+    object_class = class_linker->FindClass(descriptor.c_str(), *class_loader_);
     if (object_class == NULL) {
-      Thread::Current()->ClearException();
+      self->ClearException();
       // Look for a system class
-      object_class = class_linker->FindClass(descriptor.c_str(), NULL);
+      SirtRef<mirror::ClassLoader> null_class_loader(self, nullptr);
+      object_class = class_linker->FindClass(descriptor.c_str(), null_class_loader);
     }
   }
   if (object_class == NULL) {
@@ -3881,8 +3873,8 @@
       MethodHelper mh(mirror_method_);
       mirror::Class* return_type_class = mh.GetReturnType();
       if (return_type_class != nullptr) {
-        return_type_ =&reg_types_.FromClass(mh.GetReturnTypeDescriptor(), return_type_class,
-                                            return_type_class->CannotBeAssignedFromOtherTypes());
+        return_type_ = &reg_types_.FromClass(mh.GetReturnTypeDescriptor(), return_type_class,
+                                             return_type_class->CannotBeAssignedFromOtherTypes());
       } else {
         Thread* self = Thread::Current();
         DCHECK(self->IsExceptionPending());
@@ -3894,7 +3886,7 @@
       const DexFile::ProtoId& proto_id = dex_file_->GetMethodPrototype(method_id);
       uint16_t return_type_idx = proto_id.return_type_idx_;
       const char* descriptor = dex_file_->GetTypeDescriptor(dex_file_->GetTypeId(return_type_idx));
-      return_type_ = &reg_types_.FromDescriptor(class_loader_, descriptor, false);
+      return_type_ = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
     }
   }
   return *return_type_;
@@ -3910,7 +3902,7 @@
       declaring_class_ = &reg_types_.FromClass(descriptor, klass,
                                                klass->CannotBeAssignedFromOtherTypes());
     } else {
-      declaring_class_ = &reg_types_.FromDescriptor(class_loader_, descriptor, false);
+      declaring_class_ = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
     }
   }
   return *declaring_class_;
@@ -3969,7 +3961,8 @@
         // String[] in which case the stores need to be of Strings.
         if (array_type.IsPreciseReference()) {
           const RegType& value_type(line->GetRegisterType(inst->VRegA_23x()));
-          const RegType& component_type(reg_types_.GetComponentType(array_type, class_loader_));
+          const RegType& component_type(reg_types_.GetComponentType(array_type,
+                                                                    class_loader_->get()));
           is_safe_cast = component_type.IsStrictlyAssignableFrom(value_type);
         }
       }
@@ -4026,8 +4019,8 @@
       // We can't devirtualize abstract classes except on arrays of abstract classes.
       continue;
     }
-    mirror::ArtMethod* abstract_method =
-        dex_cache_->GetResolvedMethod(is_range ? inst->VRegB_3rc() : inst->VRegB_35c());
+    mirror::ArtMethod* abstract_method = (*dex_cache_)->GetResolvedMethod(
+        is_range ? inst->VRegB_3rc() : inst->VRegB_35c());
     if (abstract_method == NULL) {
       // If the method is not found in the cache this means that it was never found
       // by ResolveMethodAndCheckAccess() called when verifying invoke_*.
@@ -4061,7 +4054,7 @@
   return pc_to_concrete_method_map.release();
 }
 
-const std::vector<uint8_t>* MethodVerifier::GenerateGcMap() {
+const std::vector<uint8_t>* MethodVerifier::GenerateLengthPrefixedGcMap() {
   size_t num_entries, ref_bitmap_bits, pc_bits;
   ComputeGcMapSizes(&num_entries, &ref_bitmap_bits, &pc_bits);
   // There's a single byte to encode the size of each bitmap
@@ -4099,7 +4092,12 @@
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Failed to encode GC map (size=" << table_size << ")";
     return NULL;
   }
-  table->reserve(table_size);
+  table->reserve(table_size + 4);  // table_size plus the length prefix
+  // Write table size
+  table->push_back((table_size & 0xff000000) >> 24);
+  table->push_back((table_size & 0x00ff0000) >> 16);
+  table->push_back((table_size & 0x0000ff00) >> 8);
+  table->push_back((table_size & 0x000000ff) >> 0);
   // Write table header
   table->push_back(format | ((ref_bitmap_bytes >> DexPcToReferenceMap::kRegMapFormatShift) &
                              ~DexPcToReferenceMap::kRegMapFormatMask));
@@ -4117,14 +4115,18 @@
       line->WriteReferenceBitMap(*table, ref_bitmap_bytes);
     }
   }
-  DCHECK_EQ(table->size(), table_size);
+  DCHECK_EQ(table->size(), table_size + 4);  // table_size plus the length prefix
   return table;
 }
 
-void MethodVerifier::VerifyGcMap(const std::vector<uint8_t>& data) {
+void MethodVerifier::VerifyLengthPrefixedGcMap(const std::vector<uint8_t>& data) {
   // Check that for every GC point there is a map entry, there aren't entries for non-GC points,
   // that the table data is well formed and all references are marked (or not) in the bitmap
-  DexPcToReferenceMap map(&data[0], data.size());
+  DCHECK_GE(data.size(), 4u);
+  size_t table_size = data.size() - 4u;
+  DCHECK_EQ(table_size, static_cast<size_t>((data[0] << 24) | (data[1] << 16) |
+                                            (data[2] << 8) | (data[3] << 0)));
+  DexPcToReferenceMap map(&data[4], table_size);
   size_t map_index = 0;
   for (size_t i = 0; i < code_item_->insns_size_in_code_units_; i++) {
     const uint8_t* reg_bitmap = map.FindBitMap(i, false);
@@ -4150,7 +4152,7 @@
   }
 }
 
-void MethodVerifier::SetDexGcMap(MethodReference ref, const std::vector<uint8_t>& gc_map) {
+void MethodVerifier::SetDexGcMap(MethodReference ref, const std::vector<uint8_t>* gc_map) {
   DCHECK(Runtime::Current()->IsCompiler());
   {
     WriterMutexLock mu(Thread::Current(), *dex_gc_maps_lock_);
@@ -4159,7 +4161,7 @@
       delete it->second;
       dex_gc_maps_->erase(it);
     }
-    dex_gc_maps_->Put(ref, &gc_map);
+    dex_gc_maps_->Put(ref, gc_map);
   }
   DCHECK(GetDexGcMap(ref) != NULL);
 }
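
[Editor's note on the GenerateLengthPrefixedGcMap / VerifyLengthPrefixedGcMap rename above: the map's size now travels as a four-byte big-endian prefix ahead of the payload, and the verifier recomputes it from data.size() and cross-checks it. A minimal standalone sketch of that scheme follows; WriteLengthPrefix/ReadLengthPrefix are illustrative names, not ART API.]

    #include <cstdint>
    #include <vector>

    // Append the 32-bit table size, most significant byte first; the map
    // payload follows immediately after these four bytes.
    void WriteLengthPrefix(std::vector<uint8_t>* table, uint32_t table_size) {
      table->push_back((table_size >> 24) & 0xff);
      table->push_back((table_size >> 16) & 0xff);
      table->push_back((table_size >> 8) & 0xff);
      table->push_back(table_size & 0xff);
    }

    // Recover the size; callers must first check data.size() >= 4, as the
    // DCHECK_GE in VerifyLengthPrefixedGcMap does.
    uint32_t ReadLengthPrefix(const std::vector<uint8_t>& data) {
      return (static_cast<uint32_t>(data[0]) << 24) |
             (static_cast<uint32_t>(data[1]) << 16) |
             (static_cast<uint32_t>(data[2]) << 8) |
             static_cast<uint32_t>(data[3]);
    }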
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 57fde1d..f72898e 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -33,6 +33,7 @@
 #include "reg_type_cache-inl.h"
 #include "register_line.h"
 #include "safe_map.h"
+#include "sirt_ref.h"
 #include "UniquePtr.h"
 
 namespace art {
@@ -142,14 +143,15 @@
   static FailureKind VerifyClass(const mirror::Class* klass, bool allow_soft_failures,
                                  std::string* error)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static FailureKind VerifyClass(const DexFile* dex_file, mirror::DexCache* dex_cache,
-                                 mirror::ClassLoader* class_loader,
+  static FailureKind VerifyClass(const DexFile* dex_file, SirtRef<mirror::DexCache>& dex_cache,
+                                 SirtRef<mirror::ClassLoader>& class_loader,
                                  const DexFile::ClassDef* class_def,
                                  bool allow_soft_failures, std::string* error)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void VerifyMethodAndDump(std::ostream& os, uint32_t method_idx, const DexFile* dex_file,
-                                  mirror::DexCache* dex_cache, mirror::ClassLoader* class_loader,
+                                  SirtRef<mirror::DexCache>& dex_cache,
+                                  SirtRef<mirror::ClassLoader>& class_loader,
                                   const DexFile::ClassDef* class_def,
                                   const DexFile::CodeItem* code_item,
                                   mirror::ArtMethod* method, uint32_t method_access_flags)
@@ -217,16 +219,13 @@
     return can_load_classes_;
   }
 
-  MethodVerifier(const DexFile* dex_file, mirror::DexCache* dex_cache,
-                 mirror::ClassLoader* class_loader, const DexFile::ClassDef* class_def,
-                 const DexFile::CodeItem* code_item,
-                 uint32_t method_idx, mirror::ArtMethod* method,
+  MethodVerifier(const DexFile* dex_file, SirtRef<mirror::DexCache>* dex_cache,
+                 SirtRef<mirror::ClassLoader>* class_loader, const DexFile::ClassDef* class_def,
+                 const DexFile::CodeItem* code_item, uint32_t method_idx, mirror::ArtMethod* method,
                  uint32_t access_flags, bool can_load_classes, bool allow_soft_failures)
           SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ~MethodVerifier() {
-    STLDeleteElements(&failure_messages_);
-  }
+  ~MethodVerifier();
 
   // Run verification on the method. Returns true if verification completes and false if the input
   // has an irrecoverable corruption.
@@ -257,8 +256,8 @@
    *      for code flow problems.
    */
   static FailureKind VerifyMethod(uint32_t method_idx, const DexFile* dex_file,
-                                  mirror::DexCache* dex_cache,
-                                  mirror::ClassLoader* class_loader,
+                                  SirtRef<mirror::DexCache>& dex_cache,
+                                  SirtRef<mirror::ClassLoader>& class_loader,
                                   const DexFile::ClassDef* class_def_idx,
                                   const DexFile::CodeItem* code_item,
                                   mirror::ArtMethod* method, uint32_t method_access_flags,
@@ -615,10 +614,10 @@
    * encode it in some clever fashion.
    * Returns a pointer to a newly-allocated RegisterMap, or NULL on failure.
    */
-  const std::vector<uint8_t>* GenerateGcMap();
+  const std::vector<uint8_t>* GenerateLengthPrefixedGcMap();
 
   // Verify that the GC map associated with method_ is well formed
-  void VerifyGcMap(const std::vector<uint8_t>& data);
+  void VerifyLengthPrefixedGcMap(const std::vector<uint8_t>& data);
 
   // Compute sizes for GC map data
   void ComputeGcMapSizes(size_t* gc_points, size_t* ref_bitmap_bits, size_t* log2_max_gc_pc);
@@ -630,7 +629,7 @@
       MethodReferenceComparator> DexGcMapTable;
   static ReaderWriterMutex* dex_gc_maps_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   static DexGcMapTable* dex_gc_maps_ GUARDED_BY(dex_gc_maps_lock_);
-  static void SetDexGcMap(MethodReference ref, const std::vector<uint8_t>& dex_gc_map)
+  static void SetDexGcMap(MethodReference ref, const std::vector<uint8_t>* dex_gc_map)
       LOCKS_EXCLUDED(dex_gc_maps_lock_);
 
 
@@ -685,9 +684,9 @@
   const RegType* return_type_;  // Lazily computed return type of the method.
   const DexFile* const dex_file_;  // The dex file containing the method.
   // The dex_cache for the declaring class of the method.
-  mirror::DexCache* dex_cache_ GUARDED_BY(Locks::mutator_lock_);
+  SirtRef<mirror::DexCache>* dex_cache_ GUARDED_BY(Locks::mutator_lock_);
   // The class loader for the declaring class of the method.
-  mirror::ClassLoader* class_loader_ GUARDED_BY(Locks::mutator_lock_);
+  SirtRef<mirror::ClassLoader>* class_loader_ GUARDED_BY(Locks::mutator_lock_);
   const DexFile::ClassDef* const class_def_;  // The class def of the declaring class of the method.
   const DexFile::CodeItem* const code_item_;  // The code item containing the code for the method.
   const RegType* declaring_class_;  // Lazily computed reg type of the method's declaring class.
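
[Editor's note: the recurring change across these files replaces raw mirror::DexCache* / mirror::ClassLoader* pointers with SirtRef<> handles (note the new sirt_ref.h include), dereferenced via ->get() or operator*. A SirtRef registers the object with the thread's stack indirect reference table, presumably so the verifier's references remain valid GC roots across suspension points. The usage pattern, condensed from the FindClass call sites above:]

    // Root the (possibly null) loader on the current thread, then pass the
    // SirtRef itself to FindClass rather than a raw mirror pointer.
    Thread* self = Thread::Current();
    SirtRef<mirror::ClassLoader> null_class_loader(self, nullptr);
    mirror::Class* klass = class_linker->FindClass(descriptor.c_str(), null_class_loader);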
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 50d1583..d82e75d 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -928,7 +928,8 @@
     }
     mirror::Class* common_elem = ClassJoin(s_ct, t_ct);
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    mirror::ClassLoader* class_loader = s->GetClassLoader();
+    Thread* self = Thread::Current();
+    SirtRef<mirror::ClassLoader> class_loader(self, s->GetClassLoader());
     std::string descriptor("[");
     descriptor += ClassHelper(common_elem).GetDescriptor();
     mirror::Class* array_class = class_linker->FindClass(descriptor.c_str(), class_loader);
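
[Editor's note: the join above builds the common array type's descriptor textually, since a dex/JVM array descriptor is simply '[' prepended to the element descriptor. A self-contained illustration; ArrayDescriptor is a hypothetical helper, not ART code:]

    #include <string>

    // "Ljava/lang/Object;" -> "[Ljava/lang/Object;"  (Object[])
    // "[I"                 -> "[[I"                  (int[][])
    std::string ArrayDescriptor(const std::string& element_descriptor) {
      return "[" + element_descriptor;
    }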
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index 446dd00..9c9673a 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -140,9 +140,10 @@
   // Class was not found, must create new type.
   // Try resolving class
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  SirtRef<mirror::ClassLoader> class_loader(Thread::Current(), loader);
   mirror::Class* klass = NULL;
   if (can_load_classes_) {
-    klass = class_linker->FindClass(descriptor, loader);
+    klass = class_linker->FindClass(descriptor, class_loader);
   } else {
     klass = class_linker->LookupClass(descriptor, loader);
     if (klass != NULL && !klass->IsLoaded()) {
@@ -261,11 +262,11 @@
     FloatType::Destroy();
     DoubleLoType::Destroy();
     DoubleHiType::Destroy();
-    for (uint16_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
+    for (int32_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
       PreciseConstType* type = small_precise_constants_[value - kMinSmallConstant];
       delete type;
+      small_precise_constants_[value - kMinSmallConstant] = nullptr;
     }
-
     RegTypeCache::primitive_initialized_ = false;
     RegTypeCache::primitive_count_ = 0;
   }
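
[Editor's note: two fixes ride along in the ShutDown() hunk above — the loop index widens from uint16_t to int32_t, and each freed slot is nulled. Assuming the usual ART bounds (kMinSmallConstant = -1, kMaxSmallConstant = 4), a sketch of why the signed index matters:]

    // With a uint16_t index, value = kMinSmallConstant (-1) wraps to 65535,
    // 65535 <= kMaxSmallConstant is false, the body never runs, and every
    // cached PreciseConstType leaks. A signed index iterates -1..4 as
    // intended; nulling each slot guards a hypothetical second ShutDown().
    for (int32_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
      delete small_precise_constants_[value - kMinSmallConstant];
      small_precise_constants_[value - kMinSmallConstant] = nullptr;
    }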
diff --git a/test/040-miranda/expected.txt b/test/040-miranda/expected.txt
index e22bbd9..011be2a 100644
--- a/test/040-miranda/expected.txt
+++ b/test/040-miranda/expected.txt
@@ -10,3 +10,5 @@
   inInterface:  true
   inInterface2: 28
   inAbstract:   true
+Test getting miranda method via reflection:
+  caught expected NoSuchMethodException
diff --git a/test/040-miranda/src/Main.java b/test/040-miranda/src/Main.java
index 1fd8287..ff5eba0 100644
--- a/test/040-miranda/src/Main.java
+++ b/test/040-miranda/src/Main.java
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+import java.lang.reflect.Method;
+
 /**
  * Miranda testing.
  */
@@ -37,5 +39,16 @@
         System.out.println("  inInterface:  " + mira2.inInterface());
         System.out.println("  inInterface2: " + mira2.inInterface2());
         System.out.println("  inAbstract:   " + mira2.inAbstract());
+
+        System.out.println("Test getting miranda method via reflection:");
+        try {
+          Class mirandaClass = Class.forName("MirandaAbstract");
+          Method mirandaMethod = mirandaClass.getDeclaredMethod("inInterface", (Class[]) null);
+          System.out.println("  did not expect to find miranda method");
+        } catch (NoSuchMethodException nsme) {
+          System.out.println("  caught expected NoSuchMethodException");
+        } catch (Exception e) {
+          System.out.println("  caught unexpected exception " + e);
+        }
     }
 }
diff --git a/test/JniTest/JniTest.java b/test/JniTest/JniTest.java
index 7014ef9..a1b1f0c 100644
--- a/test/JniTest/JniTest.java
+++ b/test/JniTest/JniTest.java
@@ -14,11 +14,14 @@
  * limitations under the License.
  */
 
+import java.lang.reflect.Method;
+
 class JniTest {
     public static void main(String[] args) {
         System.loadLibrary("arttest");
         testFindClassOnAttachedNativeThread();
         testCallStaticVoidMethodOnSubClass();
+        testGetMirandaMethod();
     }
 
     private static native void testFindClassOnAttachedNativeThread();
@@ -42,4 +45,23 @@
     private static class testCallStaticVoidMethodOnSubClass_SubClass
         extends testCallStaticVoidMethodOnSubClass_SuperClass {
     }
+
+    private static native Method testGetMirandaMethodNative();
+
+    private static void testGetMirandaMethod() {
+        Method m = testGetMirandaMethodNative();
+        if (m.getDeclaringClass() != testGetMirandaMethod_MirandaInterface.class) {
+            throw new AssertionError();
+        }
+    }
+
+    private static abstract class testGetMirandaMethod_MirandaAbstract implements testGetMirandaMethod_MirandaInterface {
+        public boolean inAbstract() {
+            return true;
+        }
+    }
+
+    private static interface testGetMirandaMethod_MirandaInterface {
+        public boolean inInterface();
+    }
 }
diff --git a/test/JniTest/jni_test.cc b/test/JniTest/jni_test.cc
index 72a3309..cfcbb64 100644
--- a/test/JniTest/jni_test.cc
+++ b/test/JniTest/jni_test.cc
@@ -81,3 +81,11 @@
 
   env->CallStaticVoidMethod(sub_class, execute);
 }
+
+extern "C" JNIEXPORT jobject JNICALL Java_JniTest_testGetMirandaMethodNative(JNIEnv* env, jclass) {
+  jclass abstract_class = env->FindClass("JniTest$testGetMirandaMethod_MirandaAbstract");
+  assert(abstract_class != NULL);
+  jmethodID miranda_method = env->GetMethodID(abstract_class, "inInterface", "()Z");
+  assert(miranda_method != NULL);
+  return env->ToReflectedMethod(abstract_class, miranda_method, JNI_FALSE);
+}
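
[Editor's note: the native helper above checks its lookups with assert(), which compiles away under NDEBUG. A more defensive sketch of the same miranda-method lookup, using only standard JNI calls:]

    extern "C" JNIEXPORT jobject JNICALL
    Java_JniTest_testGetMirandaMethodNative(JNIEnv* env, jclass) {
      jclass abstract_class = env->FindClass("JniTest$testGetMirandaMethod_MirandaAbstract");
      if (abstract_class == NULL) {
        return NULL;  // FindClass already raised ClassNotFoundException.
      }
      jmethodID miranda_method = env->GetMethodID(abstract_class, "inInterface", "()Z");
      if (miranda_method == NULL) {
        return NULL;  // GetMethodID already raised NoSuchMethodError.
      }
      return env->ToReflectedMethod(abstract_class, miranda_method, JNI_FALSE);
    }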
diff --git a/test/run-test b/test/run-test
index f706110..c3943e7 100755
--- a/test/run-test
+++ b/test/run-test
@@ -65,7 +65,7 @@
 dev_mode="no"
 update_mode="no"
 debug_mode="no"
-dalvik_mode="no"
+runtime="art"
 usage="no"
 build_only="no"
 
@@ -77,6 +77,7 @@
         shift
     elif [ "x$1" = "x--jvm" ]; then
         target_mode="no"
+        runtime="jvm"
         RUN="${progdir}/etc/reference-run-test-classes"
         NEED_DEX="false"
         shift
@@ -85,7 +86,7 @@
         shift
     elif [ "x$1" = "x--dalvik" ]; then
         lib="libdvm.so"
-        dalvik_mode="yes"
+        runtime="dalvik"
         shift
     elif [ "x$1" = "x--image" ]; then
         shift
@@ -155,15 +156,11 @@
     fi
 done
 
-run_args="${run_args} --lib $lib"
+if [ ! "$runtime" = "jvm" ]; then
+  run_args="${run_args} --lib $lib"
+fi
 
-if [ "$dalvik_mode" = "no" ]; then
-    if [ "$target_mode" = "no" ]; then
-        run_args="${run_args} --boot -Ximage:${ANDROID_HOST_OUT}/framework/core.art"
-    else
-        run_args="${run_args} --boot -Ximage:/data/art-test/core.art"
-    fi
-else
+if [ "$runtime" = "dalvik" ]; then
     if [ "$target_mode" = "no" ]; then
         framework="${OUT}/system/framework"
         bpath="${framework}/core.jar:${framework}/conscrypt.jar:${framework}/okhttp.jar:${framework}/core-junit.jar:${framework}/bouncycastle.jar:${framework}/ext.jar"
@@ -171,6 +168,12 @@
     else
         true # defaults to using target BOOTCLASSPATH
     fi
+elif [ "$runtime" = "art" ]; then
+    if [ "$target_mode" = "no" ]; then
+        run_args="${run_args} --boot -Ximage:${ANDROID_HOST_OUT}/framework/core.art"
+    else
+        run_args="${run_args} --boot -Ximage:/data/art-test/core.art"
+    fi
 fi
 
 if [ "$dev_mode" = "yes" -a "$update_mode" = "yes" ]; then