Merge branch 'dalvik-dev' of persistent-https://googleplex-android.git.corp.google.com/platform/art into merge-goog-dalvik-dev-to-aosp-master
diff --git a/Android.mk b/Android.mk
index 3112ab0..76fb411 100644
--- a/Android.mk
+++ b/Android.mk
@@ -18,6 +18,7 @@
 
 art_path := $(LOCAL_PATH)
 art_build_path := $(art_path)/build
+include $(art_build_path)/Android.common.mk
 
 ########################################################################
 # clean-oat targets
@@ -146,14 +147,9 @@
 test-art-host-gtest: $(ART_HOST_TEST_TARGETS)
 	@echo test-art-host-gtest PASSED
 
-define run-host-gtests-with
-  $(foreach file,$(sort $(ART_HOST_TEST_EXECUTABLES)),$(1) $(file) &&) true
-endef
-
 # "mm valgrind-test-art-host-gtest" to build and run the host gtests under valgrind.
 .PHONY: valgrind-test-art-host-gtest
-valgrind-test-art-host-gtest: test-art-host-dependencies
-	$(call run-host-gtests-with,valgrind --leak-check=full)
+valgrind-test-art-host-gtest: $(ART_HOST_VALGRIND_TEST_TARGETS)
 	@echo valgrind-test-art-host-gtest PASSED
 
 .PHONY: test-art-host-oat-default
@@ -305,6 +301,8 @@
 ########################################################################
 # oatdump targets
 
+ART_DUMP_OAT_PATH ?= $(OUT_DIR)
+
 .PHONY: dump-oat
 dump-oat: dump-oat-core dump-oat-boot
 
@@ -314,29 +312,29 @@
 .PHONY: dump-oat-core-host
 ifeq ($(ART_BUILD_HOST),true)
 dump-oat-core-host: $(HOST_CORE_IMG_OUT) $(OATDUMP)
-	$(OATDUMP) --image=$(HOST_CORE_IMG_OUT) --output=/tmp/core.host.oatdump.txt --host-prefix=""
-	@echo Output in /tmp/core.host.oatdump.txt
+	$(OATDUMP) --image=$(HOST_CORE_IMG_OUT) --output=$(ART_DUMP_OAT_PATH)/core.host.oatdump.txt --host-prefix=""
+	@echo Output in $(ART_DUMP_OAT_PATH)/core.host.oatdump.txt
 endif
 
 .PHONY: dump-oat-core-target
 ifeq ($(ART_BUILD_TARGET),true)
 dump-oat-core-target: $(TARGET_CORE_IMG_OUT) $(OATDUMP)
-	$(OATDUMP) --image=$(TARGET_CORE_IMG_OUT) --output=/tmp/core.target.oatdump.txt
-	@echo Output in /tmp/core.target.oatdump.txt
+	$(OATDUMP) --image=$(TARGET_CORE_IMG_OUT) --output=$(ART_DUMP_OAT_PATH)/core.target.oatdump.txt
+	@echo Output in $(ART_DUMP_OAT_PATH)/core.target.oatdump.txt
 endif
 
 .PHONY: dump-oat-boot
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
 dump-oat-boot: $(TARGET_BOOT_IMG_OUT) $(OATDUMP)
-	$(OATDUMP) --image=$(TARGET_BOOT_IMG_OUT) --output=/tmp/boot.oatdump.txt
-	@echo Output in /tmp/boot.oatdump.txt
+	$(OATDUMP) --image=$(TARGET_BOOT_IMG_OUT) --output=$(ART_DUMP_OAT_PATH)/boot.oatdump.txt
+	@echo Output in $(ART_DUMP_OAT_PATH)/boot.oatdump.txt
 endif
 
 .PHONY: dump-oat-Calculator
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
 dump-oat-Calculator: $(TARGET_OUT_APPS)/Calculator.odex $(TARGET_BOOT_IMG_OUT) $(OATDUMP)
-	$(OATDUMP) --oat-file=$< --output=/tmp/Calculator.oatdump.txt
-	@echo Output in /tmp/Calculator.oatdump.txt
+	$(OATDUMP) --oat-file=$< --output=$(ART_DUMP_OAT_PATH)/Calculator.oatdump.txt
+	@echo Output in $(ART_DUMP_OAT_PATH)/Calculator.oatdump.txt
 endif
 
 ########################################################################
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 655c7dd..bed48ba 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -22,6 +22,7 @@
 	compiler/elf_writer_test.cc \
 	compiler/image_test.cc \
 	compiler/jni/jni_compiler_test.cc \
+	compiler/leb128_encoder_test.cc \
 	compiler/oat_test.cc \
 	compiler/output_stream_test.cc \
 	compiler/utils/dedupe_set_test.cc \
@@ -80,6 +81,7 @@
 ART_HOST_TEST_EXECUTABLES :=
 ART_TARGET_TEST_EXECUTABLES :=
 ART_HOST_TEST_TARGETS :=
+ART_HOST_VALGRIND_TEST_TARGETS :=
 ART_TARGET_TEST_TARGETS :=
 
 ART_TEST_CFLAGS :=
@@ -170,6 +172,13 @@
 	@echo $$@ PASSED
 
 ART_HOST_TEST_TARGETS += $$(art_gtest_target)
+
+.PHONY: valgrind-$$(art_gtest_target)
+valgrind-$$(art_gtest_target): $$(art_gtest_exe) test-art-host-dependencies
+	valgrind --leak-check=full --error-exitcode=1 $$<
+	@echo $$@ PASSED
+
+ART_HOST_VALGRIND_TEST_TARGETS += valgrind-$$(art_gtest_target)
 endif
 endef
 
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index f964346..c04b38b 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -26,7 +26,7 @@
 
 # By default, do not rerun dex2oat if the tool changes.
 # Comment out the | to force dex2oat to rerun after any change.
-DEX2OAT_DEPENDENCY := #|
+DEX2OAT_DEPENDENCY := |
 DEX2OAT_DEPENDENCY += $(DEX2OAT)
 DEX2OAT_DEPENDENCY += $(LIBART_COMPILER)
 
diff --git a/compiler/Android.mk b/compiler/Android.mk
index fc2f02b..b7dc9f6 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -23,6 +23,7 @@
 	dex/local_value_numbering.cc \
 	dex/arena_allocator.cc \
 	dex/arena_bit_vector.cc \
+	dex/quick/arm/arm_dex_file_method_inliner.cc \
 	dex/quick/arm/assemble_arm.cc \
 	dex/quick/arm/call_arm.cc \
 	dex/quick/arm/fp_arm.cc \
@@ -30,6 +31,8 @@
 	dex/quick/arm/target_arm.cc \
 	dex/quick/arm/utility_arm.cc \
 	dex/quick/codegen_util.cc \
+	dex/quick/dex_file_method_inliner.cc \
+	dex/quick/dex_file_to_method_inliner_map.cc \
 	dex/quick/gen_common.cc \
 	dex/quick/gen_invoke.cc \
 	dex/quick/gen_loadstore.cc \
@@ -38,6 +41,7 @@
 	dex/quick/mips/call_mips.cc \
 	dex/quick/mips/fp_mips.cc \
 	dex/quick/mips/int_mips.cc \
+	dex/quick/mips/mips_dex_file_method_inliner.cc \
 	dex/quick/mips/target_mips.cc \
 	dex/quick/mips/utility_mips.cc \
 	dex/quick/mir_to_lir.cc \
@@ -48,6 +52,7 @@
 	dex/quick/x86/int_x86.cc \
 	dex/quick/x86/target_x86.cc \
 	dex/quick/x86/utility_x86.cc \
+	dex/quick/x86/x86_dex_file_method_inliner.cc \
 	dex/portable/mir_to_gbc.cc \
 	dex/dex_to_dex_compiler.cc \
 	dex/mir_dataflow.cc \
diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h
index fd46975..3798b45 100644
--- a/compiler/dex/compiler_ir.h
+++ b/compiler/dex/compiler_ir.h
@@ -20,7 +20,6 @@
 #include <vector>
 #include <llvm/IR/Module.h>
 #include "arena_allocator.h"
-#include "backend.h"
 #include "compiler_enums.h"
 #include "dex/quick/mir_to_lir.h"
 #include "dex_instruction.h"
@@ -39,39 +38,14 @@
 }  // namespace llvm
 
 struct ArenaMemBlock;
+class Backend;
 struct Memstats;
 class MIRGraph;
 class Mir2Lir;
 
 struct CompilationUnit {
-  explicit CompilationUnit(ArenaPool* pool)
-    : compiler_driver(NULL),
-      class_linker(NULL),
-      dex_file(NULL),
-      class_loader(NULL),
-      class_def_idx(0),
-      method_idx(0),
-      code_item(NULL),
-      access_flags(0),
-      invoke_type(kDirect),
-      shorty(NULL),
-      disable_opt(0),
-      enable_debug(0),
-      verbose(false),
-      compiler_backend(kNoBackend),
-      instruction_set(kNone),
-      num_dalvik_registers(0),
-      insns(NULL),
-      num_ins(0),
-      num_outs(0),
-      num_regs(0),
-      num_compiler_temps(0),
-      compiler_flip_match(false),
-      arena(pool),
-      mir_graph(NULL),
-      cg(NULL),
-      timings("QuickCompiler", true, false) {
-      }
+  explicit CompilationUnit(ArenaPool* pool);
+  ~CompilationUnit();
 
   void StartTimingSplit(const char* label);
   void NewTimingSplit(const char* label);
@@ -120,7 +94,7 @@
 
   UniquePtr<MIRGraph> mir_graph;   // MIR container.
   UniquePtr<Backend> cg;           // Target-specific codegen.
-  base::TimingLogger timings;
+  TimingLogger timings;
 };
 
 }  // namespace art
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 2f8521f..e53d636 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -31,6 +31,8 @@
 #include "llvm/llvm_compilation_unit.h"
 #endif
 
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+
 namespace {
 #if !defined(ART_USE_PORTABLE_COMPILER)
   pthread_once_t llvm_multi_init = PTHREAD_ONCE_INIT;
@@ -61,14 +63,20 @@
 LLVMInfo::~LLVMInfo() {
 }
 
+QuickCompilerContext::QuickCompilerContext(CompilerDriver& compiler)
+  : inliner_map_(new DexFileToMethodInlinerMap(&compiler)) {
+}
+
+QuickCompilerContext::~QuickCompilerContext() {
+}
+
 extern "C" void ArtInitQuickCompilerContext(art::CompilerDriver& compiler) {
   CHECK(compiler.GetCompilerContext() == NULL);
-  LLVMInfo* llvm_info = new LLVMInfo();
-  compiler.SetCompilerContext(llvm_info);
+  compiler.SetCompilerContext(new QuickCompilerContext(compiler));
 }
 
 extern "C" void ArtUnInitQuickCompilerContext(art::CompilerDriver& compiler) {
-  delete reinterpret_cast<LLVMInfo*>(compiler.GetCompilerContext());
+  delete reinterpret_cast<QuickCompilerContext*>(compiler.GetCompilerContext());
   compiler.SetCompilerContext(NULL);
 }
 
@@ -84,6 +92,7 @@
   // (1 << kBBOpt) |
   // (1 << kMatch) |
   // (1 << kPromoteCompilerTemps) |
+  // (1 << kSuppressExceptionEdges) |
   0;
 
 static uint32_t kCompilerDebugFlags = 0 |     // Enable debug/testing modes
@@ -108,6 +117,38 @@
   // (1 << kDebugTimings) |
   0;
 
+CompilationUnit::CompilationUnit(ArenaPool* pool)
+  : compiler_driver(NULL),
+    class_linker(NULL),
+    dex_file(NULL),
+    class_loader(NULL),
+    class_def_idx(0),
+    method_idx(0),
+    code_item(NULL),
+    access_flags(0),
+    invoke_type(kDirect),
+    shorty(NULL),
+    disable_opt(0),
+    enable_debug(0),
+    verbose(false),
+    compiler_backend(kNoBackend),
+    instruction_set(kNone),
+    num_dalvik_registers(0),
+    insns(NULL),
+    num_ins(0),
+    num_outs(0),
+    num_regs(0),
+    num_compiler_temps(0),
+    compiler_flip_match(false),
+    arena(pool),
+    mir_graph(NULL),
+    cg(NULL),
+    timings("QuickCompiler", true, false) {
+}
+
+CompilationUnit::~CompilationUnit() {
+}
+
 // TODO: Add a cumulative version of logging, and combine with dex2oat --dump-timing
 void CompilationUnit::StartTimingSplit(const char* label) {
   if (enable_debug & (1 << kDebugTimings)) {
@@ -125,7 +166,7 @@
   if (enable_debug & (1 << kDebugTimings)) {
     timings.EndSplit();
     LOG(INFO) << "TIMINGS " << PrettyMethod(method_idx, *dex_file);
-    LOG(INFO) << Dumpable<base::TimingLogger>(timings);
+    LOG(INFO) << Dumpable<TimingLogger>(timings);
   }
 }
 
@@ -180,7 +221,9 @@
 
   if (compiler_backend == kPortable) {
     // Fused long branches not currently useful in bitcode.
-    cu.disable_opt |= (1 << kBranchFusing);
+    cu.disable_opt |=
+        (1 << kBranchFusing) |
+        (1 << kSuppressExceptionEdges);
   }
 
   if (cu.instruction_set == kMips) {
diff --git a/compiler/dex/frontend.h b/compiler/dex/frontend.h
index 43f6855..4a863f5 100644
--- a/compiler/dex/frontend.h
+++ b/compiler/dex/frontend.h
@@ -56,6 +56,7 @@
   kMatch,
   kPromoteCompilerTemps,
   kBranchFusing,
+  kSuppressExceptionEdges,
 };
 
 // Force code generation paths for testing.
@@ -81,6 +82,9 @@
   kDebugTimings
 };
 
+class DexFileToMethodInlinerMap;
+class CompilerDriver;
+
 class LLVMInfo {
   public:
     LLVMInfo();
@@ -109,6 +113,19 @@
     UniquePtr<art::llvm::IRBuilder> ir_builder_;
 };
 
+class QuickCompilerContext {
+  public:
+    explicit QuickCompilerContext(CompilerDriver& compiler);
+    ~QuickCompilerContext();
+
+    DexFileToMethodInlinerMap* GetInlinerMap() {
+      return inliner_map_.get();
+    }
+
+  private:
+    UniquePtr<DexFileToMethodInlinerMap> inliner_map_;
+};
+
 struct CompilationUnit;
 struct BasicBlock;
 
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index 35d2923..75883b7 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -380,7 +380,9 @@
           }
           mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK;
         }
-        mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        if (mir->meta.throw_insn != NULL) {
+          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        }
         // Use side effect to note range check completed.
         (void)LookupValue(ARRAY_REF, array, index, NO_VALUE);
         // Establish value number for loaded register. Note use of memory version.
@@ -419,7 +421,9 @@
           }
           mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK;
         }
-        mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        if (mir->meta.throw_insn != NULL) {
+          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        }
         // Use side effect to note range check completed.
         (void)LookupValue(ARRAY_REF, array, index, NO_VALUE);
         // Rev the memory version
@@ -443,7 +447,9 @@
         } else {
           null_checked_.insert(base);
         }
-        mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        if (mir->meta.throw_insn != NULL) {
+          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        }
         uint16_t field_ref = mir->dalvikInsn.vC;
         uint16_t memory_version = GetMemoryVersion(base, field_ref);
         if (opcode == Instruction::IGET_WIDE) {
@@ -473,7 +479,9 @@
         } else {
           null_checked_.insert(base);
         }
-        mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        if (mir->meta.throw_insn != NULL) {
+          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
+        }
         uint16_t field_ref = mir->dalvikInsn.vC;
         AdvanceMemoryVersion(base, field_ref);
       }
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index 11e19dc..d359ee2 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -1243,12 +1243,13 @@
     if (mir->ssa_rep == NULL) {
       continue;
     }
-    // Each level of nesting adds *16 to count, up to 3 levels deep.
-    uint32_t weight = std::min(3U, static_cast<uint32_t>(bb->nesting_depth) * 4);
+    // Each level of nesting adds 100 to the use count, up to 3 levels deep.
+    uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth));
+    uint32_t weight = std::max(1U, depth * 100);
     for (int i = 0; i < mir->ssa_rep->num_uses; i++) {
       int s_reg = mir->ssa_rep->uses[i];
       raw_use_counts_.Increment(s_reg);
-      use_counts_.Put(s_reg, use_counts_.Get(s_reg) + (1 << weight));
+      use_counts_.Put(s_reg, use_counts_.Get(s_reg) + weight);
     }
     if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) {
       int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode];
@@ -1267,7 +1268,7 @@
         }
         if (uses_method_star) {
           raw_use_counts_.Increment(method_sreg_);
-          use_counts_.Put(method_sreg_, use_counts_.Get(method_sreg_) + (1 << weight));
+          use_counts_.Put(method_sreg_, use_counts_.Get(method_sreg_) + weight);
         }
       }
     }
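
For reference, the two weighting formulas above work out as follows for a use at
nesting depths 0 through 3 (the numbers follow directly from the code shown):

    old: weight = min(3, depth * 4);  count += 1 << weight        ->  1,   8,   8,   8
    new: depth  = min(3, depth);      count += max(1, depth*100)  ->  1, 100, 200, 300

So the new scheme keeps distinguishing deeper loop nests instead of saturating
after the first level.
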
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index cf758fc..deaf2ff 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -365,8 +365,8 @@
 }
 
 /* Process instructions with the kSwitch flag */
-void MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
-                                int flags) {
+BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset,
+                                       int width, int flags) {
   const uint16_t* switch_data =
       reinterpret_cast<const uint16_t*>(GetCurrentInsns() + cur_offset + insn->dalvikInsn.vB);
   int size;
@@ -437,6 +437,7 @@
                                             /* create */ true, /* immed_pred_block_p */ NULL);
   cur_block->fall_through = fallthrough_block->id;
   fallthrough_block->predecessors->Insert(cur_block->id);
+  return cur_block;
 }
 
 /* Process instructions with the kThrow flag */
@@ -444,6 +445,9 @@
                                       int width, int flags, ArenaBitVector* try_block_addr,
                                       const uint16_t* code_ptr, const uint16_t* code_end) {
   bool in_try_block = try_block_addr->IsBitSet(cur_offset);
+  bool is_throw = (insn->dalvikInsn.opcode == Instruction::THROW);
+  bool build_all_edges =
+      (cu_->disable_opt & (1 << kSuppressExceptionEdges)) || is_throw || in_try_block;
 
   /* In try block */
   if (in_try_block) {
@@ -473,7 +477,7 @@
       cur_block->successor_blocks->Insert(successor_block_info);
       catch_block->predecessors->Insert(cur_block->id);
     }
-  } else {
+  } else if (build_all_edges) {
     BasicBlock *eh_block = NewMemBB(kExceptionHandling, num_blocks_++);
     cur_block->taken = eh_block->id;
     block_list_.Insert(eh_block);
@@ -481,7 +485,7 @@
     eh_block->predecessors->Insert(cur_block->id);
   }
 
-  if (insn->dalvikInsn.opcode == Instruction::THROW) {
+  if (is_throw) {
     cur_block->explicit_throw = true;
     if (code_ptr < code_end) {
       // Force creation of new block following THROW via side-effect
@@ -494,6 +498,16 @@
     }
   }
 
+  if (!build_all_edges) {
+    /*
+     * Even though there is an exception edge here, control cannot return to this
+     * method.  Thus, for the purposes of dataflow analysis and optimization, we can
+     * ignore the edge.  Doing this reduces compile time, and increases the scope
+     * of the basic-block level optimization pass.
+     */
+    return cur_block;
+  }
+
   /*
    * Split the potentially-throwing instruction into two parts.
    * The first half will be a pseudo-op that captures the exception
@@ -695,7 +709,7 @@
       cur_block = ProcessCanThrow(cur_block, insn, current_offset_, width, flags, try_block_addr_,
                                   code_ptr, code_end);
     } else if (flags & Instruction::kSwitch) {
-      ProcessCanSwitch(cur_block, insn, current_offset_, width, flags);
+      cur_block = ProcessCanSwitch(cur_block, insn, current_offset_, width, flags);
     }
     current_offset_ += width;
     BasicBlock *next_block = FindBlock(current_offset_, /* split */ false, /* create */
@@ -1100,6 +1114,7 @@
 void MIRGraph::DumpMIRGraph() {
   BasicBlock* bb;
   const char* block_type_names[] = {
+    "Null Block",
     "Entry Block",
     "Code Block",
     "Exit Block",
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index a69dde0..8c20728 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -698,8 +698,8 @@
   void ProcessTryCatchBlocks();
   BasicBlock* ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
                                int flags, const uint16_t* code_ptr, const uint16_t* code_end);
-  void ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
-                        int flags);
+  BasicBlock* ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
+                               int flags);
   BasicBlock* ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
                               int flags, ArenaBitVector* try_block_addr, const uint16_t* code_ptr,
                               const uint16_t* code_end);
diff --git a/compiler/dex/portable/mir_to_gbc.cc b/compiler/dex/portable/mir_to_gbc.cc
index 07bd2aa..963cbeb 100644
--- a/compiler/dex/portable/mir_to_gbc.cc
+++ b/compiler/dex/portable/mir_to_gbc.cc
@@ -1970,7 +1970,7 @@
 
     ::llvm::OwningPtr< ::llvm::tool_output_file> out_file(
         new ::llvm::tool_output_file(fname.c_str(), errmsg,
-                                   ::llvm::sys::fs::F_Binary));
+                                   ::llvm::raw_fd_ostream::F_Binary));
 
     if (!errmsg.empty()) {
       LOG(ERROR) << "Failed to create bitcode output file: " << errmsg;
diff --git a/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc b/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc
new file mode 100644
index 0000000..257b2c4
--- /dev/null
+++ b/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "dex/compiler_enums.h"
+
+#include "arm_dex_file_method_inliner.h"
+
+namespace art {
+
+const DexFileMethodInliner::IntrinsicDef ArmDexFileMethodInliner::kIntrinsicMethods[] = {
+#define INTRINSIC(c, n, p, o, d) \
+    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, d } }
+
+    INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0),
+    INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0),
+    INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0),
+    INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, 0),
+
+    INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, kWord),
+    INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, kLong),
+    INTRINSIC(JavaLangShort, ReverseBytes, S_S, kIntrinsicReverseBytes, kSignedHalf),
+
+    INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
+    INTRINSIC(JavaLangMath,       Abs, J_J, kIntrinsicAbsLong, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, J_J, kIntrinsicAbsLong, 0),
+    INTRINSIC(JavaLangMath,       Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangStrictMath, Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangMath,       Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangStrictMath, Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangMath,       Sqrt, D_D, kIntrinsicSqrt, 0),
+    INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0),
+
+    INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0),
+    INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0),
+    INTRINSIC(JavaLangString, IsEmpty, _Z, kIntrinsicIsEmptyOrLength, kIntrinsicFlagIsEmpty),
+    INTRINSIC(JavaLangString, IndexOf, II_I, kIntrinsicIndexOf, kIntrinsicFlagNone),
+    INTRINSIC(JavaLangString, IndexOf, I_I, kIntrinsicIndexOf, kIntrinsicFlagBase0),
+    INTRINSIC(JavaLangString, Length, _I, kIntrinsicIsEmptyOrLength, kIntrinsicFlagLength),
+
+    INTRINSIC(JavaLangThread, CurrentThread, _Thread, kIntrinsicCurrentThread, 0),
+
+    INTRINSIC(LibcoreIoMemory, PeekByte, J_B, kIntrinsicPeek, kSignedByte),
+    INTRINSIC(LibcoreIoMemory, PeekIntNative, J_I, kIntrinsicPeek, kWord),
+    INTRINSIC(LibcoreIoMemory, PeekLongNative, J_J, kIntrinsicPeek, kLong),
+    INTRINSIC(LibcoreIoMemory, PeekShortNative, J_S, kIntrinsicPeek, kSignedHalf),
+    INTRINSIC(LibcoreIoMemory, PokeByte, JB_V, kIntrinsicPoke, kSignedByte),
+    INTRINSIC(LibcoreIoMemory, PokeIntNative, JI_V, kIntrinsicPoke, kWord),
+    INTRINSIC(LibcoreIoMemory, PokeLongNative, JJ_V, kIntrinsicPoke, kLong),
+    INTRINSIC(LibcoreIoMemory, PokeShortNative, JS_V, kIntrinsicPoke, kSignedHalf),
+
+    INTRINSIC(SunMiscUnsafe, CompareAndSwapInt, ObjectJII_Z, kIntrinsicCas,
+              kIntrinsicFlagNone),
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapLong, ObjectJJJ_Z, kIntrinsicCas,
+    //           kIntrinsicFlagIsLong),
+    INTRINSIC(SunMiscUnsafe, CompareAndSwapObject, ObjectJObjectObject_Z, kIntrinsicCas,
+              kIntrinsicFlagIsObject),
+
+#define UNSAFE_GET_PUT(type, code, type_flags) \
+    INTRINSIC(SunMiscUnsafe, Get ## type, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              type_flags & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Get ## type ## Volatile, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              (type_flags | kIntrinsicFlagIsVolatile) & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Put ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags), \
+    INTRINSIC(SunMiscUnsafe, Put ## type ## Volatile, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsVolatile), \
+    INTRINSIC(SunMiscUnsafe, PutOrdered ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsOrdered)
+
+    UNSAFE_GET_PUT(Int, I, kIntrinsicFlagNone),
+    UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong),
+    UNSAFE_GET_PUT(Object, Object, kIntrinsicFlagIsObject),
+#undef UNSAFE_GET_PUT
+
+#undef INTRINSIC
+};
+
+ArmDexFileMethodInliner::ArmDexFileMethodInliner() {
+}
+
+ArmDexFileMethodInliner::~ArmDexFileMethodInliner() {
+}
+
+void ArmDexFileMethodInliner::FindIntrinsics(const DexFile* dex_file) {
+  IndexCache cache;
+  DoFindIntrinsics(dex_file, &cache, kIntrinsicMethods, arraysize(kIntrinsicMethods));
+}
+
+}  // namespace art
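
For readers scanning the table above: INTRINSIC is plain token pasting, so a line
such as the Math.sqrt entry expands (by hand, for illustration) to

    // INTRINSIC(JavaLangMath, Sqrt, D_D, kIntrinsicSqrt, 0) becomes:
    { { kClassCacheJavaLangMath, kNameCacheSqrt, kProtoCacheD_D }, { kIntrinsicSqrt, 0 } },

i.e. a method-reference triple of class, name, and proto cache indexes paired with
the intrinsic opcode and its flag/data word, matching the IntrinsicDef aggregate
declared by DexFileMethodInliner.
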
diff --git a/compiler/dex/quick/arm/arm_dex_file_method_inliner.h b/compiler/dex/quick/arm/arm_dex_file_method_inliner.h
new file mode 100644
index 0000000..3428391
--- /dev/null
+++ b/compiler/dex/quick/arm/arm_dex_file_method_inliner.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_ARM_ARM_DEX_FILE_METHOD_INLINER_H_
+#define ART_COMPILER_DEX_QUICK_ARM_ARM_DEX_FILE_METHOD_INLINER_H_
+
+#include "dex/quick/dex_file_method_inliner.h"
+
+namespace art {
+
+class ArmDexFileMethodInliner : public DexFileMethodInliner {
+  public:
+    ArmDexFileMethodInliner();
+    ~ArmDexFileMethodInliner();
+
+    void FindIntrinsics(const DexFile* dex_file);
+
+  private:
+    static const IntrinsicDef kIntrinsicMethods[];
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_ARM_ARM_DEX_FILE_METHOD_INLINER_H_
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index ffaaf84..8cd7c94 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -334,7 +334,7 @@
   kThumb2VcvtDF,     // vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12] [10111100] vm[3..0].
   kThumb2Vsqrts,     // vsqrt.f32 vd, vm [1110111010110001] vd[15..12] [10101100] vm[3..0].
   kThumb2Vsqrtd,     // vsqrt.f64 vd, vm [1110111010110001] vd[15..12] [10111100] vm[3..0].
-  kThumb2MovImmShift,  // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8.
+  kThumb2MovI8M,     // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8.
   kThumb2MovImm16,   // mov(T3) rd, #<const> [11110] i [0010100] imm4 [0] imm3 rd[11..8] imm8.
   kThumb2StrRRI12,   // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
   kThumb2LdrRRI12,   // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
@@ -346,14 +346,14 @@
   kThumb2MovRR,      // mov rd, rm [11101010010011110000] rd[11..8] [0000] rm[3..0].
   kThumb2Vmovs,      // vmov.f32 vd, vm [111011101] D [110000] vd[15..12] 101001] M [0] vm[3..0].
   kThumb2Vmovd,      // vmov.f64 vd, vm [111011101] D [110000] vd[15..12] 101101] M [0] vm[3..0].
-  kThumb2Ldmia,      // ldmia  [111010001001[ rn[19..16] mask[15..0].
-  kThumb2Stmia,      // stmia  [111010001000[ rn[19..16] mask[15..0].
+  kThumb2Ldmia,      // ldmia  [111010001001] rn[19..16] mask[15..0].
+  kThumb2Stmia,      // stmia  [111010001000] rn[19..16] mask[15..0].
   kThumb2AddRRR,     // add [111010110000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2SubRRR,     // sub [111010111010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2SbcRRR,     // sbc [111010110110] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2CmpRR,      // cmp [111010111011] rn[19..16] [0000] [1111] [0000] rm[3..0].
-  kThumb2SubRRI12,   // sub rd, rn, #imm12 [11110] i [01010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
-  kThumb2MvnImm12,   // mov(T2) rd, #<const> [11110] i [00011011110] imm3 rd[11..8] imm8.
+  kThumb2SubRRI12,   // sub rd, rn, #imm12 [11110] i [101010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2MvnI8M,     // mov(T2) rd, #<const> [11110] i [00011011110] imm3 rd[11..8] imm8.
   kThumb2Sel,        // sel rd, rn, rm [111110101010] rn[19-16] rd[11-8] rm[3-0].
   kThumb2Ubfx,       // ubfx rd,rn,#lsb,#width [111100111100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0].
   kThumb2Sbfx,       // ubfx rd,rn,#lsb,#width [111100110100] rn[19..16] [0] imm3[14-12] rd[11-8] w[4-0].
@@ -373,7 +373,8 @@
   kThumb2StrbRRI12,  // strb rt,[rn,#imm12] [111110001000] rt[15..12] rn[19..16] imm12[11..0].
   kThumb2Pop,        // pop   [1110100010111101] list[15-0]*/
   kThumb2Push,       // push  [1110100100101101] list[15-0]*/
-  kThumb2CmpRI12,    // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
+  kThumb2CmpRI8M,    // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
+  kThumb2CmnRI8M,    // cmn rn, #<const> [11110] i [010001] rn[19-16] [0] imm3 [1111] imm8[7..0].
   kThumb2AdcRRR,     // adc [111010110101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2AndRRR,     // and [111010100000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2BicRRR,     // bic [111010100010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
@@ -383,7 +384,7 @@
   kThumb2SdivRRR,    // sdiv [111110111001] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
   kThumb2UdivRRR,    // udiv [111110111011] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
   kThumb2MnvRR,      // mvn [11101010011011110] rd[11-8] [0000] rm[3..0].
-  kThumb2RsubRRI8,   // rsub [111100011100] rn[19..16] [0000] rd[11..8] imm8[7..0].
+  kThumb2RsubRRI8M,  // rsb rd, rn, #<const> [11110] i [011101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
   kThumb2NegRR,      // actually rsub rd, rn, #0.
   kThumb2OrrRRR,     // orr [111010100100] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2TstRR,      // tst [111010100001] rn[19..16] [0000] [1111] [0000] rm[3..0].
@@ -395,14 +396,14 @@
   kThumb2LsrRRI5,    // lsr [11101010010011110] imm[14.12] rd[11..8] [01] rm[3..0].
   kThumb2AsrRRI5,    // asr [11101010010011110] imm[14.12] rd[11..8] [10] rm[3..0].
   kThumb2RorRRI5,    // ror [11101010010011110] imm[14.12] rd[11..8] [11] rm[3..0].
-  kThumb2BicRRI8,    // bic [111100000010] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2AndRRI8,    // bic [111100000000] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2OrrRRI8,    // orr [111100000100] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2EorRRI8,    // eor [111100001000] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2AddRRI8,    // add [111100001000] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2AdcRRI8,    // adc [111100010101] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2SubRRI8,    // sub [111100011011] rn[19..16] [0] imm3 rd[11..8] imm8.
-  kThumb2SbcRRI8,    // sbc [111100010111] rn[19..16] [0] imm3 rd[11..8] imm8.
+  kThumb2BicRRI8M,   // bic rd, rn, #<const> [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2AndRRI8M,   // and rd, rn, #<const> [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2OrrRRI8M,   // orr rd, rn, #<const> [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2EorRRI8M,   // eor rd, rn, #<const> [11110] i [001000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2AddRRI8M,   // add rd, rn, #<const> [11110] i [010001] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2AdcRRI8M,   // adc rd, rn, #<const> [11110] i [010101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2SubRRI8M,   // sub rd, rn, #<const> [11110] i [011011] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2SbcRRI8M,   // sbc rd, rn, #<const> [11110] i [010111] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
   kThumb2RevRR,      // rev [111110101001] rm[19..16] [1111] rd[11..8] 1000 rm[3..0]
   kThumb2RevshRR,    // rev [111110101001] rm[19..16] [1111] rd[11..8] 1011 rm[3..0]
   kThumb2It,         // it [10111111] firstcond[7-4] mask[3-0].
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 3d0f263..1c81a5a 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -489,7 +489,7 @@
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
                  "vsqrt.f64 ", "!0S, !1S", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MovImmShift, 0xf04f0000, /* no setflags encoding */
+    ENCODING_MAP(kThumb2MovI8M, 0xf04f0000, /* no setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
                  "mov", "!0C, #!1m", 4, kFixupNone),
@@ -573,8 +573,8 @@
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
                  "sub", "!0C,!1C,#!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2MvnImm12,  0xf06f0000, /* no setflags encoding */
-                 kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
+    ENCODING_MAP(kThumb2MvnI8M,  0xf06f0000, /* no setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
                  "mvn", "!0C, #!1n", 4, kFixupNone),
     ENCODING_MAP(kThumb2Sel,       0xfaa0f080,
@@ -656,11 +656,16 @@
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
                  | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4, kFixupPushPop),
-    ENCODING_MAP(kThumb2CmpRI12, 0xf1b00f00,
+    ENCODING_MAP(kThumb2CmpRI8M, 0xf1b00f00,
                  kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_USE0 | SETS_CCODES,
                  "cmp", "!0C, #!1m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2CmnRI8M, 0xf1100f00,
+                 kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_USE0 | SETS_CCODES,
+                 "cmn", "!0C, #!1m", 4, kFixupNone),
     ENCODING_MAP(kThumb2AdcRRR,  0xeb500000, /* setflags encoding */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtShift, -1, -1,
@@ -699,11 +704,11 @@
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "mvn", "!0C, !1C, shift !2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2RsubRRI8,       0xf1d00000,
+    ENCODING_MAP(kThumb2RsubRRI8M,       0xf1d00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
-                 "rsb", "!0C,!1C,#!2m", 4, kFixupNone),
+                 "rsbs", "!0C,!1C,#!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2NegRR,       0xf1d00000, /* instance of rsub */
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
@@ -750,38 +755,38 @@
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "ror", "!0C, !1C, #!2d", 4, kFixupNone),
-    ENCODING_MAP(kThumb2BicRRI8,  0xf0200000,
+    ENCODING_MAP(kThumb2BicRRI8M,  0xf0200000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "bic", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AndRRI8,  0xf0000000,
+    ENCODING_MAP(kThumb2AndRRI8M,  0xf0000000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "and", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2OrrRRI8,  0xf0400000,
+    ENCODING_MAP(kThumb2OrrRRI8M,  0xf0400000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "orr", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2EorRRI8,  0xf0800000,
+    ENCODING_MAP(kThumb2EorRRI8M,  0xf0800000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "eor", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AddRRI8,  0xf1100000,
+    ENCODING_MAP(kThumb2AddRRI8M,  0xf1100000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
                  "adds", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2AdcRRI8,  0xf1500000,
+    ENCODING_MAP(kThumb2AdcRRI8M,  0xf1500000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
                  "adcs", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SubRRI8,  0xf1b00000,
+    ENCODING_MAP(kThumb2SubRRI8M,  0xf1b00000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
                  "subs", "!0C, !1C, #!2m", 4, kFixupNone),
-    ENCODING_MAP(kThumb2SbcRRI8,  0xf1700000,
+    ENCODING_MAP(kThumb2SbcRRI8M,  0xf1700000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 15355be..de3223a 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -104,7 +104,7 @@
     void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                   RegLocation rl_src2);
     void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
-    bool GenInlinedCas32(CallInfo* info, bool need_write_barrier);
+    bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
     bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index 480e021..1575ece 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -274,7 +274,7 @@
   NewLIR0(kThumb2Fmstat);
 
   OpIT((default_result == -1) ? kCondGt : kCondMi, "");
-  NewLIR2(kThumb2MovImmShift, rl_result.low_reg,
+  NewLIR2(kThumb2MovI8M, rl_result.low_reg,
           ModifiedImmediate(-default_result));  // Must not alter ccodes
   GenBarrier();
 
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 42bf3d4..9727179 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -97,7 +97,7 @@
   LIR* branch3 = OpCondBranch(kCondEq, NULL);
 
   OpIT(kCondHi, "E");
-  NewLIR2(kThumb2MovImmShift, t_reg, ModifiedImmediate(-1));
+  NewLIR2(kThumb2MovI8M, t_reg, ModifiedImmediate(-1));
   LoadConstant(t_reg, 1);
   GenBarrier();
 
@@ -299,7 +299,6 @@
 LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, int reg, int check_value,
                                 LIR* target) {
   LIR* branch;
-  int mod_imm;
   ArmConditionCode arm_cond = ArmConditionEncoding(cond);
   /*
    * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
@@ -317,16 +316,7 @@
     branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
                      reg, 0);
   } else {
-    mod_imm = ModifiedImmediate(check_value);
-    if (ARM_LOWREG(reg) && ((check_value & 0xff) == check_value)) {
-      NewLIR2(kThumbCmpRI8, reg, check_value);
-    } else if (mod_imm >= 0) {
-      NewLIR2(kThumb2CmpRI12, reg, mod_imm);
-    } else {
-      int t_reg = AllocTemp();
-      LoadConstant(t_reg, check_value);
-      OpRegReg(kOpCmp, reg, t_reg);
-    }
+    OpRegImm(kOpCmp, reg, check_value);
     branch = NewLIR2(kThumbBCond, 0, arm_cond);
   }
   branch->target = target;
@@ -570,14 +560,15 @@
   LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm";
 }
 
-bool ArmMir2Lir::GenInlinedCas32(CallInfo* info, bool need_write_barrier) {
+bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
+  DCHECK(!is_long);  // not supported yet
   DCHECK_EQ(cu_->instruction_set, kThumb2);
   // Unused - RegLocation rl_src_unsafe = info->args[0];
-  RegLocation rl_src_obj= info->args[1];  // Object - known non-null
-  RegLocation rl_src_offset= info->args[2];  // long low
+  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
+  RegLocation rl_src_offset = info->args[2];  // long low
   rl_src_offset.wide = 0;  // ignore high half in info->args[3]
-  RegLocation rl_src_expected= info->args[4];  // int or Object
-  RegLocation rl_src_new_value= info->args[5];  // int or Object
+  RegLocation rl_src_expected = info->args[4];  // int, long or Object
+  RegLocation rl_src_new_value = info->args[5];  // int, long or Object
   RegLocation rl_dest = InlineTarget(info);  // boolean place for result
 
 
@@ -587,7 +578,7 @@
   RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
   RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
 
-  if (need_write_barrier && !mir_graph_->IsConstantNullRef(rl_new_value)) {
+  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
     // Mark card for object assuming new value is stored.
     MarkGCCard(rl_new_value.low_reg, rl_object.low_reg);
   }
@@ -1124,8 +1115,8 @@
   switch (opcode) {
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
-      NewLIR3(kThumb2AddRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
-      NewLIR3(kThumb2AdcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
+      NewLIR3(kThumb2AddRRI8M, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
+      NewLIR3(kThumb2AdcRRI8M, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
       break;
     case Instruction::OR_LONG:
     case Instruction::OR_LONG_2ADDR:
@@ -1152,8 +1143,8 @@
       break;
     case Instruction::SUB_LONG_2ADDR:
     case Instruction::SUB_LONG:
-      NewLIR3(kThumb2SubRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
-      NewLIR3(kThumb2SbcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
+      NewLIR3(kThumb2SubRRI8M, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
+      NewLIR3(kThumb2SbcRRI8M, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
       break;
     default:
       LOG(FATAL) << "Unexpected opcode " << opcode;
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index d631cf7..8a8b168 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -184,12 +184,12 @@
   /* Check Modified immediate special cases */
   mod_imm = ModifiedImmediate(value);
   if (mod_imm >= 0) {
-    res = NewLIR2(kThumb2MovImmShift, r_dest, mod_imm);
+    res = NewLIR2(kThumb2MovI8M, r_dest, mod_imm);
     return res;
   }
   mod_imm = ModifiedImmediate(~value);
   if (mod_imm >= 0) {
-    res = NewLIR2(kThumb2MvnImm12, r_dest, mod_imm);
+    res = NewLIR2(kThumb2MvnI8M, r_dest, mod_imm);
     return res;
   }
   /* 16-bit immediate? */
@@ -446,7 +446,6 @@
   ArmOpcode alt_opcode = kThumbBkpt;
   bool all_low_regs = (ARM_LOWREG(r_dest) && ARM_LOWREG(r_src1));
   int32_t mod_imm = ModifiedImmediate(value);
-  int32_t mod_imm_neg = ModifiedImmediate(-value);
 
   switch (op) {
     case kOpLsl:
@@ -482,47 +481,55 @@
         else
           opcode = (neg) ? kThumbAddRRI3 : kThumbSubRRI3;
         return NewLIR3(opcode, r_dest, r_src1, abs_value);
-      } else if ((abs_value & 0xff) == abs_value) {
+      } else if ((abs_value & 0x3ff) == abs_value) {
         if (op == kOpAdd)
           opcode = (neg) ? kThumb2SubRRI12 : kThumb2AddRRI12;
         else
           opcode = (neg) ? kThumb2AddRRI12 : kThumb2SubRRI12;
         return NewLIR3(opcode, r_dest, r_src1, abs_value);
       }
-      if (mod_imm_neg >= 0) {
-        op = (op == kOpAdd) ? kOpSub : kOpAdd;
-        mod_imm = mod_imm_neg;
+      if (mod_imm < 0) {
+        mod_imm = ModifiedImmediate(-value);
+        if (mod_imm >= 0) {
+          op = (op == kOpAdd) ? kOpSub : kOpAdd;
+        }
       }
       if (op == kOpSub) {
-        opcode = kThumb2SubRRI8;
+        opcode = kThumb2SubRRI8M;
         alt_opcode = kThumb2SubRRR;
       } else {
-        opcode = kThumb2AddRRI8;
+        opcode = kThumb2AddRRI8M;
         alt_opcode = kThumb2AddRRR;
       }
       break;
     case kOpRsub:
-      opcode = kThumb2RsubRRI8;
+      opcode = kThumb2RsubRRI8M;
       alt_opcode = kThumb2RsubRRR;
       break;
     case kOpAdc:
-      opcode = kThumb2AdcRRI8;
+      opcode = kThumb2AdcRRI8M;
       alt_opcode = kThumb2AdcRRR;
       break;
     case kOpSbc:
-      opcode = kThumb2SbcRRI8;
+      opcode = kThumb2SbcRRI8M;
       alt_opcode = kThumb2SbcRRR;
       break;
     case kOpOr:
-      opcode = kThumb2OrrRRI8;
+      opcode = kThumb2OrrRRI8M;
       alt_opcode = kThumb2OrrRRR;
       break;
     case kOpAnd:
-      opcode = kThumb2AndRRI8;
+      if (mod_imm < 0) {
+        mod_imm = ModifiedImmediate(~value);
+        if (mod_imm >= 0) {
+          return NewLIR3(kThumb2BicRRI8M, r_dest, r_src1, mod_imm);
+        }
+      }
+      opcode = kThumb2AndRRI8M;
       alt_opcode = kThumb2AndRRR;
       break;
     case kOpXor:
-      opcode = kThumb2EorRRI8;
+      opcode = kThumb2EorRRI8M;
       alt_opcode = kThumb2EorRRR;
       break;
     case kOpMul:
@@ -531,15 +538,19 @@
       alt_opcode = kThumb2MulRRR;
       break;
     case kOpCmp: {
-      int mod_imm = ModifiedImmediate(value);
       LIR* res;
       if (mod_imm >= 0) {
-        res = NewLIR2(kThumb2CmpRI12, r_src1, mod_imm);
+        res = NewLIR2(kThumb2CmpRI8M, r_src1, mod_imm);
       } else {
-        int r_tmp = AllocTemp();
-        res = LoadConstant(r_tmp, value);
-        OpRegReg(kOpCmp, r_src1, r_tmp);
-        FreeTemp(r_tmp);
+        mod_imm = ModifiedImmediate(-value);
+        if (mod_imm >= 0) {
+          res = NewLIR2(kThumb2CmnRI8M, r_src1, mod_imm);
+        } else {
+          int r_tmp = AllocTemp();
+          res = LoadConstant(r_tmp, value);
+          OpRegReg(kOpCmp, r_src1, r_tmp);
+          FreeTemp(r_tmp);
+        }
       }
       return res;
     }
@@ -585,13 +596,10 @@
       }
       break;
     case kOpCmp:
-      if (ARM_LOWREG(r_dest_src1) && short_form) {
-        opcode = (short_form) ?  kThumbCmpRI8 : kThumbCmpRR;
-      } else if (ARM_LOWREG(r_dest_src1)) {
-        opcode = kThumbCmpRR;
+      if (!neg && short_form) {
+        opcode = kThumbCmpRI8;
       } else {
         short_form = false;
-        opcode = kThumbCmpHL;
       }
       break;
     default:
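
The utility_arm.cc changes above all follow one selection order for constants: try
the value itself as a Thumb-2 modified immediate, then the negated value (swapping
ADD/SUB, or using CMN instead of CMP), then the complemented value (BIC instead of
AND, MVN instead of MOV), and only then fall back to materializing the constant in
a temp register. A minimal standalone sketch of that order for compares, where
CanEncode is only a stand-in for ART's ModifiedImmediate(value) >= 0 test:

    // Standalone sketch (not ART code): CanEncode stands in for
    // ModifiedImmediate(value) >= 0; here it just accepts plain 8-bit values
    // so the example runs by itself.
    #include <cstdint>
    #include <cstdio>

    static bool CanEncode(int32_t value) {
      return value >= 0 && value <= 0xff;
    }

    // Mirrors the kOpCmp handling in OpRegRegImm after this patch:
    // CMP if the value encodes, CMN if its negation encodes, else a temp register.
    static const char* PickCompareForm(int32_t value) {
      if (CanEncode(value))  return "cmp  rn, #value";
      if (CanEncode(-value)) return "cmn  rn, #-value";
      return "mov rtmp, #value; cmp rn, rtmp";
    }

    int main() {
      std::printf("%s\n", PickCompareForm(200));     // -> cmp
      std::printf("%s\n", PickCompareForm(-200));    // -> cmn
      std::printf("%s\n", PickCompareForm(123456));  // -> temp-register fallback
    }
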
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index dfbc887..4bc0b35 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -920,7 +920,8 @@
       core_spill_mask_(0),
       fp_spill_mask_(0),
       first_lir_insn_(NULL),
-      last_lir_insn_(NULL) {
+      last_lir_insn_(NULL),
+      inliner_(nullptr) {
   promotion_map_ = static_cast<PromotionMap*>
       (arena_->Alloc((cu_->num_dalvik_registers  + cu_->num_compiler_temps + 1) *
                       sizeof(promotion_map_[0]), ArenaAllocator::kAllocRegAlloc));
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
new file mode 100644
index 0000000..6c0328e
--- /dev/null
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -0,0 +1,350 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include "base/macros.h"
+#include "dex/mir_graph.h"
+
+#include "dex_file_method_inliner.h"
+
+namespace art {
+
+const char* DexFileMethodInliner::kClassCacheNames[] = {
+    "Z",                       // kClassCacheBoolean
+    "B",                       // kClassCacheByte
+    "C",                       // kClassCacheChar
+    "S",                       // kClassCacheShort
+    "I",                       // kClassCacheInt
+    "J",                       // kClassCacheLong
+    "F",                       // kClassCacheFloat
+    "D",                       // kClassCacheDouble
+    "V",                       // kClassCacheVoid
+    "Ljava/lang/Object;",      // kClassCacheJavaLangObject
+    "Ljava/lang/String;",      // kClassCacheJavaLangString
+    "Ljava/lang/Double;",      // kClassCacheJavaLangDouble
+    "Ljava/lang/Float;",       // kClassCacheJavaLangFloat
+    "Ljava/lang/Integer;",     // kClassCacheJavaLangInteger
+    "Ljava/lang/Long;",        // kClassCacheJavaLangLong
+    "Ljava/lang/Short;",       // kClassCacheJavaLangShort
+    "Ljava/lang/Math;",        // kClassCacheJavaLangMath
+    "Ljava/lang/StrictMath;",  // kClassCacheJavaLangStrictMath
+    "Ljava/lang/Thread;",      // kClassCacheJavaLangThread
+    "Llibcore/io/Memory;",     // kClassCacheLibcoreIoMemory
+    "Lsun/misc/Unsafe;",       // kClassCacheSunMiscUnsafe
+};
+
+const char* DexFileMethodInliner::kNameCacheNames[] = {
+    "reverseBytes",          // kNameCacheReverseBytes
+    "doubleToRawLongBits",   // kNameCacheDoubleToRawLongBits
+    "longBitsToDouble",      // kNameCacheLongBitsToDouble
+    "floatToRawIntBits",     // kNameCacheFloatToRawIntBits
+    "intBitsToFloat",        // kNameCacheIntBitsToFloat
+    "abs",                   // kNameCacheAbs
+    "max",                   // kNameCacheMax
+    "min",                   // kNameCacheMin
+    "sqrt",                  // kNameCacheSqrt
+    "charAt",                // kNameCacheCharAt
+    "compareTo",             // kNameCacheCompareTo
+    "isEmpty",               // kNameCacheIsEmpty
+    "indexOf",               // kNameCacheIndexOf
+    "length",                // kNameCacheLength
+    "currentThread",         // kNameCacheCurrentThread
+    "peekByte",              // kNameCachePeekByte
+    "peekIntNative",         // kNameCachePeekIntNative
+    "peekLongNative",        // kNameCachePeekLongNative
+    "peekShortNative",       // kNameCachePeekShortNative
+    "pokeByte",              // kNameCachePokeByte
+    "pokeIntNative",         // kNameCachePokeIntNative
+    "pokeLongNative",        // kNameCachePokeLongNative
+    "pokeShortNative",       // kNameCachePokeShortNative
+    "compareAndSwapInt",     // kNameCacheCompareAndSwapInt
+    "compareAndSwapLong",    // kNameCacheCompareAndSwapLong
+    "compareAndSwapObject",  // kNameCacheCompareAndSwapObject
+    "getInt",                // kNameCacheGetInt
+    "getIntVolatile",        // kNameCacheGetIntVolatile
+    "putInt",                // kNameCachePutInt
+    "putIntVolatile",        // kNameCachePutIntVolatile
+    "putOrderedInt",         // kNameCachePutOrderedInt
+    "getLong",               // kNameCacheGetLong
+    "getLongVolatile",       // kNameCacheGetLongVolatile
+    "putLong",               // kNameCachePutLong
+    "putLongVolatile",       // kNameCachePutLongVolatile
+    "putOrderedLong",        // kNameCachePutOrderedLong
+    "getObject",             // kNameCacheGetObject
+    "getObjectVolatile",     // kNameCacheGetObjectVolatile
+    "putObject",             // kNameCachePutObject
+    "putObjectVolatile",     // kNameCachePutObjectVolatile
+    "putOrderedObject",      // kNameCachePutOrderedObject
+};
+
+const DexFileMethodInliner::ProtoDef DexFileMethodInliner::kProtoCacheDefs[] = {
+    // kProtoCacheI_I
+    { kClassCacheInt, 1, { kClassCacheInt } },
+    // kProtoCacheJ_J
+    { kClassCacheLong, 1, { kClassCacheLong } },
+    // kProtoCacheS_S
+    { kClassCacheShort, 1, { kClassCacheShort } },
+    // kProtoCacheD_D
+    { kClassCacheDouble, 1, { kClassCacheDouble } },
+    // kProtoCacheD_J
+    { kClassCacheLong, 1, { kClassCacheDouble } },
+    // kProtoCacheJ_D
+    { kClassCacheDouble, 1, { kClassCacheLong } },
+    // kProtoCacheF_I
+    { kClassCacheInt, 1, { kClassCacheFloat } },
+    // kProtoCacheI_F
+    { kClassCacheFloat, 1, { kClassCacheInt } },
+    // kProtoCacheII_I
+    { kClassCacheInt, 2, { kClassCacheInt, kClassCacheInt } },
+    // kProtoCacheI_C
+    { kClassCacheChar, 1, { kClassCacheInt } },
+    // kProtoCacheString_I
+    { kClassCacheInt, 1, { kClassCacheJavaLangString } },
+    // kProtoCache_Z
+    { kClassCacheBoolean, 0, { } },
+    // kProtoCache_I
+    { kClassCacheInt, 0, { } },
+    // kProtoCache_Thread
+    { kClassCacheJavaLangThread, 0, { } },
+    // kProtoCacheJ_B
+    { kClassCacheByte, 1, { kClassCacheLong } },
+    // kProtoCacheJ_I
+    { kClassCacheInt, 1, { kClassCacheLong } },
+    // kProtoCacheJ_S
+    { kClassCacheShort, 1, { kClassCacheLong } },
+    // kProtoCacheJB_V
+    { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheByte } },
+    // kProtoCacheJI_V
+    { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheInt } },
+    // kProtoCacheJJ_V
+    { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheLong } },
+    // kProtoCacheJS_V
+    { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheShort } },
+    // kProtoCacheObjectJII_Z
+    { kClassCacheBoolean, 4, { kClassCacheJavaLangObject, kClassCacheLong,
+        kClassCacheInt, kClassCacheInt } },
+    // kProtoCacheObjectJJJ_Z
+    { kClassCacheBoolean, 4, { kClassCacheJavaLangObject, kClassCacheLong,
+        kClassCacheLong, kClassCacheLong } },
+    // kProtoCacheObjectJObjectObject_Z
+    { kClassCacheBoolean, 4, { kClassCacheJavaLangObject, kClassCacheLong,
+        kClassCacheJavaLangObject, kClassCacheJavaLangObject } },
+    // kProtoCacheObjectJ_I
+    { kClassCacheInt, 2, { kClassCacheJavaLangObject, kClassCacheLong } },
+    // kProtoCacheObjectJI_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheInt } },
+    // kProtoCacheObjectJ_J
+    { kClassCacheLong, 2, { kClassCacheJavaLangObject, kClassCacheLong } },
+    // kProtoCacheObjectJJ_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheLong } },
+    // kProtoCacheObjectJ_Object
+    { kClassCacheJavaLangObject, 2, { kClassCacheJavaLangObject, kClassCacheLong } },
+    // kProtoCacheObjectJObject_V
+    { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong,
+        kClassCacheJavaLangObject } },
+};
+
+DexFileMethodInliner::~DexFileMethodInliner() {
+}
+
+DexFileMethodInliner::DexFileMethodInliner()
+    : dex_file_(NULL) {
+  COMPILE_ASSERT(kClassCacheFirst == 0, kClassCacheFirst_not_0);
+  COMPILE_ASSERT(arraysize(kClassCacheNames) == kClassCacheLast, bad_arraysize_kClassCacheNames);
+  COMPILE_ASSERT(kNameCacheFirst == 0, kNameCacheFirst_not_0);
+  COMPILE_ASSERT(arraysize(kNameCacheNames) == kNameCacheLast, bad_arraysize_kNameCacheNames);
+  COMPILE_ASSERT(kProtoCacheFirst == 0, kProtoCacheFirst_not_0);
+  COMPILE_ASSERT(arraysize(kProtoCacheDefs) == kProtoCacheLast, bad_arraysize_kProtoCacheNames);
+}
+
+bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index) const {
+  return intrinsics_.find(method_index) != intrinsics_.end();
+}
+
+bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) const {
+  auto it = intrinsics_.find(info->index);
+  if (it == intrinsics_.end()) {
+    return false;
+  }
+  const Intrinsic& intrinsic = it->second;
+  switch (intrinsic.opcode) {
+    case kIntrinsicDoubleCvt:
+      return backend->GenInlinedDoubleCvt(info);
+    case kIntrinsicFloatCvt:
+      return backend->GenInlinedFloatCvt(info);
+    case kIntrinsicReverseBytes:
+      return backend->GenInlinedReverseBytes(info, static_cast<OpSize>(intrinsic.data));
+    case kIntrinsicAbsInt:
+      return backend->GenInlinedAbsInt(info);
+    case kIntrinsicAbsLong:
+      return backend->GenInlinedAbsLong(info);
+    case kIntrinsicMinMaxInt:
+      return backend->GenInlinedMinMaxInt(info, intrinsic.data & kIntrinsicFlagMin);
+    case kIntrinsicSqrt:
+      return backend->GenInlinedSqrt(info);
+    case kIntrinsicCharAt:
+      return backend->GenInlinedCharAt(info);
+    case kIntrinsicCompareTo:
+      return backend->GenInlinedStringCompareTo(info);
+    case kIntrinsicIsEmptyOrLength:
+      return backend->GenInlinedStringIsEmptyOrLength(info, intrinsic.data & kIntrinsicFlagIsEmpty);
+    case kIntrinsicIndexOf:
+      return backend->GenInlinedIndexOf(info, intrinsic.data & kIntrinsicFlagBase0);
+    case kIntrinsicCurrentThread:
+      return backend->GenInlinedCurrentThread(info);
+    case kIntrinsicPeek:
+      return backend->GenInlinedPeek(info, static_cast<OpSize>(intrinsic.data));
+    case kIntrinsicPoke:
+      return backend->GenInlinedPoke(info, static_cast<OpSize>(intrinsic.data));
+    case kIntrinsicCas:
+      return backend->GenInlinedCas(info, intrinsic.data & kIntrinsicFlagIsLong,
+                                    intrinsic.data & kIntrinsicFlagIsObject);
+    case kIntrinsicUnsafeGet:
+      return backend->GenInlinedUnsafeGet(info, intrinsic.data & kIntrinsicFlagIsLong,
+                                          intrinsic.data & kIntrinsicFlagIsVolatile);
+    case kIntrinsicUnsafePut:
+      return backend->GenInlinedUnsafePut(info, intrinsic.data & kIntrinsicFlagIsLong,
+                                          intrinsic.data & kIntrinsicFlagIsObject,
+                                          intrinsic.data & kIntrinsicFlagIsVolatile,
+                                          intrinsic.data & kIntrinsicFlagIsOrdered);
+    default:
+      LOG(FATAL) << "Unexpected intrinsic opcode: " << intrinsic.opcode;
+      return false;  // avoid warning "control reaches end of non-void function"
+  }
+}
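+// Illustrative note (not part of the original change): the data field packs the
+// per-opcode flags tested above. For example, a hypothetical table entry for
+// sun.misc.Unsafe.putObjectVolatile() would carry
+//   data = kIntrinsicFlagIsObject | kIntrinsicFlagIsVolatile
+// so GenIntrinsic() would call GenInlinedUnsafePut(info, false /* long */,
+// true /* object */, true /* volatile */, false /* ordered */).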
+
+uint32_t DexFileMethodInliner::FindClassIndex(const DexFile* dex_file, IndexCache* cache,
+                                              ClassCacheIndex index) {
+  uint32_t* class_index = &cache->class_indexes[index];
+  if (*class_index != kIndexUnresolved) {
+    return *class_index;
+  }
+
+  const DexFile::StringId* string_id = dex_file->FindStringId(kClassCacheNames[index]);
+  if (string_id == nullptr) {
+    *class_index = kIndexNotFound;
+    return *class_index;
+  }
+  uint32_t string_index = dex_file->GetIndexForStringId(*string_id);
+
+  const DexFile::TypeId* type_id = dex_file->FindTypeId(string_index);
+  if (type_id == nullptr) {
+    *class_index = kIndexNotFound;
+    return *class_index;
+  }
+  *class_index = dex_file->GetIndexForTypeId(*type_id);
+  return *class_index;
+}
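+// Example (illustrative only): resolving kClassCacheJavaLangString first finds
+// the StringId for its descriptor (expected to be "Ljava/lang/String;" in
+// kClassCacheNames), then the TypeId referencing that string, and caches the
+// resulting type index. A failed lookup is cached as kIndexNotFound, so the
+// dex file is not searched again for the same descriptor.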
+
+uint32_t DexFileMethodInliner::FindNameIndex(const DexFile* dex_file, IndexCache* cache,
+                                             NameCacheIndex index) {
+  uint32_t* name_index = &cache->name_indexes[index];
+  if (*name_index != kIndexUnresolved) {
+    return *name_index;
+  }
+
+  const DexFile::StringId* string_id = dex_file->FindStringId(kNameCacheNames[index]);
+  if (string_id == nullptr) {
+    *name_index = kIndexNotFound;
+    return *name_index;
+  }
+  *name_index = dex_file->GetIndexForStringId(*string_id);
+  return *name_index;
+}
+
+uint32_t DexFileMethodInliner::FindProtoIndex(const DexFile* dex_file, IndexCache* cache,
+                                              ProtoCacheIndex index) {
+  uint32_t* proto_index = &cache->proto_indexes[index];
+  if (*proto_index != kIndexUnresolved) {
+    return *proto_index;
+  }
+
+  const ProtoDef& proto_def = kProtoCacheDefs[index];
+  uint32_t return_index = FindClassIndex(dex_file, cache, proto_def.return_type);
+  if (return_index == kIndexNotFound) {
+    *proto_index = kIndexNotFound;
+    return *proto_index;
+  }
+  uint16_t return_type = static_cast<uint16_t>(return_index);
+  DCHECK_EQ(static_cast<uint32_t>(return_type), return_index);
+
+  uint32_t signature_length = proto_def.param_count;
+  uint16_t signature_type_idxs[kProtoMaxParams];
+  for (uint32_t i = 0; i != signature_length; ++i) {
+    uint32_t param_index = FindClassIndex(dex_file, cache, proto_def.params[i]);
+    if (param_index == kIndexNotFound) {
+      *proto_index = kIndexNotFound;
+      return *proto_index;
+    }
+    signature_type_idxs[i] = static_cast<uint16_t>(param_index);
+    DCHECK_EQ(static_cast<uint32_t>(signature_type_idxs[i]), param_index);
+  }
+
+  const DexFile::ProtoId* proto_id = dex_file->FindProtoId(return_type, signature_type_idxs,
+                                                           signature_length);
+  if (proto_id == nullptr) {
+    *proto_index = kIndexNotFound;
+    return *proto_index;
+  }
+  *proto_index = dex_file->GetIndexForProtoId(*proto_id);
+  return *proto_index;
+}
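+// Example (illustrative only): kProtoCacheII_I is defined above as
+// { kClassCacheInt, 2, { kClassCacheInt, kClassCacheInt } }, so this resolves
+// the type index for int once (cached), fills signature_type_idxs with two
+// copies of it, and asks the dex file for the ProtoId of the signature "(II)I".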
+
+uint32_t DexFileMethodInliner::FindMethodIndex(const DexFile* dex_file, IndexCache* cache,
+                                               const MethodDef& method_def) {
+  uint32_t declaring_class_index = FindClassIndex(dex_file, cache, method_def.declaring_class);
+  if (declaring_class_index == kIndexNotFound) {
+    return kIndexNotFound;
+  }
+  uint32_t name_index = FindNameIndex(dex_file, cache, method_def.name);
+  if (name_index == kIndexNotFound) {
+    return kIndexNotFound;
+  }
+  uint32_t proto_index = FindProtoIndex(dex_file, cache, method_def.proto);
+  if (proto_index == kIndexNotFound) {
+    return kIndexNotFound;
+  }
+  const DexFile::MethodId* method_id =
+      dex_file->FindMethodId(dex_file->GetTypeId(declaring_class_index),
+                             dex_file->GetStringId(name_index),
+                             dex_file->GetProtoId(proto_index));
+  if (method_id == nullptr) {
+    return kIndexNotFound;
+  }
+  return dex_file->GetIndexForMethodId(*method_id);
+}
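+// Example (illustrative only): a MethodDef of
+// { kClassCacheJavaLangMath, kNameCacheMin, kProtoCacheII_I } resolves to the
+// method index of int java.lang.Math.min(int, int) in this dex file, or to
+// kIndexNotFound if the class, name or proto is not referenced by the file.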
+
+DexFileMethodInliner::IndexCache::IndexCache() {
+  std::fill_n(class_indexes, arraysize(class_indexes), kIndexUnresolved);
+  std::fill_n(name_indexes, arraysize(name_indexes), kIndexUnresolved);
+  std::fill_n(proto_indexes, arraysize(proto_indexes), kIndexUnresolved);
+}
+
+void DexFileMethodInliner::DoFindIntrinsics(const DexFile* dex_file, IndexCache* cache,
+                                            const IntrinsicDef* defs, uint32_t def_count) {
+  DCHECK(dex_file != nullptr);
+  DCHECK(dex_file_ == nullptr);
+  for (uint32_t i = 0u; i != def_count; ++i) {
+    uint32_t method_id = FindMethodIndex(dex_file, cache, defs[i].method_def);
+    if (method_id != kIndexNotFound) {
+      DCHECK(intrinsics_.find(method_id) == intrinsics_.end());
+      intrinsics_[method_id] = defs[i].intrinsic;
+    }
+  }
+  dex_file_ = dex_file;
+}
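+// Usage sketch (illustrative only): each per-architecture subclass builds a
+// local IndexCache and passes its static intrinsic table here, e.g.
+//   IndexCache cache;
+//   DoFindIntrinsics(dex_file, &cache, kIntrinsicMethods, arraysize(kIntrinsicMethods));
+// as done by the Arm/Mips/X86 FindIntrinsics() overrides.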
+
+}  // namespace art
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
new file mode 100644
index 0000000..bc00513
--- /dev/null
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -0,0 +1,318 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_DEX_FILE_METHOD_INLINER_H_
+#define ART_COMPILER_DEX_QUICK_DEX_FILE_METHOD_INLINER_H_
+
+#include <stdint.h>
+#include <map>
+
+namespace art {
+
+class CallInfo;
+class DexFile;
+class Mir2Lir;
+
+enum IntrinsicOpcode {
+  kIntrinsicDoubleCvt,
+  kIntrinsicFloatCvt,
+  kIntrinsicReverseBytes,
+  kIntrinsicAbsInt,
+  kIntrinsicAbsLong,
+  kIntrinsicMinMaxInt,
+  kIntrinsicSqrt,
+  kIntrinsicCharAt,
+  kIntrinsicCompareTo,
+  kIntrinsicIsEmptyOrLength,
+  kIntrinsicIndexOf,
+  kIntrinsicCurrentThread,
+  kIntrinsicPeek,
+  kIntrinsicPoke,
+  kIntrinsicCas,
+  kIntrinsicUnsafeGet,
+  kIntrinsicUnsafePut,
+};
+
+enum IntrinsicFlags {
+  kIntrinsicFlagNone = 0,
+
+  // kIntrinsicMinMaxInt
+  kIntrinsicFlagMax = kIntrinsicFlagNone,
+  kIntrinsicFlagMin = 1,
+
+  // kIntrinsicIsEmptyOrLength
+  kIntrinsicFlagLength  = kIntrinsicFlagNone,
+  kIntrinsicFlagIsEmpty = 1,
+
+  // kIntrinsicIndexOf
+  kIntrinsicFlagBase0 = 1,
+
+  // kIntrinsicUnsafeGet, kIntrinsicUnsafePut, kIntrinsicCas
+  kIntrinsicFlagIsLong     = 1,
+  // kIntrinsicUnsafeGet, kIntrinsicUnsafePut
+  kIntrinsicFlagIsVolatile = 2,
+  // kIntrinsicUnsafePut, kIntrinsicCas
+  kIntrinsicFlagIsObject   = 4,
+  // kIntrinsicUnsafePut
+  kIntrinsicFlagIsOrdered  = 8,
+};
+
+struct Intrinsic {
+  IntrinsicOpcode opcode;
+  uint32_t data;
+};
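+// Note (illustrative, not part of the original change): the meaning of data
+// depends on opcode. For kIntrinsicReverseBytes, kIntrinsicPeek and
+// kIntrinsicPoke it holds an OpSize value (e.g. kWord or kSignedByte); for the
+// min/max, String and Unsafe intrinsics it is a combination of the
+// IntrinsicFlags above.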
+
+/**
+ * Handles inlining of methods from a particular DexFile.
+ *
+ * Intrinsics are a special case of inline methods. The DexFile indices for
+ * all the supported intrinsic methods are looked up once by the FindIntrinsics
+ * function and cached by this class for quick lookup by the method index.
+ *
+ * TODO: Detect short methods (at least getters, setters and empty functions)
+ * from the verifier and mark them for inlining. Inline these methods early
+ * during compilation to allow further optimizations. Similarly, provide
+ * additional information about intrinsics to the early phases of compilation.
+ */
+class DexFileMethodInliner {
+  public:
+    virtual ~DexFileMethodInliner();
+
+    /**
+     * Find all known intrinsic methods in the dex_file and cache their indices.
+     */
+    virtual void FindIntrinsics(const DexFile* dex_file) = 0;
+
+    /**
+     * Check whether a particular method index corresponds to an intrinsic function.
+     */
+    bool IsIntrinsic(uint32_t method_index) const;
+
+    /**
+     * Generate code for an intrinsic function invocation.
+     *
+     * TODO: This should be target-specific. For the time being,
+     * it's shared since it dispatches everything to the backend.
+     */
+    bool GenIntrinsic(Mir2Lir* backend, CallInfo* info) const;
+
+  protected:
+    DexFileMethodInliner();
+
+    /**
+     * To avoid multiple lookups of a class by its descriptor, we cache its
+     * type index in the IndexCache. These are the indexes into the IndexCache
+     * class_indexes array.
+     */
+    enum ClassCacheIndex : uint8_t {  // uint8_t to save space, make larger if needed
+      kClassCacheFirst = 0,
+      kClassCacheBoolean = kClassCacheFirst,
+      kClassCacheByte,
+      kClassCacheChar,
+      kClassCacheShort,
+      kClassCacheInt,
+      kClassCacheLong,
+      kClassCacheFloat,
+      kClassCacheDouble,
+      kClassCacheVoid,
+      kClassCacheJavaLangObject,
+      kClassCacheJavaLangString,
+      kClassCacheJavaLangDouble,
+      kClassCacheJavaLangFloat,
+      kClassCacheJavaLangInteger,
+      kClassCacheJavaLangLong,
+      kClassCacheJavaLangShort,
+      kClassCacheJavaLangMath,
+      kClassCacheJavaLangStrictMath,
+      kClassCacheJavaLangThread,
+      kClassCacheLibcoreIoMemory,
+      kClassCacheSunMiscUnsafe,
+      kClassCacheLast
+    };
+
+    /**
+     * To avoid multiple lookups of a method name string, we cache its string
+     * index in the IndexCache. These are the indexes into the IndexCache
+     * name_indexes array.
+     */
+    enum NameCacheIndex : uint8_t {  // uint8_t to save space, make larger if needed
+      kNameCacheFirst = 0,
+      kNameCacheReverseBytes = kNameCacheFirst,
+      kNameCacheDoubleToRawLongBits,
+      kNameCacheLongBitsToDouble,
+      kNameCacheFloatToRawIntBits,
+      kNameCacheIntBitsToFloat,
+      kNameCacheAbs,
+      kNameCacheMax,
+      kNameCacheMin,
+      kNameCacheSqrt,
+      kNameCacheCharAt,
+      kNameCacheCompareTo,
+      kNameCacheIsEmpty,
+      kNameCacheIndexOf,
+      kNameCacheLength,
+      kNameCacheCurrentThread,
+      kNameCachePeekByte,
+      kNameCachePeekIntNative,
+      kNameCachePeekLongNative,
+      kNameCachePeekShortNative,
+      kNameCachePokeByte,
+      kNameCachePokeIntNative,
+      kNameCachePokeLongNative,
+      kNameCachePokeShortNative,
+      kNameCacheCompareAndSwapInt,
+      kNameCacheCompareAndSwapLong,
+      kNameCacheCompareAndSwapObject,
+      kNameCacheGetInt,
+      kNameCacheGetIntVolatile,
+      kNameCachePutInt,
+      kNameCachePutIntVolatile,
+      kNameCachePutOrderedInt,
+      kNameCacheGetLong,
+      kNameCacheGetLongVolatile,
+      kNameCachePutLong,
+      kNameCachePutLongVolatile,
+      kNameCachePutOrderedLong,
+      kNameCacheGetObject,
+      kNameCacheGetObjectVolatile,
+      kNameCachePutObject,
+      kNameCachePutObjectVolatile,
+      kNameCachePutOrderedObject,
+      kNameCacheLast
+    };
+
+    /**
+     * To avoid multiple lookups of a method signature, we cache its proto
+     * index in the IndexCache. These are the indexes into the IndexCache
+     * proto_indexes array.
+     */
+    enum ProtoCacheIndex : uint8_t {  // uint8_t to save space, make larger if needed
+      kProtoCacheFirst = 0,
+      kProtoCacheI_I = kProtoCacheFirst,
+      kProtoCacheJ_J,
+      kProtoCacheS_S,
+      kProtoCacheD_D,
+      kProtoCacheD_J,
+      kProtoCacheJ_D,
+      kProtoCacheF_I,
+      kProtoCacheI_F,
+      kProtoCacheII_I,
+      kProtoCacheI_C,
+      kProtoCacheString_I,
+      kProtoCache_Z,
+      kProtoCache_I,
+      kProtoCache_Thread,
+      kProtoCacheJ_B,
+      kProtoCacheJ_I,
+      kProtoCacheJ_S,
+      kProtoCacheJB_V,
+      kProtoCacheJI_V,
+      kProtoCacheJJ_V,
+      kProtoCacheJS_V,
+      kProtoCacheObjectJII_Z,
+      kProtoCacheObjectJJJ_Z,
+      kProtoCacheObjectJObjectObject_Z,
+      kProtoCacheObjectJ_I,
+      kProtoCacheObjectJI_V,
+      kProtoCacheObjectJ_J,
+      kProtoCacheObjectJJ_V,
+      kProtoCacheObjectJ_Object,
+      kProtoCacheObjectJObject_V,
+      kProtoCacheLast
+    };
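+    // Note (illustrative only): the proto cache names list the parameter
+    // shorthands before the underscore and the return type after it, using
+    // dex-style shorthands (Z boolean, B byte, S short, C char, I int, J long,
+    // F float, D double, V void). For example, kProtoCacheObjectJ_I stands for
+    // the signature "(Ljava/lang/Object;J)I".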
+
+    /**
+     * The maximum number of method parameters we support in the ProtoDef.
+     */
+    static constexpr uint32_t kProtoMaxParams = 6;
+
+    /**
+     * The method signature (proto) definition using cached class indexes.
+     * The return_type and params are used with the IndexCache to look up
+     * appropriate class indexes to be passed to DexFile::FindProtoId().
+     */
+    struct ProtoDef {
+      ClassCacheIndex return_type;
+      uint8_t param_count;
+      ClassCacheIndex params[kProtoMaxParams];
+    };
+
+    /**
+     * The method definition using cached class, name and proto indexes.
+     * The class index, method name index and proto index are used with
+     * IndexCache to look up appropriate parameters for DexFile::FindMethodId().
+     */
+    struct MethodDef {
+      ClassCacheIndex declaring_class;
+      NameCacheIndex name;
+      ProtoCacheIndex proto;
+    };
+
+    /**
+     * The definition of an intrinsic function binds the method definition
+     * to an Intrinsic.
+     */
+    struct IntrinsicDef {
+      MethodDef method_def;
+      Intrinsic intrinsic;
+    };
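+    // Example (illustrative only): the per-architecture tables build these via
+    // the INTRINSIC macro, e.g.
+    //   INTRINSIC(JavaLangMath, Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin)
+    // expands to
+    //   { { kClassCacheJavaLangMath, kNameCacheMin, kProtoCacheII_I },
+    //     { kIntrinsicMinMaxInt, kIntrinsicFlagMin } }.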
+
+    /**
+     * Cache for class, method name and method signature indexes used during
+     * intrinsic function lookup to avoid multiple lookups of the same items.
+     *
+     * Many classes have multiple intrinsics, and the same classes also appear
+     * in multiple method signatures, so we want to avoid repeating these
+     * fairly expensive lookups. The method names and method signatures are
+     * sometimes reused and are therefore cached as well.
+     */
+    struct IndexCache {
+      IndexCache();
+
+      uint32_t class_indexes[kClassCacheLast - kClassCacheFirst];
+      uint32_t name_indexes[kNameCacheLast - kNameCacheFirst];
+      uint32_t proto_indexes[kProtoCacheLast - kProtoCacheFirst];
+    };
+
+    static const char* kClassCacheNames[];
+    static const char* kNameCacheNames[];
+    static const ProtoDef kProtoCacheDefs[];
+
+    static const uint32_t kIndexNotFound = static_cast<uint32_t>(-1);
+    static const uint32_t kIndexUnresolved = static_cast<uint32_t>(-2);
+
+    static uint32_t FindClassIndex(const DexFile* dex_file, IndexCache* cache,
+                                   ClassCacheIndex index);
+    static uint32_t FindNameIndex(const DexFile* dex_file, IndexCache* cache,
+                                  NameCacheIndex index);
+    static uint32_t FindProtoIndex(const DexFile* dex_file, IndexCache* cache,
+                                   ProtoCacheIndex index);
+    static uint32_t FindMethodIndex(const DexFile* dex_file, IndexCache* cache,
+                                    const MethodDef& method_def);
+
+    void DoFindIntrinsics(const DexFile* dex_file, IndexCache* cache,
+                          const IntrinsicDef* defs, uint32_t def_count);
+
+    /*
+     * Maps method indexes (for the particular DexFile) to Intrinsic definitions.
+     */
+    std::map<uint32_t, Intrinsic> intrinsics_;
+    const DexFile* dex_file_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_DEX_FILE_METHOD_INLINER_H_
diff --git a/compiler/dex/quick/dex_file_to_method_inliner_map.cc b/compiler/dex/quick/dex_file_to_method_inliner_map.cc
new file mode 100644
index 0000000..56a42bc
--- /dev/null
+++ b/compiler/dex/quick/dex_file_to_method_inliner_map.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <utility>
+#include "thread.h"
+#include "thread-inl.h"
+#include "base/mutex.h"
+#include "base/mutex-inl.h"
+#include "base/logging.h"
+#include "driver/compiler_driver.h"
+#include "dex/quick/arm/arm_dex_file_method_inliner.h"
+#include "dex/quick/mips/mips_dex_file_method_inliner.h"
+#include "dex/quick/x86/x86_dex_file_method_inliner.h"
+
+#include "dex_file_to_method_inliner_map.h"
+
+namespace art {
+
+DexFileToMethodInlinerMap::DexFileToMethodInlinerMap(const CompilerDriver* compiler)
+    : compiler_(compiler),
+      mutex_("inline_helper_mutex") {
+}
+
+DexFileToMethodInlinerMap::~DexFileToMethodInlinerMap() {
+  for (auto& entry : inliners_) {
+    delete entry.second;
+  }
+}
+
+const DexFileMethodInliner& DexFileToMethodInlinerMap::GetMethodInliner(const DexFile* dex_file) {
+  Thread* self = Thread::Current();
+  {
+    ReaderMutexLock lock(self, mutex_);
+    auto it = inliners_.find(dex_file);
+    if (it != inliners_.end()) {
+      return *it->second;
+    }
+  }
+
+  WriterMutexLock lock(self, mutex_);
+  DexFileMethodInliner** inliner = &inliners_[dex_file];  // inserts new entry if not found
+  if (*inliner) {
+    return **inliner;
+  }
+  switch (compiler_->GetInstructionSet()) {
+    case kThumb2:
+      *inliner = new ArmDexFileMethodInliner;
+      break;
+    case kX86:
+      *inliner = new X86DexFileMethodInliner;
+      break;
+    case kMips:
+      *inliner = new MipsDexFileMethodInliner;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected instruction set: " << compiler_->GetInstructionSet();
+  }
+  DCHECK(*inliner != nullptr);
+  // TODO: per-dex file locking for the intrinsics container filling.
+  (*inliner)->FindIntrinsics(dex_file);
+  return **inliner;
+}
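+// Usage sketch (illustrative only): callers such as Mir2Lir::GenInvoke() fetch
+// the per-dex-file inliner lazily via
+//   context->GetInlinerMap()->GetMethodInliner(cu_->dex_file)
+// where context is the QuickCompilerContext. The read lock covers the common
+// case of an already-created inliner; the write lock is taken only the first
+// time a dex file is seen, to create, register and populate its inliner.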
+
+}  // namespace art
diff --git a/compiler/dex/quick/dex_file_to_method_inliner_map.h b/compiler/dex/quick/dex_file_to_method_inliner_map.h
new file mode 100644
index 0000000..77f2648
--- /dev/null
+++ b/compiler/dex/quick/dex_file_to_method_inliner_map.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_DEX_FILE_TO_METHOD_INLINER_MAP_H_
+#define ART_COMPILER_DEX_QUICK_DEX_FILE_TO_METHOD_INLINER_MAP_H_
+
+#include <map>
+#include <vector>
+#include "base/macros.h"
+#include "base/mutex.h"
+
+#include "dex/quick/dex_file_method_inliner.h"
+
+namespace art {
+
+class CompilerDriver;
+class DexFile;
+
+/**
+ * Map each DexFile to its DexFileMethodInliner.
+ *
+ * The method inliner is created and initialized the first time it's requested
+ * for a particular DexFile.
+ */
+class DexFileToMethodInlinerMap {
+  public:
+    explicit DexFileToMethodInlinerMap(const CompilerDriver* compiler);
+    ~DexFileToMethodInlinerMap();
+
+    const DexFileMethodInliner& GetMethodInliner(const DexFile* dex_file) LOCKS_EXCLUDED(mutex_);
+
+  private:
+    const CompilerDriver* const compiler_;
+    ReaderWriterMutex mutex_;
+    std::map<const DexFile*, DexFileMethodInliner*> inliners_ GUARDED_BY(mutex_);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_DEX_FILE_TO_METHOD_INLINER_MAP_H_
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 7225262..469c577 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -15,6 +15,9 @@
  */
 
 #include "dex/compiler_ir.h"
+#include "dex/frontend.h"
+#include "dex/quick/dex_file_method_inliner.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex_file-inl.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "invoke_type.h"
@@ -1227,198 +1230,16 @@
   return true;
 }
 
-bool Mir2Lir::GenIntrinsic(CallInfo* info) {
-  if (info->opt_flags & MIR_INLINED) {
-    return false;
-  }
-  /*
-   * TODO: move these to a target-specific structured constant array
-   * and use a generic match function.  The list of intrinsics may be
-   * slightly different depending on target.
-   * TODO: Fold this into a matching function that runs during
-   * basic block building.  This should be part of the action for
-   * small method inlining and recognition of the special object init
-   * method.  By doing this during basic block construction, we can also
-   * take advantage of/generate new useful dataflow info.
-   */
-  const DexFile::MethodId& target_mid = cu_->dex_file->GetMethodId(info->index);
-  const DexFile::TypeId& declaring_type = cu_->dex_file->GetTypeId(target_mid.class_idx_);
-  StringPiece tgt_methods_declaring_class(
-      cu_->dex_file->StringDataByIdx(declaring_type.descriptor_idx_));
-  if (tgt_methods_declaring_class.starts_with("Ljava/lang/Double;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "long java.lang.Double.doubleToRawLongBits(double)") {
-      return GenInlinedDoubleCvt(info);
-    }
-    if (tgt_method == "double java.lang.Double.longBitsToDouble(long)") {
-      return GenInlinedDoubleCvt(info);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Float;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "int java.lang.Float.floatToRawIntBits(float)") {
-      return GenInlinedFloatCvt(info);
-    }
-    if (tgt_method == "float java.lang.Float.intBitsToFloat(int)") {
-      return GenInlinedFloatCvt(info);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Integer;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "int java.lang.Integer.reverseBytes(int)") {
-      return GenInlinedReverseBytes(info, kWord);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Long;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "long java.lang.Long.reverseBytes(long)") {
-      return GenInlinedReverseBytes(info, kLong);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Math;") ||
-             tgt_methods_declaring_class.starts_with("Ljava/lang/StrictMath;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "int java.lang.Math.abs(int)" ||
-        tgt_method == "int java.lang.StrictMath.abs(int)") {
-      return GenInlinedAbsInt(info);
-    }
-    if (tgt_method == "long java.lang.Math.abs(long)" ||
-        tgt_method == "long java.lang.StrictMath.abs(long)") {
-      return GenInlinedAbsLong(info);
-    }
-    if (tgt_method == "int java.lang.Math.max(int, int)" ||
-        tgt_method == "int java.lang.StrictMath.max(int, int)") {
-      return GenInlinedMinMaxInt(info, false /* is_min */);
-    }
-    if (tgt_method == "int java.lang.Math.min(int, int)" ||
-        tgt_method == "int java.lang.StrictMath.min(int, int)") {
-      return GenInlinedMinMaxInt(info, true /* is_min */);
-    }
-    if (tgt_method == "double java.lang.Math.sqrt(double)" ||
-        tgt_method == "double java.lang.StrictMath.sqrt(double)") {
-      return GenInlinedSqrt(info);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Short;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "short java.lang.Short.reverseBytes(short)") {
-      return GenInlinedReverseBytes(info, kSignedHalf);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/String;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "char java.lang.String.charAt(int)") {
-      return GenInlinedCharAt(info);
-    }
-    if (tgt_method == "int java.lang.String.compareTo(java.lang.String)") {
-      return GenInlinedStringCompareTo(info);
-    }
-    if (tgt_method == "boolean java.lang.String.is_empty()") {
-      return GenInlinedStringIsEmptyOrLength(info, true /* is_empty */);
-    }
-    if (tgt_method == "int java.lang.String.index_of(int, int)") {
-      return GenInlinedIndexOf(info, false /* base 0 */);
-    }
-    if (tgt_method == "int java.lang.String.index_of(int)") {
-      return GenInlinedIndexOf(info, true /* base 0 */);
-    }
-    if (tgt_method == "int java.lang.String.length()") {
-      return GenInlinedStringIsEmptyOrLength(info, false /* is_empty */);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Thread;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "java.lang.Thread java.lang.Thread.currentThread()") {
-      return GenInlinedCurrentThread(info);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Llibcore/io/Memory;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "byte libcore.io.Memory.peekByte(long)") {
-      return GenInlinedPeek(info, kSignedByte);
-    }
-    if (tgt_method == "int libcore.io.Memory.peekIntNative(long)") {
-      return GenInlinedPeek(info, kWord);
-    }
-    if (tgt_method == "long libcore.io.Memory.peekLongNative(long)") {
-      return GenInlinedPeek(info, kLong);
-    }
-    if (tgt_method == "short libcore.io.Memory.peekShortNative(long)") {
-      return GenInlinedPeek(info, kSignedHalf);
-    }
-    if (tgt_method == "void libcore.io.Memory.pokeByte(long, byte)") {
-      return GenInlinedPoke(info, kSignedByte);
-    }
-    if (tgt_method == "void libcore.io.Memory.pokeIntNative(long, int)") {
-      return GenInlinedPoke(info, kWord);
-    }
-    if (tgt_method == "void libcore.io.Memory.pokeLongNative(long, long)") {
-      return GenInlinedPoke(info, kLong);
-    }
-    if (tgt_method == "void libcore.io.Memory.pokeShortNative(long, short)") {
-      return GenInlinedPoke(info, kSignedHalf);
-    }
-  } else if (tgt_methods_declaring_class.starts_with("Lsun/misc/Unsafe;")) {
-    std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file));
-    if (tgt_method == "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") {
-      return GenInlinedCas32(info, false);
-    }
-    if (tgt_method == "boolean sun.misc.Unsafe.compareAndSwapObject(java.lang.Object, long, java.lang.Object, java.lang.Object)") {
-      return GenInlinedCas32(info, true);
-    }
-    if (tgt_method == "int sun.misc.Unsafe.getInt(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, false /* is_long */, false /* is_volatile */);
-    }
-    if (tgt_method == "int sun.misc.Unsafe.getIntVolatile(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, false /* is_long */, true /* is_volatile */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putInt(java.lang.Object, long, int)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, false /* is_object */,
-                                 false /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putIntVolatile(java.lang.Object, long, int)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, false /* is_object */,
-                                 true /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putOrderedInt(java.lang.Object, long, int)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, false /* is_object */,
-                                 false /* is_volatile */, true /* is_ordered */);
-    }
-    if (tgt_method == "long sun.misc.Unsafe.getLong(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, true /* is_long */, false /* is_volatile */);
-    }
-    if (tgt_method == "long sun.misc.Unsafe.getLongVolatile(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, true /* is_long */, true /* is_volatile */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putLong(java.lang.Object, long, long)") {
-      return GenInlinedUnsafePut(info, true /* is_long */, false /* is_object */,
-                                 false /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putLongVolatile(java.lang.Object, long, long)") {
-      return GenInlinedUnsafePut(info, true /* is_long */, false /* is_object */,
-                                 true /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putOrderedLong(java.lang.Object, long, long)") {
-      return GenInlinedUnsafePut(info, true /* is_long */, false /* is_object */,
-                                 false /* is_volatile */, true /* is_ordered */);
-    }
-    if (tgt_method == "java.lang.Object sun.misc.Unsafe.getObject(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, false /* is_long */, false /* is_volatile */);
-    }
-    if (tgt_method == "java.lang.Object sun.misc.Unsafe.getObjectVolatile(java.lang.Object, long)") {
-      return GenInlinedUnsafeGet(info, false /* is_long */, true /* is_volatile */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putObject(java.lang.Object, long, java.lang.Object)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, true /* is_object */,
-                                 false /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putObjectVolatile(java.lang.Object, long, java.lang.Object)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, true /* is_object */,
-                                 true /* is_volatile */, false /* is_ordered */);
-    }
-    if (tgt_method == "void sun.misc.Unsafe.putOrderedObject(java.lang.Object, long, java.lang.Object)") {
-      return GenInlinedUnsafePut(info, false /* is_long */, true /* is_object */,
-                                 false /* is_volatile */, true /* is_ordered */);
-    }
-  }
-  return false;
-}
-
 void Mir2Lir::GenInvoke(CallInfo* info) {
-  if (GenIntrinsic(info)) {
-    return;
+  if (!(info->opt_flags & MIR_INLINED)) {
+    if (inliner_ == nullptr) {
+      QuickCompilerContext* context = reinterpret_cast<QuickCompilerContext*>(
+          cu_->compiler_driver->GetCompilerContext());
+      inliner_ = &context->GetInlinerMap()->GetMethodInliner(cu_->dex_file);
+    }
+    if (inliner_->GenIntrinsic(this, info)) {
+      return;
+    }
   }
   InvokeType original_type = info->type;  // avoiding mutation by ComputeInvokeInfo
   int call_state = 0;
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 88b244b..5dda445 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -104,7 +104,7 @@
     void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                           RegLocation rl_src2);
     void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
-    bool GenInlinedCas32(CallInfo* info, bool need_write_barrier);
+    bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
     bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 5229429..dfff260 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -258,7 +258,7 @@
   LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm";
 }
 
-bool MipsMir2Lir::GenInlinedCas32(CallInfo* info, bool need_write_barrier) {
+bool MipsMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
   DCHECK_NE(cu_->instruction_set, kThumb2);
   return false;
 }
diff --git a/compiler/dex/quick/mips/mips_dex_file_method_inliner.cc b/compiler/dex/quick/mips/mips_dex_file_method_inliner.cc
new file mode 100644
index 0000000..05d8ac8
--- /dev/null
+++ b/compiler/dex/quick/mips/mips_dex_file_method_inliner.cc
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "dex/compiler_enums.h"
+
+#include "mips_dex_file_method_inliner.h"
+
+namespace art {
+
+const DexFileMethodInliner::IntrinsicDef MipsDexFileMethodInliner::kIntrinsicMethods[] = {
+#define INTRINSIC(c, n, p, o, d) \
+    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, d } }
+
+    // INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0),
+    // INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0),
+    // INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0),
+    // INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, 0),
+
+    // INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, kWord),
+    // INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, kLong),
+    // INTRINSIC(JavaLangShort, ReverseBytes, S_S, kIntrinsicReverseBytes, kSignedHalf),
+
+    // INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
+    // INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
+    // INTRINSIC(JavaLangMath,       Abs, J_J, kIntrinsicAbsLong, 0),
+    // INTRINSIC(JavaLangStrictMath, Abs, J_J, kIntrinsicAbsLong, 0),
+    // INTRINSIC(JavaLangMath,       Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    // INTRINSIC(JavaLangStrictMath, Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    // INTRINSIC(JavaLangMath,       Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    // INTRINSIC(JavaLangStrictMath, Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    // INTRINSIC(JavaLangMath,       Sqrt, D_D, kIntrinsicSqrt, 0),
+    // INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0),
+
+    // INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0),
+    // INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0),
+    // INTRINSIC(JavaLangString, IsEmpty, _Z, kIntrinsicIsEmptyOrLength, kIntrinsicFlagIsEmpty),
+    // INTRINSIC(JavaLangString, IndexOf, II_I, kIntrinsicIndexOf, kIntrinsicFlagNone),
+    // INTRINSIC(JavaLangString, IndexOf, I_I, kIntrinsicIndexOf, kIntrinsicFlagBase0),
+    // INTRINSIC(JavaLangString, Length, _I, kIntrinsicIsEmptyOrLength, kIntrinsicFlagLength),
+
+    INTRINSIC(JavaLangThread, CurrentThread, _Thread, kIntrinsicCurrentThread, 0),
+
+    INTRINSIC(LibcoreIoMemory, PeekByte, J_B, kIntrinsicPeek, kSignedByte),
+    // INTRINSIC(LibcoreIoMemory, PeekIntNative, J_I, kIntrinsicPeek, kWord),
+    // INTRINSIC(LibcoreIoMemory, PeekLongNative, J_J, kIntrinsicPeek, kLong),
+    // INTRINSIC(LibcoreIoMemory, PeekShortNative, J_S, kIntrinsicPeek, kSignedHalf),
+    INTRINSIC(LibcoreIoMemory, PokeByte, JB_V, kIntrinsicPoke, kSignedByte),
+    // INTRINSIC(LibcoreIoMemory, PokeIntNative, JI_V, kIntrinsicPoke, kWord),
+    // INTRINSIC(LibcoreIoMemory, PokeLongNative, JJ_V, kIntrinsicPoke, kLong),
+    // INTRINSIC(LibcoreIoMemory, PokeShortNative, JS_V, kIntrinsicPoke, kSignedHalf),
+
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapInt, ObjectJII_Z, kIntrinsicCas,
+    //           kIntrinsicFlagNone),
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapLong, ObjectJJJ_Z, kIntrinsicCas,
+    //           kIntrinsicFlagIsLong),
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapObject, ObjectJObjectObject_Z, kIntrinsicCas,
+    //           kIntrinsicFlagIsObject),
+
+#define UNSAFE_GET_PUT(type, code, type_flags) \
+    INTRINSIC(SunMiscUnsafe, Get ## type, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              type_flags & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Get ## type ## Volatile, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              (type_flags | kIntrinsicFlagIsVolatile) & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Put ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags), \
+    INTRINSIC(SunMiscUnsafe, Put ## type ## Volatile, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsVolatile), \
+    INTRINSIC(SunMiscUnsafe, PutOrdered ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsOrdered)
+
+    // UNSAFE_GET_PUT(Int, I, kIntrinsicFlagNone),
+    // UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong),
+    // UNSAFE_GET_PUT(Object, Object, kIntrinsicFlagIsObject),
+#undef UNSAFE_GET_PUT
+
+#undef INTRINSIC
+};
+
+MipsDexFileMethodInliner::MipsDexFileMethodInliner() {
+}
+
+MipsDexFileMethodInliner::~MipsDexFileMethodInliner() {
+}
+
+void MipsDexFileMethodInliner::FindIntrinsics(const DexFile* dex_file) {
+  IndexCache cache;
+  DoFindIntrinsics(dex_file, &cache, kIntrinsicMethods, arraysize(kIntrinsicMethods));
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/mips/mips_dex_file_method_inliner.h b/compiler/dex/quick/mips/mips_dex_file_method_inliner.h
new file mode 100644
index 0000000..8fe7ec7
--- /dev/null
+++ b/compiler/dex/quick/mips/mips_dex_file_method_inliner.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_MIPS_MIPS_DEX_FILE_METHOD_INLINER_H_
+#define ART_COMPILER_DEX_QUICK_MIPS_MIPS_DEX_FILE_METHOD_INLINER_H_
+
+#include "dex/quick/dex_file_method_inliner.h"
+
+namespace art {
+
+class MipsDexFileMethodInliner : public DexFileMethodInliner {
+  public:
+    MipsDexFileMethodInliner();
+    ~MipsDexFileMethodInliner();
+
+    void FindIntrinsics(const DexFile* dex_file);
+
+  private:
+    static const IntrinsicDef kIntrinsicMethods[];
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_MIPS_MIPS_DEX_FILE_METHOD_INLINER_H_
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 1a30b7a..f567b5c 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -198,6 +198,10 @@
     SetupRegMask(&lir->u.m.use_mask, lir->operands[3]);
   }
 
+  if (flags & REG_USE4) {
+    SetupRegMask(&lir->u.m.use_mask, lir->operands[4]);
+  }
+
   if (flags & SETS_CCODES) {
     lir->u.m.def_mask |= ENCODE_CCODE;
   }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 4c56b74..ad9b0de 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -106,6 +106,7 @@
 struct LIR;
 struct RegLocation;
 struct RegisterInfo;
+class DexFileMethodInliner;
 class MIRGraph;
 class Mir2Lir;
 
@@ -555,7 +556,6 @@
     bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile);
     bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object,
                              bool is_volatile, bool is_ordered);
-    bool GenIntrinsic(CallInfo* info);
     int LoadArgRegs(CallInfo* info, int call_state,
                     NextCallInsn next_call_insn,
                     const MethodReference& target_method,
@@ -661,7 +661,7 @@
                           RegLocation rl_src1, RegLocation rl_src2) = 0;
     virtual void GenConversion(Instruction::Code opcode, RegLocation rl_dest,
                                RegLocation rl_src) = 0;
-    virtual bool GenInlinedCas32(CallInfo* info, bool need_write_barrier) = 0;
+    virtual bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object) = 0;
     virtual bool GenInlinedMinMaxInt(CallInfo* info, bool is_min) = 0;
     virtual bool GenInlinedSqrt(CallInfo* info) = 0;
     virtual bool GenInlinedPeek(CallInfo* info, OpSize size) = 0;
@@ -837,6 +837,8 @@
     unsigned int fp_spill_mask_;
     LIR* first_lir_insn_;
     LIR* last_lir_insn_;
+    // Lazily retrieved method inliner for intrinsics.
+    const DexFileMethodInliner* inliner_;
 };  // Class Mir2Lir
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 1d6509e..ffe2d67 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -104,7 +104,7 @@
     void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                           RegLocation rl_src2);
     void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
-    bool GenInlinedCas32(CallInfo* info, bool need_write_barrier);
+    bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
     bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 499547b..01d5c17 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -281,7 +281,7 @@
   NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
 }
 
-bool X86Mir2Lir::GenInlinedCas32(CallInfo* info, bool need_write_barrier) {
+bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
   DCHECK_NE(cu_->instruction_set, kThumb2);
   return false;
 }
diff --git a/compiler/dex/quick/x86/x86_dex_file_method_inliner.cc b/compiler/dex/quick/x86/x86_dex_file_method_inliner.cc
new file mode 100644
index 0000000..b788c3c
--- /dev/null
+++ b/compiler/dex/quick/x86/x86_dex_file_method_inliner.cc
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "dex/compiler_enums.h"
+
+#include "x86_dex_file_method_inliner.h"
+
+namespace art {
+
+const DexFileMethodInliner::IntrinsicDef X86DexFileMethodInliner::kIntrinsicMethods[] = {
+#define INTRINSIC(c, n, p, o, d) \
+    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, d } }
+
+    INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0),
+    INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0),
+    INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0),
+    INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, 0),
+
+    INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, kWord),
+    INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, kLong),
+    INTRINSIC(JavaLangShort, ReverseBytes, S_S, kIntrinsicReverseBytes, kSignedHalf),
+
+    INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
+    INTRINSIC(JavaLangMath,       Abs, J_J, kIntrinsicAbsLong, 0),
+    INTRINSIC(JavaLangStrictMath, Abs, J_J, kIntrinsicAbsLong, 0),
+    INTRINSIC(JavaLangMath,       Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangStrictMath, Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangMath,       Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangStrictMath, Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    // INTRINSIC(JavaLangMath,       Sqrt, D_D, kIntrinsicSqrt, 0),
+    // INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0),
+
+    INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0),
+    INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0),
+    INTRINSIC(JavaLangString, IsEmpty, _Z, kIntrinsicIsEmptyOrLength, kIntrinsicFlagIsEmpty),
+    INTRINSIC(JavaLangString, IndexOf, II_I, kIntrinsicIndexOf, kIntrinsicFlagNone),
+    INTRINSIC(JavaLangString, IndexOf, I_I, kIntrinsicIndexOf, kIntrinsicFlagBase0),
+    INTRINSIC(JavaLangString, Length, _I, kIntrinsicIsEmptyOrLength, kIntrinsicFlagLength),
+
+    INTRINSIC(JavaLangThread, CurrentThread, _Thread, kIntrinsicCurrentThread, 0),
+
+    INTRINSIC(LibcoreIoMemory, PeekByte, J_B, kIntrinsicPeek, kSignedByte),
+    INTRINSIC(LibcoreIoMemory, PeekIntNative, J_I, kIntrinsicPeek, kWord),
+    INTRINSIC(LibcoreIoMemory, PeekLongNative, J_J, kIntrinsicPeek, kLong),
+    INTRINSIC(LibcoreIoMemory, PeekShortNative, J_S, kIntrinsicPeek, kSignedHalf),
+    INTRINSIC(LibcoreIoMemory, PokeByte, JB_V, kIntrinsicPoke, kSignedByte),
+    INTRINSIC(LibcoreIoMemory, PokeIntNative, JI_V, kIntrinsicPoke, kWord),
+    INTRINSIC(LibcoreIoMemory, PokeLongNative, JJ_V, kIntrinsicPoke, kLong),
+    INTRINSIC(LibcoreIoMemory, PokeShortNative, JS_V, kIntrinsicPoke, kSignedHalf),
+
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapInt, ObjectJII_Z, kIntrinsicCas,
+    //           kIntrinsicFlagNone),
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapLong, ObjectJJJ_Z, kIntrinsicCas,
+    //           kIntrinsicFlagIsLong),
+    // INTRINSIC(SunMiscUnsafe, CompareAndSwapObject, ObjectJObjectObject_Z, kIntrinsicCas,
+    //           kIntrinsicFlagIsObject),
+
+#define UNSAFE_GET_PUT(type, code, type_flags) \
+    INTRINSIC(SunMiscUnsafe, Get ## type, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              type_flags & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Get ## type ## Volatile, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
+              (type_flags | kIntrinsicFlagIsVolatile) & ~kIntrinsicFlagIsObject), \
+    INTRINSIC(SunMiscUnsafe, Put ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags), \
+    INTRINSIC(SunMiscUnsafe, Put ## type ## Volatile, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsVolatile), \
+    INTRINSIC(SunMiscUnsafe, PutOrdered ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
+              type_flags | kIntrinsicFlagIsOrdered)
+
+    UNSAFE_GET_PUT(Int, I, kIntrinsicFlagNone),
+    UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong),
+
+    // UNSAFE_GET_PUT(Object, Object, kIntrinsicFlagIsObject),
+    // PutObject: "TODO: fix X86, it exhausts registers for card marking."
+    INTRINSIC(SunMiscUnsafe, GetObject, ObjectJ_Object, kIntrinsicUnsafeGet,
+              kIntrinsicFlagNone),
+    INTRINSIC(SunMiscUnsafe, GetObjectVolatile, ObjectJ_Object, kIntrinsicUnsafeGet,
+              kIntrinsicFlagIsVolatile),
+#undef UNSAFE_GET_PUT
+
+#undef INTRINSIC
+};
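+// Note (illustrative only): each UNSAFE_GET_PUT(type, code, type_flags) above
+// expands to five INTRINSIC entries. For example, UNSAFE_GET_PUT(Int, I,
+// kIntrinsicFlagNone) yields GetInt/GetIntVolatile mapped to kIntrinsicUnsafeGet
+// (with the ObjectJ_I proto) and PutInt/PutIntVolatile/PutOrderedInt mapped to
+// kIntrinsicUnsafePut (with the ObjectJI_V proto), with the volatile/ordered
+// flags OR-ed into the data field.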
+
+X86DexFileMethodInliner::X86DexFileMethodInliner() {
+}
+
+X86DexFileMethodInliner::~X86DexFileMethodInliner() {
+}
+
+void X86DexFileMethodInliner::FindIntrinsics(const DexFile* dex_file) {
+  IndexCache cache;
+  DoFindIntrinsics(dex_file, &cache, kIntrinsicMethods, arraysize(kIntrinsicMethods));
+}
+
+}  // namespace art
diff --git a/compiler/dex/quick/x86/x86_dex_file_method_inliner.h b/compiler/dex/quick/x86/x86_dex_file_method_inliner.h
new file mode 100644
index 0000000..7813e44
--- /dev/null
+++ b/compiler/dex/quick/x86/x86_dex_file_method_inliner.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_X86_X86_DEX_FILE_METHOD_INLINER_H_
+#define ART_COMPILER_DEX_QUICK_X86_X86_DEX_FILE_METHOD_INLINER_H_
+
+#include "dex/quick/dex_file_method_inliner.h"
+
+namespace art {
+
+class X86DexFileMethodInliner : public DexFileMethodInliner {
+  public:
+    X86DexFileMethodInliner();
+    ~X86DexFileMethodInliner();
+
+    void FindIntrinsics(const DexFile* dex_file);
+
+  private:
+    static const IntrinsicDef kIntrinsicMethods[];
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_X86_X86_DEX_FILE_METHOD_INLINER_H_
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 9cc94e8..7b42879 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -503,9 +503,9 @@
 
 void CompilerDriver::CompileAll(jobject class_loader,
                                 const std::vector<const DexFile*>& dex_files,
-                                base::TimingLogger& timings) {
+                                TimingLogger& timings) {
   DCHECK(!Runtime::Current()->IsStarted());
-  UniquePtr<ThreadPool> thread_pool(new ThreadPool(thread_count_ - 1));
+  UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
   PreCompile(class_loader, dex_files, *thread_pool.get(), timings);
   Compile(class_loader, dex_files, *thread_pool.get(), timings);
   if (dump_stats_) {
@@ -513,10 +513,9 @@
   }
 }
 
-static DexToDexCompilationLevel GetDexToDexCompilationlevel(mirror::ClassLoader* class_loader,
-                                                            const DexFile& dex_file,
-                                                            const DexFile::ClassDef& class_def)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+static DexToDexCompilationLevel GetDexToDexCompilationlevel(
+    SirtRef<mirror::ClassLoader>& class_loader, const DexFile& dex_file,
+    const DexFile::ClassDef& class_def) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   const char* descriptor = dex_file.GetClassDescriptor(class_def);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   mirror::Class* klass = class_linker->FindClass(descriptor, class_loader);
@@ -531,7 +530,7 @@
   // function). Since image classes can be verified again while compiling an application,
   // we must prevent the DEX-to-DEX compiler from introducing them.
   // TODO: find a way to enable "quick" instructions for image classes and remove this check.
-  bool compiling_image_classes = (class_loader == NULL);
+  bool compiling_image_classes = class_loader.get() == nullptr;
   if (compiling_image_classes) {
     return kRequired;
   } else if (klass->IsVerified()) {
@@ -547,7 +546,7 @@
   }
 }
 
-void CompilerDriver::CompileOne(const mirror::ArtMethod* method, base::TimingLogger& timings) {
+void CompilerDriver::CompileOne(const mirror::ArtMethod* method, TimingLogger& timings) {
   DCHECK(!Runtime::Current()->IsStarted());
   Thread* self = Thread::Current();
   jobject jclass_loader;
@@ -569,7 +568,7 @@
   std::vector<const DexFile*> dex_files;
   dex_files.push_back(dex_file);
 
-  UniquePtr<ThreadPool> thread_pool(new ThreadPool(0U));
+  UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", 0U));
   PreCompile(jclass_loader, dex_files, *thread_pool.get(), timings);
 
   uint32_t method_idx = method->GetDexMethodIndex();
@@ -579,7 +578,8 @@
   {
     ScopedObjectAccess soa(Thread::Current());
     const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_idx);
-    mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(jclass_loader);
+    SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
+                                              soa.Decode<mirror::ClassLoader*>(jclass_loader));
     dex_to_dex_compilation_level = GetDexToDexCompilationlevel(class_loader, *dex_file, class_def);
   }
   CompileMethod(code_item, method->GetAccessFlags(), method->GetInvokeType(),
@@ -591,7 +591,7 @@
 }
 
 void CompilerDriver::Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                             ThreadPool& thread_pool, base::TimingLogger& timings) {
+                             ThreadPool& thread_pool, TimingLogger& timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -600,7 +600,7 @@
 }
 
 void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                                ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                ThreadPool& thread_pool, TimingLogger& timings) {
   LoadImageClasses(timings);
 
   Resolve(class_loader, dex_files, thread_pool, timings);
@@ -685,7 +685,7 @@
 }
 
 // Make a list of descriptors for classes to include in the image
-void CompilerDriver::LoadImageClasses(base::TimingLogger& timings)
+void CompilerDriver::LoadImageClasses(TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_) {
   if (!IsImage()) {
     return;
@@ -697,11 +697,11 @@
   ScopedObjectAccess soa(self);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   for (auto it = image_classes_->begin(), end = image_classes_->end(); it != end;) {
-    std::string descriptor(*it);
+    const std::string& descriptor(*it);
     SirtRef<mirror::Class> klass(self, class_linker->FindSystemClass(descriptor.c_str()));
     if (klass.get() == NULL) {
-      image_classes_->erase(it++);
       VLOG(compiler) << "Failed to find class " << descriptor;
+      image_classes_->erase(it++);
       self->ClearException();
     } else {
       ++it;
@@ -721,8 +721,8 @@
     for (const std::pair<uint16_t, const DexFile*>& exception_type : unresolved_exception_types) {
       uint16_t exception_type_idx = exception_type.first;
       const DexFile* dex_file = exception_type.second;
-      mirror::DexCache* dex_cache = class_linker->FindDexCache(*dex_file);
-      mirror:: ClassLoader* class_loader = NULL;
+      SirtRef<mirror::DexCache> dex_cache(self, class_linker->FindDexCache(*dex_file));
+      SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
       SirtRef<mirror::Class> klass(self, class_linker->ResolveType(*dex_file, exception_type_idx,
                                                                    dex_cache, class_loader));
       if (klass.get() == NULL) {
@@ -773,7 +773,7 @@
   MaybeAddToImageClasses(object->GetClass(), compiler_driver->image_classes_.get());
 }
 
-void CompilerDriver::UpdateImageClasses(base::TimingLogger& timings) {
+void CompilerDriver::UpdateImageClasses(TimingLogger& timings) {
   if (IsImage()) {
     timings.NewSplit("UpdateImageClasses");
 
@@ -782,15 +782,14 @@
     const char* old_cause = self->StartAssertNoThreadSuspension("ImageWriter");
     gc::Heap* heap = Runtime::Current()->GetHeap();
     // TODO: Image spaces only?
+    ScopedObjectAccess soa(Thread::Current());
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    heap->FlushAllocStack();
-    heap->GetLiveBitmap()->Walk(FindClinitImageClassesCallback, this);
+    heap->VisitObjects(FindClinitImageClassesCallback, this);
     self->EndAssertNoThreadSuspension(old_cause);
   }
 }
 
-bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file,
-                                                      uint32_t type_idx) {
+bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) {
   if (IsImage() &&
       IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) {
     if (kIsDebugBuild) {
@@ -815,7 +814,7 @@
   if (IsImage()) {
     // We resolve all const-string strings when building for the image.
     ScopedObjectAccess soa(Thread::Current());
-    mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
+    SirtRef<mirror::DexCache> dex_cache(
+        soa.Self(), Runtime::Current()->GetClassLinker()->FindDexCache(dex_file));
     Runtime::Current()->GetClassLinker()->ResolveString(dex_file, string_idx, dex_cache);
     result = true;
   }
@@ -903,26 +902,27 @@
 }
 
 static mirror::Class* ComputeCompilingMethodsClass(ScopedObjectAccess& soa,
-                                                   mirror::DexCache* dex_cache,
+                                                   SirtRef<mirror::DexCache>& dex_cache,
                                                    const DexCompilationUnit* mUnit)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // The passed dex_cache is a hint, sanity check before asking the class linker that will take a
   // lock.
   if (dex_cache->GetDexFile() != mUnit->GetDexFile()) {
-    dex_cache = mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile());
+    dex_cache.reset(mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()));
   }
-  mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader());
-  const DexFile::MethodId& referrer_method_id = mUnit->GetDexFile()->GetMethodId(mUnit->GetDexMethodIndex());
+  SirtRef<mirror::ClassLoader>
+      class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
+  const DexFile::MethodId& referrer_method_id =
+      mUnit->GetDexFile()->GetMethodId(mUnit->GetDexMethodIndex());
   return mUnit->GetClassLinker()->ResolveType(*mUnit->GetDexFile(), referrer_method_id.class_idx_,
                                               dex_cache, class_loader);
 }
 
-static mirror::ArtField* ComputeFieldReferencedFromCompilingMethod(ScopedObjectAccess& soa,
-                                                                const DexCompilationUnit* mUnit,
-                                                                uint32_t field_idx)
+static mirror::ArtField* ComputeFieldReferencedFromCompilingMethod(
+    ScopedObjectAccess& soa, const DexCompilationUnit* mUnit, uint32_t field_idx)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::DexCache* dex_cache = mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile());
-  mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader());
+  SirtRef<mirror::DexCache> dex_cache(
+      soa.Self(), mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()));
+  SirtRef<mirror::ClassLoader> class_loader(
+      soa.Self(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
   return mUnit->GetClassLinker()->ResolveField(*mUnit->GetDexFile(), field_idx, dex_cache,
                                                class_loader, false);
 }
@@ -932,8 +932,8 @@
                                                                      uint32_t method_idx,
                                                                      InvokeType type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::DexCache* dex_cache = mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile());
-  mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader());
+  SirtRef<mirror::DexCache> dex_cache(
+      soa.Self(), mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()));
+  SirtRef<mirror::ClassLoader> class_loader(
+      soa.Self(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
   return mUnit->GetClassLinker()->ResolveMethod(*mUnit->GetDexFile(), method_idx, dex_cache,
                                                 class_loader, NULL, type);
 }
@@ -947,9 +947,10 @@
   // Try to resolve field and ignore if an Incompatible Class Change Error (ie is static).
   mirror::ArtField* resolved_field = ComputeFieldReferencedFromCompilingMethod(soa, mUnit, field_idx);
   if (resolved_field != NULL && !resolved_field->IsStatic()) {
+    SirtRef<mirror::DexCache> dex_cache(soa.Self(),
+                                        resolved_field->GetDeclaringClass()->GetDexCache());
     mirror::Class* referrer_class =
-        ComputeCompilingMethodsClass(soa, resolved_field->GetDeclaringClass()->GetDexCache(),
-                                     mUnit);
+        ComputeCompilingMethodsClass(soa, dex_cache, mUnit);
     if (referrer_class != NULL) {
       mirror::Class* fields_class = resolved_field->GetDeclaringClass();
       bool access_ok = referrer_class->CanAccess(fields_class) &&
@@ -997,9 +998,9 @@
   // Try to resolve field and ignore if an Incompatible Class Change Error (ie isn't static).
   mirror::ArtField* resolved_field = ComputeFieldReferencedFromCompilingMethod(soa, mUnit, field_idx);
   if (resolved_field != NULL && resolved_field->IsStatic()) {
+    SirtRef<mirror::DexCache> dex_cache(soa.Self(),
+                                        resolved_field->GetDeclaringClass()->GetDexCache());
     mirror::Class* referrer_class =
-        ComputeCompilingMethodsClass(soa, resolved_field->GetDeclaringClass()->GetDexCache(),
-                                     mUnit);
+        ComputeCompilingMethodsClass(soa, dex_cache, mUnit);
     if (referrer_class != NULL) {
       mirror::Class* fields_class = resolved_field->GetDeclaringClass();
       if (fields_class == referrer_class) {
@@ -1085,7 +1086,7 @@
   *direct_code = 0;
   *direct_method = 0;
   bool use_dex_cache = false;
-  bool compiling_boot = Runtime::Current()->GetHeap()->GetContinuousSpaces().size() == 1;
+  const bool compiling_boot = Runtime::Current()->GetHeap()->IsCompilingBoot();
   if (compiler_backend_ == kPortable) {
     if (sharp_type != kStatic && sharp_type != kDirect) {
       return;
@@ -1198,9 +1199,9 @@
     }
     // Don't try to fast-path if we don't understand the caller's class or this appears to be an
     // Incompatible Class Change Error.
+    SirtRef<mirror::DexCache> dex_cache(soa.Self(),
+                                        resolved_method->GetDeclaringClass()->GetDexCache());
     mirror::Class* referrer_class =
-        ComputeCompilingMethodsClass(soa, resolved_method->GetDeclaringClass()->GetDexCache(),
-                                     mUnit);
+        ComputeCompilingMethodsClass(soa, dex_cache, mUnit);
     bool icce = resolved_method->CheckIncompatibleClassChange(*invoke_type);
     if (referrer_class != NULL && !icce) {
       mirror::Class* methods_class = resolved_method->GetDeclaringClass();
@@ -1254,10 +1255,8 @@
           const MethodReference* devirt_map_target =
               verifier::MethodVerifier::GetDevirtMap(caller_method, dex_pc);
           if (devirt_map_target != NULL) {
-            mirror::DexCache* target_dex_cache =
-                mUnit->GetClassLinker()->FindDexCache(*devirt_map_target->dex_file);
-            mirror::ClassLoader* class_loader =
-                soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader());
+            SirtRef<mirror::DexCache> target_dex_cache(
+                soa.Self(), mUnit->GetClassLinker()->FindDexCache(*devirt_map_target->dex_file));
+            SirtRef<mirror::ClassLoader> class_loader(
+                soa.Self(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
             mirror::ArtMethod* called_method =
                 mUnit->GetClassLinker()->ResolveMethod(*devirt_map_target->dex_file,
                                                        devirt_map_target->dex_method_index,
@@ -1509,13 +1508,11 @@
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
   if (!SkipClass(class_linker, jclass_loader, dex_file, class_def)) {
     ScopedObjectAccess soa(self);
-    mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(jclass_loader);
-    mirror::DexCache* dex_cache = class_linker->FindDexCache(dex_file);
-
+    SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(jclass_loader));
+    SirtRef<mirror::DexCache> dex_cache(soa.Self(), class_linker->FindDexCache(dex_file));
     // Resolve the class.
     mirror::Class* klass = class_linker->ResolveType(dex_file, class_def.class_idx_, dex_cache,
                                                      class_loader);
-
     bool resolve_fields_and_methods;
     if (klass == NULL) {
       // Class couldn't be resolved, for example, super-class is in a different dex file. Don't
@@ -1598,8 +1595,8 @@
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* class_linker = manager->GetClassLinker();
   const DexFile& dex_file = *manager->GetDexFile();
-  mirror::DexCache* dex_cache = class_linker->FindDexCache(dex_file);
-  mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(manager->GetClassLoader());
+  SirtRef<mirror::DexCache> dex_cache(soa.Self(), class_linker->FindDexCache(dex_file));
+  SirtRef<mirror::ClassLoader> class_loader(
+      soa.Self(), soa.Decode<mirror::ClassLoader*>(manager->GetClassLoader()));
   mirror::Class* klass = class_linker->ResolveType(dex_file, type_idx, dex_cache, class_loader);
 
   if (klass == NULL) {
@@ -1616,7 +1613,7 @@
 }
 
 void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_file,
-                                    ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                    ThreadPool& thread_pool, TimingLogger& timings) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
   // TODO: we could resolve strings here, although the string table is largely filled with class
@@ -1635,7 +1632,7 @@
 }
 
 void CompilerDriver::Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                            ThreadPool& thread_pool, base::TimingLogger& timings) {
+                            ThreadPool& thread_pool, TimingLogger& timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -1652,8 +1649,9 @@
   const char* descriptor = dex_file.GetClassDescriptor(class_def);
   ClassLinker* class_linker = manager->GetClassLinker();
   jobject jclass_loader = manager->GetClassLoader();
-  mirror::Class* klass = class_linker->FindClass(descriptor,
-                                                 soa.Decode<mirror::ClassLoader*>(jclass_loader));
+  SirtRef<mirror::ClassLoader> class_loader(
+      soa.Self(), soa.Decode<mirror::ClassLoader*>(jclass_loader));
+  mirror::Class* klass = class_linker->FindClass(descriptor, class_loader);
   if (klass == NULL) {
     CHECK(soa.Self()->IsExceptionPending());
     soa.Self()->ClearException();
@@ -1663,11 +1661,10 @@
      * This is to ensure the class is structurally sound for compilation. An unsound class
      * will be rejected by the verifier and later skipped during compilation in the compiler.
      */
-    mirror::DexCache* dex_cache = class_linker->FindDexCache(dex_file);
+    SirtRef<mirror::DexCache> dex_cache(soa.Self(), class_linker->FindDexCache(dex_file));
     std::string error_msg;
-    if (verifier::MethodVerifier::VerifyClass(&dex_file, dex_cache,
-                                              soa.Decode<mirror::ClassLoader*>(jclass_loader),
-                                              &class_def, true, &error_msg) ==
+    if (verifier::MethodVerifier::VerifyClass(&dex_file, dex_cache, class_loader, &class_def, true,
+                                              &error_msg) ==
                                                   verifier::MethodVerifier::kHardFailure) {
       LOG(ERROR) << "Verification failed on class " << PrettyDescriptor(descriptor)
                  << " because: " << error_msg;
@@ -1689,7 +1686,7 @@
 }
 
 void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file,
-                                   ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                   ThreadPool& thread_pool, TimingLogger& timings) {
   timings.NewSplit("Verify Dex File");
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, thread_pool);
@@ -2124,7 +2121,8 @@
   const char* descriptor = dex_file.StringDataByIdx(class_type_id.descriptor_idx_);
 
   ScopedObjectAccess soa(Thread::Current());
-  mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(jclass_loader);
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
+                                            soa.Decode<mirror::ClassLoader*>(jclass_loader));
   mirror::Class* klass = manager->GetClassLinker()->FindClass(descriptor, class_loader);
 
   if (klass != NULL && !SkipClass(jclass_loader, dex_file, klass)) {
@@ -2194,7 +2192,7 @@
 }
 
 void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file,
-                                       ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                       ThreadPool& thread_pool, TimingLogger& timings) {
   timings.NewSplit("InitializeNoClinit");
 #ifndef NDEBUG
   // Sanity check blacklist descriptors.
@@ -2212,7 +2210,7 @@
 
 void CompilerDriver::InitializeClasses(jobject class_loader,
                                        const std::vector<const DexFile*>& dex_files,
-                                       ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                       ThreadPool& thread_pool, TimingLogger& timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -2221,7 +2219,7 @@
 }
 
 void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                       ThreadPool& thread_pool, base::TimingLogger& timings) {
+                       ThreadPool& thread_pool, TimingLogger& timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -2253,7 +2251,8 @@
   DexToDexCompilationLevel dex_to_dex_compilation_level = kDontDexToDexCompile;
   {
     ScopedObjectAccess soa(Thread::Current());
-    mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(jclass_loader);
+    SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
+                                              soa.Decode<mirror::ClassLoader*>(jclass_loader));
     dex_to_dex_compilation_level = GetDexToDexCompilationlevel(class_loader, dex_file, class_def);
   }
   ClassDataItemIterator it(dex_file, class_data);
@@ -2301,7 +2300,7 @@
 }
 
 void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file,
-                                    ThreadPool& thread_pool, base::TimingLogger& timings) {
+                                    ThreadPool& thread_pool, TimingLogger& timings) {
   timings.NewSplit("Compile Dex File");
   ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this,
                                      &dex_file, thread_pool);
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 9321f06..7e81849 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -98,11 +98,11 @@
   ~CompilerDriver();
 
   void CompileAll(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                  base::TimingLogger& timings)
+                  TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   // Compile a single Method
-  void CompileOne(const mirror::ArtMethod* method, base::TimingLogger& timings)
+  void CompileOne(const mirror::ArtMethod* method, TimingLogger& timings)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const InstructionSet& GetInstructionSet() const {
@@ -340,42 +340,43 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                  ThreadPool& thread_pool, base::TimingLogger& timings)
+                  ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
-  void LoadImageClasses(base::TimingLogger& timings);
+  void LoadImageClasses(TimingLogger& timings);
 
   // Attempt to resolve all type, methods, fields, and strings
   // referenced from code in the dex file following PathClassLoader
   // ordering semantics.
   void Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool& thread_pool, base::TimingLogger& timings)
+               ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void ResolveDexFile(jobject class_loader, const DexFile& dex_file,
-                      ThreadPool& thread_pool, base::TimingLogger& timings)
+                      ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   void Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-              ThreadPool& thread_pool, base::TimingLogger& timings);
+              ThreadPool& thread_pool, TimingLogger& timings);
   void VerifyDexFile(jobject class_loader, const DexFile& dex_file,
-                     ThreadPool& thread_pool, base::TimingLogger& timings)
+                     ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   void InitializeClasses(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                         ThreadPool& thread_pool, base::TimingLogger& timings)
+                         ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void InitializeClasses(jobject class_loader, const DexFile& dex_file,
-                         ThreadPool& thread_pool, base::TimingLogger& timings)
+                         ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_, compiled_classes_lock_);
 
-  void UpdateImageClasses(base::TimingLogger& timings);
+  void UpdateImageClasses(TimingLogger& timings)
+      LOCKS_EXCLUDED(Locks::mutator_lock_);
   static void FindClinitImageClassesCallback(mirror::Object* object, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool& thread_pool, base::TimingLogger& timings);
+               ThreadPool& thread_pool, TimingLogger& timings);
   void CompileDexFile(jobject class_loader, const DexFile& dex_file,
-                      ThreadPool& thread_pool, base::TimingLogger& timings)
+                      ThreadPool& thread_pool, TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void CompileMethod(const DexFile::CodeItem* code_item, uint32_t access_flags,
                      InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx,
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index c6687bb..a5eb94f 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -36,12 +36,13 @@
 class CompilerDriverTest : public CommonTest {
  protected:
   void CompileAll(jobject class_loader) LOCKS_EXCLUDED(Locks::mutator_lock_) {
-    base::TimingLogger timings("CompilerDriverTest::CompileAll", false, false);
+    TimingLogger timings("CompilerDriverTest::CompileAll", false, false);
     timings.StartSplit("CompileAll");
     compiler_driver_->CompileAll(class_loader,
                                  Runtime::Current()->GetCompileTimeClassPath(class_loader),
                                  timings);
     MakeAllExecutable(class_loader);
+    timings.EndSplit();
   }
 
   void EnsureCompiled(jobject class_loader, const char* class_name, const char* method,
@@ -78,7 +79,9 @@
       const DexFile::ClassDef& class_def = dex_file.GetClassDef(i);
       const char* descriptor = dex_file.GetClassDescriptor(class_def);
       ScopedObjectAccess soa(Thread::Current());
-      mirror::Class* c = class_linker->FindClass(descriptor, soa.Decode<mirror::ClassLoader*>(class_loader));
+      Thread* self = Thread::Current();
+      SirtRef<mirror::ClassLoader> loader(self, soa.Decode<mirror::ClassLoader*>(class_loader));
+      mirror::Class* c = class_linker->FindClass(descriptor, loader);
       CHECK(c != NULL);
       for (size_t i = 0; i < c->NumDirectMethods(); i++) {
         MakeExecutable(c->GetDirectMethod(i));
@@ -142,8 +145,9 @@
   jobject class_loader;
   {
     ScopedObjectAccess soa(Thread::Current());
-    CompileVirtualMethod(NULL, "java.lang.Class", "isFinalizable", "()Z");
-    CompileDirectMethod(NULL, "java.lang.Object", "<init>", "()V");
+    SirtRef<mirror::ClassLoader> null_loader(soa.Self(), nullptr);
+    CompileVirtualMethod(null_loader, "java.lang.Class", "isFinalizable", "()Z");
+    CompileDirectMethod(null_loader, "java.lang.Object", "<init>", "()V");
     class_loader = LoadDex("AbstractMethod");
   }
   ASSERT_TRUE(class_loader != NULL);
diff --git a/compiler/elf_writer_mclinker.cc b/compiler/elf_writer_mclinker.cc
index 8e19ef6..f3fef23 100644
--- a/compiler/elf_writer_mclinker.cc
+++ b/compiler/elf_writer_mclinker.cc
@@ -358,10 +358,11 @@
     mirror::ArtMethod* method = NULL;
     if (compiler_driver_->IsImage()) {
       ClassLinker* linker = Runtime::Current()->GetClassLinker();
-      mirror::DexCache* dex_cache = linker->FindDexCache(dex_file);
       // Unchecked as we hold mutator_lock_ on entry.
       ScopedObjectAccessUnchecked soa(Thread::Current());
-      method = linker->ResolveMethod(dex_file, method_idx, dex_cache, NULL, NULL, invoke_type);
+      SirtRef<mirror::DexCache> dex_cache(soa.Self(), linker->FindDexCache(dex_file));
+      SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
+      method = linker->ResolveMethod(dex_file, method_idx, dex_cache, class_loader, NULL, invoke_type);
       CHECK(method != NULL);
     }
     const CompiledMethod* compiled_method =
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index a8b7c88..c71cc97 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -46,7 +46,7 @@
     {
       jobject class_loader = NULL;
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-      base::TimingLogger timings("ImageTest::WriteRead", false, false);
+      TimingLogger timings("ImageTest::WriteRead", false, false);
       timings.StartSplit("CompileAll");
 #if defined(ART_USE_PORTABLE_COMPILER)
       // TODO: we disable this for portable so the test executes in a reasonable amount of time.
@@ -60,13 +60,14 @@
 
       ScopedObjectAccess soa(Thread::Current());
       OatWriter oat_writer(class_linker->GetBootClassPath(),
-                           0, 0, "", compiler_driver_.get());
+                           0, 0, "", compiler_driver_.get(), &timings);
       bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(),
                                                 !kIsTargetBuild,
                                                 class_linker->GetBootClassPath(),
                                                 oat_writer,
                                                 tmp_elf.GetFile());
       ASSERT_TRUE(success);
+      timings.EndSplit();
     }
   }
   // Workaround bug that mcld::Linker::emit closes tmp_elf by reopening as tmp_oat.
@@ -94,11 +95,11 @@
     ASSERT_NE(0U, image_header.GetImageBitmapSize());
 
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    ASSERT_EQ(1U, heap->GetContinuousSpaces().size());
-    gc::space::ContinuousSpace* space = heap->GetContinuousSpaces().front();
+    ASSERT_TRUE(!heap->GetContinuousSpaces().empty());
+    gc::space::ContinuousSpace* space = heap->GetNonMovingSpace();
     ASSERT_FALSE(space->IsImageSpace());
     ASSERT_TRUE(space != NULL);
-    ASSERT_TRUE(space->IsDlMallocSpace());
+    ASSERT_TRUE(space->IsMallocSpace());
     ASSERT_GE(sizeof(image_header) + space->Size(), static_cast<size_t>(file->GetLength()));
   }
 
@@ -139,11 +140,8 @@
   class_linker_ = runtime_->GetClassLinker();
 
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  ASSERT_EQ(2U, heap->GetContinuousSpaces().size());
-  ASSERT_TRUE(heap->GetContinuousSpaces()[0]->IsImageSpace());
-  ASSERT_FALSE(heap->GetContinuousSpaces()[0]->IsDlMallocSpace());
-  ASSERT_FALSE(heap->GetContinuousSpaces()[1]->IsImageSpace());
-  ASSERT_TRUE(heap->GetContinuousSpaces()[1]->IsDlMallocSpace());
+  ASSERT_TRUE(heap->HasImageSpace());
+  ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
 
   gc::space::ImageSpace* image_space = heap->GetImageSpace();
   image_space->VerifyImageAllocations();
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 75be2c9..90e2c65 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -75,8 +75,6 @@
   image_begin_ = reinterpret_cast<byte*>(image_begin);
 
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  const std::vector<DexCache*>& all_dex_caches = class_linker->GetDexCaches();
-  dex_caches_.insert(all_dex_caches.begin(), all_dex_caches.end());
 
   UniquePtr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
   if (oat_file.get() == NULL) {
@@ -121,22 +119,16 @@
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
   heap->CollectGarbage(false);  // Remove garbage.
-  // Trim size of alloc spaces.
-  for (const auto& space : heap->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace()) {
-      space->AsDlMallocSpace()->Trim();
-    }
-  }
 
   if (!AllocMemory()) {
     return false;
   }
-#ifndef NDEBUG
-  {  // NOLINT(whitespace/braces)
+
+  if (kIsDebugBuild) {
     ScopedObjectAccess soa(Thread::Current());
     CheckNonImageClassesRemoved();
   }
-#endif
+
   Thread::Current()->TransitionFromSuspendedToRunnable();
   size_t oat_loaded_size = 0;
   size_t oat_data_offset = 0;
@@ -144,8 +136,6 @@
   CalculateNewObjectOffsets(oat_loaded_size, oat_data_offset);
   CopyAndFixupObjects();
   PatchOatCodeAndMethods();
-  // Record allocations into the image bitmap.
-  RecordImageAllocations();
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
 
   UniquePtr<File> image_file(OS::CreateEmptyFile(image_filename.c_str()));
@@ -178,39 +168,82 @@
   return true;
 }
 
-void ImageWriter::RecordImageAllocations() {
-  uint64_t start_time = NanoTime();
-  CHECK(image_bitmap_.get() != nullptr);
-  for (const auto& it : offsets_) {
-    mirror::Object* obj = reinterpret_cast<mirror::Object*>(image_->Begin() + it.second);
-    DCHECK_ALIGNED(obj, kObjectAlignment);
-    image_bitmap_->Set(obj);
+void ImageWriter::SetImageOffset(mirror::Object* object, size_t offset) {
+  DCHECK(object != nullptr);
+  DCHECK_NE(offset, 0U);
+  DCHECK(!IsImageOffsetAssigned(object));
+  mirror::Object* obj = reinterpret_cast<mirror::Object*>(image_->Begin() + offset);
+  DCHECK_ALIGNED(obj, kObjectAlignment);
+  image_bitmap_->Set(obj);
+  // Before we stomp over the lock word, save the hash code for later.
+  Monitor::Deflate(Thread::Current(), object);
+  LockWord lw(object->GetLockWord());
+  switch (lw.GetState()) {
+    case LockWord::kFatLocked: {
+      LOG(FATAL) << "Fat locked object " << obj << " found during object copy";
+      break;
+    }
+    case LockWord::kThinLocked: {
+      LOG(FATAL) << "Thin locked object " << obj << " found during object copy";
+      break;
+    }
+    case LockWord::kUnlocked:
+      // No hash, don't need to save it.
+      break;
+    case LockWord::kHashCode:
+      saved_hashes_.push_back(std::make_pair(obj, lw.GetHashCode()));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable.";
+      break;
   }
-  LOG(INFO) << "RecordImageAllocations took " << PrettyDuration(NanoTime() - start_time);
+  object->SetLockWord(LockWord::FromForwardingAddress(offset));
+  DCHECK(IsImageOffsetAssigned(object));
+}
+
+void ImageWriter::AssignImageOffset(mirror::Object* object) {
+  DCHECK(object != nullptr);
+  SetImageOffset(object, image_end_);
+  image_end_ += RoundUp(object->SizeOf(), 8);  // 64-bit alignment
+  DCHECK_LT(image_end_, image_->Size());
+}
+
+bool ImageWriter::IsImageOffsetAssigned(const mirror::Object* object) const {
+  DCHECK(object != nullptr);
+  return object->GetLockWord().GetState() == LockWord::kForwardingAddress;
+}
+
+size_t ImageWriter::GetImageOffset(const mirror::Object* object) const {
+  DCHECK(object != nullptr);
+  DCHECK(IsImageOffsetAssigned(object));
+  LockWord lock_word = object->GetLockWord();
+  size_t offset = lock_word.ForwardingAddress();
+  DCHECK_LT(offset, image_end_);
+  return offset;
 }
 
 bool ImageWriter::AllocMemory() {
-  size_t size = 0;
-  for (const auto& space : Runtime::Current()->GetHeap()->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace()) {
-      size += space->Size();
-    }
-  }
-
-  int prot = PROT_READ | PROT_WRITE;
-  size_t length = RoundUp(size, kPageSize);
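+  // Size the image mapping to the heap's total footprint so that every copied object fits.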
+  size_t length = RoundUp(Runtime::Current()->GetHeap()->GetTotalMemory(), kPageSize);
   std::string error_msg;
-  image_.reset(MemMap::MapAnonymous("image writer image", NULL, length, prot, &error_msg));
+  image_.reset(MemMap::MapAnonymous("image writer image", NULL, length, PROT_READ | PROT_WRITE,
+                                    &error_msg));
   if (UNLIKELY(image_.get() == nullptr)) {
     LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg;
     return false;
   }
+
+  // Create the image bitmap.
+  image_bitmap_.reset(gc::accounting::SpaceBitmap::Create("image bitmap", image_->Begin(),
+                                                          length));
+  if (image_bitmap_.get() == nullptr) {
+    LOG(ERROR) << "Failed to allocate memory for image bitmap";
+    return false;
+  }
   return true;
 }
 
 void ImageWriter::ComputeLazyFieldsForImageClasses() {
-  Runtime* runtime = Runtime::Current();
-  ClassLinker* class_linker = runtime->GetClassLinker();
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   class_linker->VisitClassesWithoutClassesLock(ComputeLazyFieldsForClassesVisitor, NULL);
 }
 
@@ -223,13 +256,17 @@
   if (!obj->GetClass()->IsStringClass()) {
     return;
   }
-  String* string = obj->AsString();
+  mirror::String* string = obj->AsString();
   const uint16_t* utf16_string = string->GetCharArray()->GetData() + string->GetOffset();
-  ImageWriter* writer = reinterpret_cast<ImageWriter*>(arg);
-  for (DexCache* dex_cache : writer->dex_caches_) {
+  for (DexCache* dex_cache : Runtime::Current()->GetClassLinker()->GetDexCaches()) {
     const DexFile& dex_file = *dex_cache->GetDexFile();
-    const DexFile::StringId* string_id = dex_file.FindStringId(utf16_string);
-    if (string_id != NULL) {
+    const DexFile::StringId* string_id;
+    if (UNLIKELY(string->GetLength() == 0)) {
+      string_id = dex_file.FindStringId("");
+    } else {
+      string_id = dex_file.FindStringId(utf16_string);
+    }
+    if (string_id != nullptr) {
       // This string occurs in this dex file, assign the dex cache entry.
       uint32_t string_idx = dex_file.GetIndexForStringId(*string_id);
       if (dex_cache->GetResolvedString(string_idx) == NULL) {
@@ -239,13 +276,9 @@
   }
 }
 
-void ImageWriter::ComputeEagerResolvedStrings()
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  // TODO: Check image spaces only?
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
-  heap->FlushAllocStack();
-  heap->GetLiveBitmap()->Walk(ComputeEagerResolvedStringsCallback, this);
+void ImageWriter::ComputeEagerResolvedStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  Runtime::Current()->GetHeap()->VisitObjects(ComputeEagerResolvedStringsCallback, this);
 }
 
 bool ImageWriter::IsImageClass(const Class* klass) {
@@ -278,7 +311,7 @@
 
   // Clear references to removed classes from the DexCaches.
   ArtMethod* resolution_method = runtime->GetResolutionMethod();
-  for (DexCache* dex_cache : dex_caches_) {
+  for (DexCache* dex_cache : class_linker->GetDexCaches()) {
     for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
       Class* klass = dex_cache->GetResolvedType(i);
       if (klass != NULL && !IsImageClass(klass)) {
@@ -311,31 +344,22 @@
 
 void ImageWriter::CheckNonImageClassesRemoved()
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (compiler_driver_.GetImageClasses() == NULL) {
-    return;
+  if (compiler_driver_.GetImageClasses() != nullptr) {
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    heap->VisitObjects(CheckNonImageClassesRemovedCallback, this);
   }
-
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  Thread* self = Thread::Current();
-  {
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    heap->FlushAllocStack();
-  }
-
-  ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  heap->GetLiveBitmap()->Walk(CheckNonImageClassesRemovedCallback, this);
 }
 
 void ImageWriter::CheckNonImageClassesRemovedCallback(Object* obj, void* arg) {
   ImageWriter* image_writer = reinterpret_cast<ImageWriter*>(arg);
-  if (!obj->IsClass()) {
-    return;
-  }
-  Class* klass = obj->AsClass();
-  if (!image_writer->IsImageClass(klass)) {
-    image_writer->DumpImageClasses();
-    CHECK(image_writer->IsImageClass(klass)) << ClassHelper(klass).GetDescriptor()
-                                             << " " << PrettyDescriptor(klass);
+  if (obj->IsClass()) {
+    Class* klass = obj->AsClass();
+    if (!image_writer->IsImageClass(klass)) {
+      image_writer->DumpImageClasses();
+      CHECK(image_writer->IsImageClass(klass)) << ClassHelper(klass).GetDescriptor()
+                                               << " " << PrettyDescriptor(klass);
+    }
   }
 }
 
@@ -347,53 +371,50 @@
   }
 }
 
-void ImageWriter::CalculateNewObjectOffsetsCallback(Object* obj, void* arg) {
+void ImageWriter::CalculateObjectOffsets(Object* obj) {
   DCHECK(obj != NULL);
-  DCHECK(arg != NULL);
-  ImageWriter* image_writer = reinterpret_cast<ImageWriter*>(arg);
-
   // if it is a string, we want to intern it if its not interned.
   if (obj->GetClass()->IsStringClass()) {
     // we must be an interned string that was forward referenced and already assigned
-    if (image_writer->IsImageOffsetAssigned(obj)) {
+    if (IsImageOffsetAssigned(obj)) {
       DCHECK_EQ(obj, obj->AsString()->Intern());
       return;
     }
-    SirtRef<String> interned(Thread::Current(), obj->AsString()->Intern());
-    if (obj != interned.get()) {
-      if (!image_writer->IsImageOffsetAssigned(interned.get())) {
+    Thread* self = Thread::Current();
+    SirtRef<Object> sirt_obj(self, obj);
+    mirror::String* interned = obj->AsString()->Intern();
+    if (sirt_obj.get() != interned) {
+      if (!IsImageOffsetAssigned(interned)) {
         // interned obj is after us, allocate its location early
-        image_writer->AssignImageOffset(interned.get());
+        AssignImageOffset(interned);
       }
       // point those looking for this object to the interned version.
-      image_writer->SetImageOffset(obj, image_writer->GetImageOffset(interned.get()));
+      SetImageOffset(sirt_obj.get(), GetImageOffset(interned));
       return;
     }
     // else (obj == interned), nothing to do but fall through to the normal case
   }
 
-  image_writer->AssignImageOffset(obj);
+  AssignImageOffset(obj);
 }
 
 ObjectArray<Object>* ImageWriter::CreateImageRoots() const {
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
-  Class* object_array_class = class_linker->FindSystemClass("[Ljava/lang/Object;");
   Thread* self = Thread::Current();
+  SirtRef<Class> object_array_class(self, class_linker->FindSystemClass("[Ljava/lang/Object;"));
 
   // build an Object[] of all the DexCaches used in the source_space_
-  ObjectArray<Object>* dex_caches = ObjectArray<Object>::Alloc(self, object_array_class,
-                                                               dex_caches_.size());
+  ObjectArray<Object>* dex_caches = ObjectArray<Object>::Alloc(self, object_array_class.get(),
+                                                               class_linker->GetDexCaches().size());
   int i = 0;
-  for (DexCache* dex_cache : dex_caches_) {
+  for (DexCache* dex_cache : class_linker->GetDexCaches()) {
     dex_caches->Set(i++, dex_cache);
   }
 
   // build an Object[] of the roots needed to restore the runtime
-  SirtRef<ObjectArray<Object> >
-      image_roots(self,
-                  ObjectArray<Object>::Alloc(self, object_array_class,
-                                             ImageHeader::kImageRootsMax));
+  SirtRef<ObjectArray<Object> > image_roots(
+      self, ObjectArray<Object>::Alloc(self, object_array_class.get(), ImageHeader::kImageRootsMax));
   image_roots->Set(ImageHeader::kResolutionMethod, runtime->GetResolutionMethod());
   image_roots->Set(ImageHeader::kImtConflictMethod, runtime->GetImtConflictMethod());
   image_roots->Set(ImageHeader::kDefaultImt, runtime->GetDefaultImt());
@@ -405,24 +426,82 @@
                    runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs));
   image_roots->Set(ImageHeader::kOatLocation,
                    String::AllocFromModifiedUtf8(self, oat_file_->GetLocation().c_str()));
-  image_roots->Set(ImageHeader::kDexCaches,
-                   dex_caches);
-  image_roots->Set(ImageHeader::kClassRoots,
-                   class_linker->GetClassRoots());
+  image_roots->Set(ImageHeader::kDexCaches, dex_caches);
+  image_roots->Set(ImageHeader::kClassRoots, class_linker->GetClassRoots());
   for (int i = 0; i < ImageHeader::kImageRootsMax; i++) {
     CHECK(image_roots->Get(i) != NULL);
   }
   return image_roots.get();
 }
 
+// Walk instance fields of the given Class. Separate function to allow recursion on the super
+// class.
+void ImageWriter::WalkInstanceFields(mirror::Object* obj, mirror::Class* klass) {
+  // Visit fields of parent classes first.
+  SirtRef<mirror::Class> sirt_class(Thread::Current(), klass);
+  mirror::Class* super = sirt_class->GetSuperClass();
+  if (super != nullptr) {
+    WalkInstanceFields(obj, super);
+  }
+  // Then visit the class's own reference instance fields.
+  size_t num_reference_fields = sirt_class->NumReferenceInstanceFields();
+  for (size_t i = 0; i < num_reference_fields; ++i) {
+    mirror::ArtField* field = sirt_class->GetInstanceField(i);
+    MemberOffset field_offset = field->GetOffset();
+    mirror::Object* value = obj->GetFieldObject<mirror::Object*>(field_offset, false);
+    if (value != nullptr) {
+      WalkFieldsInOrder(value);
+    }
+  }
+}
+
+// For an unvisited object, visit it then all its children found via fields.
+void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) {
+  if (!IsImageOffsetAssigned(obj)) {
+    // Walk instance fields of all objects
+    Thread* self = Thread::Current();
+    SirtRef<mirror::Object> sirt_obj(self, obj);
+    SirtRef<mirror::Class> klass(self, obj->GetClass());
+    // visit the object itself.
+    CalculateObjectOffsets(sirt_obj.get());
+    WalkInstanceFields(sirt_obj.get(), klass.get());
+    // Walk static fields of a Class.
+    if (sirt_obj->IsClass()) {
+      size_t num_static_fields = klass->NumReferenceStaticFields();
+      for (size_t i = 0; i < num_static_fields; ++i) {
+        mirror::ArtField* field = klass->GetStaticField(i);
+        MemberOffset field_offset = field->GetOffset();
+        mirror::Object* value = sirt_obj->GetFieldObject<mirror::Object*>(field_offset, false);
+        if (value != nullptr) {
+          WalkFieldsInOrder(value);
+        }
+      }
+    } else if (sirt_obj->IsObjectArray()) {
+      // Walk elements of an object array.
+      int32_t length = sirt_obj->AsObjectArray<mirror::Object>()->GetLength();
+      for (int32_t i = 0; i < length; i++) {
+        mirror::ObjectArray<mirror::Object>* obj_array = sirt_obj->AsObjectArray<mirror::Object>();
+        mirror::Object* value = obj_array->Get(i);
+        if (value != nullptr) {
+          WalkFieldsInOrder(value);
+        }
+      }
+    }
+  }
+}
+
+void ImageWriter::WalkFieldsCallback(mirror::Object* obj, void* arg) {
+  ImageWriter* writer = reinterpret_cast<ImageWriter*>(arg);
+  DCHECK(writer != nullptr);
+  writer->WalkFieldsInOrder(obj);
+}
+
 void ImageWriter::CalculateNewObjectOffsets(size_t oat_loaded_size, size_t oat_data_offset) {
   CHECK_NE(0U, oat_loaded_size);
   Thread* self = Thread::Current();
   SirtRef<ObjectArray<Object> > image_roots(self, CreateImageRoots());
 
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  const auto& spaces = heap->GetContinuousSpaces();
-  DCHECK(!spaces.empty());
   DCHECK_EQ(0U, image_end_);
 
   // Leave space for the header, but do not write it yet, we need to
@@ -431,21 +510,14 @@
 
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    heap->FlushAllocStack();
     // TODO: Image spaces only?
-    // TODO: Add InOrderWalk to heap bitmap.
     const char* old = self->StartAssertNoThreadSuspension("ImageWriter");
-    DCHECK(heap->GetLargeObjectsSpace()->GetLiveObjects()->IsEmpty());
-    for (const auto& space : spaces) {
-      space->GetLiveBitmap()->InOrderWalk(CalculateNewObjectOffsetsCallback, this);
-      DCHECK_LT(image_end_, image_->Size());
-    }
+    DCHECK_LT(image_end_, image_->Size());
+    // Walk all objects in field order, assigning image offsets. As a side effect this deflates
+    // monitors and replaces each object's lock word with its forwarding address.
+    heap->VisitObjects(WalkFieldsCallback, this);
     self->EndAssertNoThreadSuspension(old);
   }
 
-  // Create the image bitmap.
-  image_bitmap_.reset(gc::accounting::SpaceBitmap::Create("image bitmap", image_->Begin(),
-                                                          image_end_));
   const byte* oat_file_begin = image_begin_ + RoundUp(image_end_, kPageSize);
   const byte* oat_file_end = oat_file_begin + oat_loaded_size;
   oat_data_begin_ = oat_file_begin + oat_data_offset;
@@ -453,10 +525,13 @@
 
   // Return to write header at start of image with future location of image_roots. At this point,
   // image_end_ is the size of the image (excluding bitmaps).
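+  // Compute the page-aligned size of the bitmap needed to cover image_end_ bytes of objects;
+  // this is what the header records, not the full bitmap allocated in AllocMemory().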
+  const size_t heap_bytes_per_bitmap_byte = kBitsPerByte * gc::accounting::SpaceBitmap::kAlignment;
+  const size_t bitmap_bytes = RoundUp(image_end_, heap_bytes_per_bitmap_byte) /
+      heap_bytes_per_bitmap_byte;
   ImageHeader image_header(reinterpret_cast<uint32_t>(image_begin_),
                            static_cast<uint32_t>(image_end_),
                            RoundUp(image_end_, kPageSize),
-                           image_bitmap_->Size(),
+                           RoundUp(bitmap_bytes, kPageSize),
                            reinterpret_cast<uint32_t>(GetImageAddress(image_roots.get())),
                            oat_file_->GetOatHeader().GetChecksum(),
                            reinterpret_cast<uint32_t>(oat_file_begin),
@@ -477,17 +552,19 @@
   heap->DisableObjectValidation();
   // TODO: Image spaces only?
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  heap->FlushAllocStack();
-  heap->GetLiveBitmap()->Walk(CopyAndFixupObjectsCallback, this);
+  heap->VisitObjects(CopyAndFixupObjectsCallback, this);
+  // Fix up the objects that previously had hash codes.
+  for (const std::pair<mirror::Object*, uint32_t>& hash_pair : saved_hashes_) {
+    hash_pair.first->SetLockWord(LockWord::FromHashCode(hash_pair.second));
+  }
+  saved_hashes_.clear();
   self->EndAssertNoThreadSuspension(old_cause);
 }
 
-void ImageWriter::CopyAndFixupObjectsCallback(Object* object, void* arg) {
-  DCHECK(object != NULL);
+void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) {
+  DCHECK(obj != NULL);
   DCHECK(arg != NULL);
-  const Object* obj = object;
   ImageWriter* image_writer = reinterpret_cast<ImageWriter*>(arg);
-
   // see GetLocalAddress for similar computation
   size_t offset = image_writer->GetImageOffset(obj);
   byte* dst = image_writer->image_->Begin() + offset;
@@ -498,33 +575,7 @@
   Object* copy = reinterpret_cast<Object*>(dst);
   // Write in a hash code of objects which have inflated monitors or a hash code in their monitor
   // word.
-  LockWord lw(copy->GetLockWord());
-  switch (lw.GetState()) {
-    case LockWord::kFatLocked: {
-      Monitor* monitor = lw.FatLockMonitor();
-      CHECK(monitor != nullptr);
-      CHECK(!monitor->IsLocked());
-      if (monitor->HasHashCode()) {
-        copy->SetLockWord(LockWord::FromHashCode(monitor->GetHashCode()));
-      } else {
-        copy->SetLockWord(LockWord());
-      }
-      break;
-    }
-    case LockWord::kThinLocked: {
-      LOG(FATAL) << "Thin locked object " << obj << " found during object copy";
-      break;
-    }
-    case LockWord::kUnlocked:
-      break;
-    case LockWord::kHashCode:
-      // Do nothing since we can just keep the same hash code.
-      CHECK_NE(lw.GetHashCode(), 0);
-      break;
-    default:
-      LOG(FATAL) << "Unreachable.";
-      break;
-  }
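+  // Clear the copy's lock word; saved hash codes are restored after all objects are copied.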
+  copy->SetLockWord(LockWord());
   image_writer->FixupObject(obj, copy);
 }
 
@@ -629,19 +680,13 @@
   DCHECK(copy != NULL);
   Class* klass = orig->GetClass();
   DCHECK(klass != NULL);
-  FixupFields(orig,
-              copy,
-              klass->GetReferenceInstanceOffsets(),
-              false);
+  FixupFields(orig, copy, klass->GetReferenceInstanceOffsets(), false);
 }
 
 void ImageWriter::FixupStaticFields(const Class* orig, Class* copy) {
   DCHECK(orig != NULL);
   DCHECK(copy != NULL);
-  FixupFields(orig,
-              copy,
-              orig->GetReferenceStaticOffsets(),
-              true);
+  FixupFields(orig, copy, orig->GetReferenceStaticOffsets(), true);
 }
 
 void ImageWriter::FixupFields(const Object* orig,
@@ -693,11 +738,13 @@
 static ArtMethod* GetTargetMethod(const CompilerDriver::PatchInformation* patch)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  DexCache* dex_cache = class_linker->FindDexCache(patch->GetDexFile());
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, class_linker->FindDexCache(patch->GetDexFile()));
+  SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
   ArtMethod* method = class_linker->ResolveMethod(patch->GetDexFile(),
                                                   patch->GetTargetMethodIdx(),
                                                   dex_cache,
-                                                  NULL,
+                                                  class_loader,
                                                   NULL,
                                                   patch->GetTargetInvokeType());
   CHECK(method != NULL)
@@ -749,15 +796,15 @@
   // TODO: make this Thumb2 specific
   uint8_t* base = reinterpret_cast<uint8_t*>(reinterpret_cast<uint32_t>(oat_code) & ~0x1);
   uint32_t* patch_location = reinterpret_cast<uint32_t*>(base + patch->GetLiteralOffset());
-#ifndef NDEBUG
-  const DexFile::MethodId& id = patch->GetDexFile().GetMethodId(patch->GetTargetMethodIdx());
-  uint32_t expected = reinterpret_cast<uint32_t>(&id);
-  uint32_t actual = *patch_location;
-  CHECK(actual == expected || actual == value) << std::hex
-    << "actual=" << actual
-    << "expected=" << expected
-    << "value=" << value;
-#endif
+  if (kIsDebugBuild) {
+    const DexFile::MethodId& id = patch->GetDexFile().GetMethodId(patch->GetTargetMethodIdx());
+    uint32_t expected = reinterpret_cast<uint32_t>(&id);
+    uint32_t actual = *patch_location;
+    CHECK(actual == expected || actual == value) << std::hex
+      << "actual=" << actual
+      << "expected=" << expected
+      << "value=" << value;
+  }
   *patch_location = value;
   oat_header.UpdateChecksum(patch_location, sizeof(value));
 }
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 0b408e8..695f59b 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -63,31 +63,11 @@
   void RecordImageAllocations() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // We use the lock word to store the offset of the object in the image.
-  void AssignImageOffset(mirror::Object* object)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(object != NULL);
-    SetImageOffset(object, image_end_);
-    image_end_ += RoundUp(object->SizeOf(), 8);  // 64-bit alignment
-    DCHECK_LT(image_end_, image_->Size());
-  }
-
-  void SetImageOffset(mirror::Object* object, size_t offset) {
-    DCHECK(object != NULL);
-    DCHECK_NE(offset, 0U);
-    DCHECK(!IsImageOffsetAssigned(object));
-    offsets_.Put(object, offset);
-  }
-
-  size_t IsImageOffsetAssigned(const mirror::Object* object) const {
-    DCHECK(object != NULL);
-    return offsets_.find(object) != offsets_.end();
-  }
-
-  size_t GetImageOffset(const mirror::Object* object) const {
-    DCHECK(object != NULL);
-    DCHECK(IsImageOffsetAssigned(object));
-    return offsets_.find(object)->second;
-  }
+  void AssignImageOffset(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetImageOffset(mirror::Object* object, size_t offset)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsImageOffsetAssigned(const mirror::Object* object) const;
+  size_t GetImageOffset(const mirror::Object* object) const;
 
   mirror::Object* GetImageAddress(const mirror::Object* object) const {
     if (object == NULL) {
@@ -147,7 +127,14 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::ObjectArray<mirror::Object>* CreateImageRoots() const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void CalculateNewObjectOffsetsCallback(mirror::Object* obj, void* arg)
+  void CalculateObjectOffsets(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void WalkInstanceFields(mirror::Object* obj, mirror::Class* klass)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void WalkFieldsInOrder(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void WalkFieldsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Creates the contiguous image in memory and adjusts pointers.
@@ -180,9 +167,6 @@
 
   const CompilerDriver& compiler_driver_;
 
-  // Map of Object to where it will be at runtime.
-  SafeMap<const mirror::Object*, size_t> offsets_;
-
   // oat file with code for this image
   OatFile* oat_file_;
 
@@ -195,6 +179,9 @@
   // Beginning target image address for the output image.
   byte* image_begin_;
 
+  // Saved hashes (objects are inside of the image so that they don't move).
+  std::vector<std::pair<mirror::Object*, uint32_t> > saved_hashes_;
+
   // Beginning target oat address for the pointers from the output image to its oat file.
   const byte* oat_data_begin_;
 
@@ -211,9 +198,6 @@
   uint32_t quick_imt_conflict_trampoline_offset_;
   uint32_t quick_resolution_trampoline_offset_;
   uint32_t quick_to_interpreter_bridge_offset_;
-
-  // DexCaches seen while scanning for fixing up CodeAndDirectMethods
-  std::set<mirror::DexCache*> dex_caches_;
 };
 
 }  // namespace art
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 667b913..21dd11e 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -48,9 +48,9 @@
   void CompileForTest(jobject class_loader, bool direct,
                       const char* method_name, const char* method_sig) {
     ScopedObjectAccess soa(Thread::Current());
+    SirtRef<mirror::ClassLoader> loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(class_loader));
     // Compile the native method before starting the runtime
-    mirror::Class* c = class_linker_->FindClass("LMyClassNatives;",
-                                                soa.Decode<mirror::ClassLoader*>(class_loader));
+    mirror::Class* c = class_linker_->FindClass("LMyClassNatives;", loader);
     mirror::ArtMethod* method;
     if (direct) {
       method = c->FindDirectMethod(method_name, method_sig);
diff --git a/compiler/leb128_encoder_test.cc b/compiler/leb128_encoder_test.cc
new file mode 100644
index 0000000..4fa8075
--- /dev/null
+++ b/compiler/leb128_encoder_test.cc
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/histogram-inl.h"
+#include "common_test.h"
+#include "leb128.h"
+#include "leb128_encoder.h"
+
+namespace art {
+
+class Leb128Test : public CommonTest {};
+
+struct DecodeUnsignedLeb128TestCase {
+  uint32_t decoded;
+  uint8_t leb128_data[5];
+};
+
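+// Each test case pairs a value with its expected ULEB128 encoding, zero-padded out to five bytes.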
+static DecodeUnsignedLeb128TestCase uleb128_tests[] = {
+    {0,          {0, 0, 0, 0, 0}},
+    {1,          {1, 0, 0, 0, 0}},
+    {0x7F,       {0x7F, 0, 0, 0, 0}},
+    {0x80,       {0x80, 1, 0, 0, 0}},
+    {0x81,       {0x81, 1, 0, 0, 0}},
+    {0xFF,       {0xFF, 1, 0, 0, 0}},
+    {0x4000,     {0x80, 0x80, 1, 0, 0}},
+    {0x4001,     {0x81, 0x80, 1, 0, 0}},
+    {0x4081,     {0x81, 0x81, 1, 0, 0}},
+    {0x0FFFFFFF, {0xFF, 0xFF, 0xFF, 0x7F, 0}},
+    {0xFFFFFFFF, {0xFF, 0xFF, 0xFF, 0xFF, 0xF}},
+};
+
+TEST_F(Leb128Test, Singles) {
+  // Test individual encodings.
+  for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
+    UnsignedLeb128EncodingVector builder;
+    builder.PushBack(uleb128_tests[i].decoded);
+    const uint8_t* data_ptr = &uleb128_tests[i].leb128_data[0];
+    const uint8_t* encoded_data_ptr = &builder.GetData()[0];
+    for (size_t j = 0; j < 5; ++j) {
+      if (j < builder.GetData().size()) {
+        EXPECT_EQ(data_ptr[j], encoded_data_ptr[j]) << " i = " << i << " j = " << j;
+      } else {
+        EXPECT_EQ(data_ptr[j], 0U) << " i = " << i << " j = " << j;
+      }
+    }
+    EXPECT_EQ(DecodeUnsignedLeb128(&data_ptr), uleb128_tests[i].decoded) << " i = " << i;
+  }
+}
+
+TEST_F(Leb128Test, Stream) {
+  // Encode a number of entries.
+  UnsignedLeb128EncodingVector builder;
+  for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
+    builder.PushBack(uleb128_tests[i].decoded);
+  }
+  const uint8_t* encoded_data_ptr = &builder.GetData()[0];
+  for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
+    const uint8_t* data_ptr = &uleb128_tests[i].leb128_data[0];
+    for (size_t j = 0; j < 5; ++j) {
+      if (data_ptr[j] != 0) {
+        EXPECT_EQ(data_ptr[j], encoded_data_ptr[j]) << " i = " << i << " j = " << j;
+      }
+    }
+    EXPECT_EQ(DecodeUnsignedLeb128(&encoded_data_ptr), uleb128_tests[i].decoded) << " i = " << i;
+  }
+}
+
+TEST_F(Leb128Test, Speed) {
+  UniquePtr<Histogram<uint64_t> > enc_hist(new Histogram<uint64_t>("Leb128EncodeSpeedTest", 5));
+  UniquePtr<Histogram<uint64_t> > dec_hist(new Histogram<uint64_t>("Leb128DecodeSpeedTest", 5));
+  UnsignedLeb128EncodingVector builder;
+  // Push back 1024 chunks of 1024 values measuring encoding speed.
+  uint64_t last_time = NanoTime();
+  for (size_t i = 0; i < 1024; i++) {
+    for (size_t j = 0; j < 1024; j++) {
+      builder.PushBack((i * 1024) + j);
+    }
+    uint64_t cur_time = NanoTime();
+    enc_hist->AddValue(cur_time - last_time);
+    last_time = cur_time;
+  }
+  // Verify encoding and measure decode speed.
+  const uint8_t* encoded_data_ptr = &builder.GetData()[0];
+  last_time = NanoTime();
+  for (size_t i = 0; i < 1024; i++) {
+    for (size_t j = 0; j < 1024; j++) {
+      EXPECT_EQ(DecodeUnsignedLeb128(&encoded_data_ptr), (i * 1024) + j);
+    }
+    uint64_t cur_time = NanoTime();
+    dec_hist->AddValue(cur_time - last_time);
+    last_time = cur_time;
+  }
+
+  Histogram<uint64_t>::CumulativeData enc_data;
+  enc_hist->CreateHistogram(&enc_data);
+  enc_hist->PrintConfidenceIntervals(std::cout, 0.99, enc_data);
+
+  Histogram<uint64_t>::CumulativeData dec_data;
+  dec_hist->CreateHistogram(&dec_data);
+  dec_hist->PrintConfidenceIntervals(std::cout, 0.99, dec_data);
+}
+
+}  // namespace art
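
The test vectors above spell out the ULEB128 scheme: each byte carries 7 payload bits, least-significant group first, and the high bit marks a continuation. A minimal standalone sketch of that encoding and decoding (the helpers below are illustrative only; they are not the leb128.h / leb128_encoder.h APIs the test exercises):

#include <cstdint>
#include <cstdio>
#include <vector>

// Append the ULEB128 encoding of 'value': 7 payload bits per byte, least
// significant group first, high bit set on every byte except the last.
static void EncodeUleb128(std::vector<uint8_t>* out, uint32_t value) {
  while (value > 0x7F) {
    out->push_back(static_cast<uint8_t>((value & 0x7F) | 0x80));
    value >>= 7;
  }
  out->push_back(static_cast<uint8_t>(value));
}

// Decode one ULEB128 value starting at *data and advance the pointer,
// mirroring what the DecodeUnsignedLeb128 calls in the test do.
static uint32_t DecodeUleb128(const uint8_t** data) {
  uint32_t result = 0;
  uint32_t shift = 0;
  uint8_t byte;
  do {
    byte = *(*data)++;
    result |= static_cast<uint32_t>(byte & 0x7F) << shift;
    shift += 7;
  } while ((byte & 0x80) != 0);
  return result;
}

int main() {
  std::vector<uint8_t> buf;
  EncodeUleb128(&buf, 0x4081);  // expect 0x81 0x81 0x01, as in the table above
  const uint8_t* p = buf.data();
  std::printf("bytes=%zu decoded=0x%x\n", buf.size(), DecodeUleb128(&p));
  return 0;
}
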
diff --git a/compiler/llvm/llvm_compilation_unit.cc b/compiler/llvm/llvm_compilation_unit.cc
index 038f5dc..feb495e 100644
--- a/compiler/llvm/llvm_compilation_unit.cc
+++ b/compiler/llvm/llvm_compilation_unit.cc
@@ -211,6 +211,7 @@
   ::llvm::TargetOptions target_options;
   target_options.FloatABIType = ::llvm::FloatABI::Soft;
   target_options.NoFramePointerElim = true;
+  target_options.NoFramePointerElimNonLeaf = true;
   target_options.UseSoftFloat = false;
   target_options.EnableFastISel = false;
 
@@ -254,7 +255,7 @@
 
     ::llvm::OwningPtr< ::llvm::tool_output_file> out_file(
       new ::llvm::tool_output_file(bitcode_filename_.c_str(), errmsg,
-                                 ::llvm::sys::fs::F_Binary));
+                                 ::llvm::raw_fd_ostream::F_Binary));
 
 
     if (!errmsg.empty()) {
@@ -274,6 +275,7 @@
   // pm_builder.Inliner = ::llvm::createAlwaysInlinerPass();
   // pm_builder.Inliner = ::llvm::createPartialInliningPass();
   pm_builder.OptLevel = 3;
+  pm_builder.DisableSimplifyLibCalls = 1;
   pm_builder.DisableUnitAtATime = 1;
   pm_builder.populateFunctionPassManager(fpm);
   pm_builder.populateModulePassManager(pm);
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 6213b45..fd0a69d 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -67,6 +67,7 @@
 };
 
 TEST_F(OatTest, WriteRead) {
+  TimingLogger timings("CommonTest::WriteRead", false, false);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
   // TODO: make selectable
@@ -82,7 +83,7 @@
                                             insn_features, false, NULL, 2, true));
   jobject class_loader = NULL;
   if (kCompile) {
-    base::TimingLogger timings("OatTest::WriteRead", false, false);
+    TimingLogger timings("OatTest::WriteRead", false, false);
     compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), timings);
   }
 
@@ -92,7 +93,8 @@
                        42U,
                        4096U,
                        "lue.art",
-                       compiler_driver_.get());
+                       compiler_driver_.get(),
+                       &timings);
   bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(),
                                             !kIsTargetBuild,
                                             class_linker->GetBootClassPath(),
@@ -101,7 +103,6 @@
   ASSERT_TRUE(success);
 
   if (kCompile) {  // OatWriter strips the code, regenerate to compare
-    base::TimingLogger timings("CommonTest::WriteRead", false, false);
     compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), timings);
   }
   std::string error_msg;
@@ -130,7 +131,8 @@
       num_virtual_methods = it.NumVirtualMethods();
     }
     const char* descriptor = dex_file->GetClassDescriptor(class_def);
-    mirror::Class* klass = class_linker->FindClass(descriptor, NULL);
+    SirtRef<mirror::ClassLoader> loader(Thread::Current(), nullptr);
+    mirror::Class* klass = class_linker->FindClass(descriptor, loader);
 
     UniquePtr<const OatFile::OatClass> oat_class(oat_dex_file->GetOatClass(i));
     CHECK_EQ(mirror::Class::Status::kStatusNotReady, oat_class->GetStatus()) << descriptor;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index f3bb112..8382469 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -40,7 +40,8 @@
                      uint32_t image_file_location_oat_checksum,
                      uint32_t image_file_location_oat_begin,
                      const std::string& image_file_location,
-                     const CompilerDriver* compiler)
+                     const CompilerDriver* compiler,
+                     TimingLogger* timings)
   : compiler_driver_(compiler),
     dex_files_(&dex_files),
     image_file_location_oat_checksum_(image_file_location_oat_checksum),
@@ -77,12 +78,31 @@
     size_oat_class_status_(0),
     size_oat_class_method_bitmaps_(0),
     size_oat_class_method_offsets_(0) {
-  size_t offset = InitOatHeader();
-  offset = InitOatDexFiles(offset);
-  offset = InitDexFiles(offset);
-  offset = InitOatClasses(offset);
-  offset = InitOatCode(offset);
-  offset = InitOatCodeDexFiles(offset);
+  size_t offset;
+  {
+    TimingLogger::ScopedSplit split("InitOatHeader", timings);
+    offset = InitOatHeader();
+  }
+  {
+    TimingLogger::ScopedSplit split("InitOatDexFiles", timings);
+    offset = InitOatDexFiles(offset);
+  }
+  {
+    TimingLogger::ScopedSplit split("InitDexFiles", timings);
+    offset = InitDexFiles(offset);
+  }
+  {
+    TimingLogger::ScopedSplit split("InitOatClasses", timings);
+    offset = InitOatClasses(offset);
+  }
+  {
+    TimingLogger::ScopedSplit split("InitOatCode", timings);
+    offset = InitOatCode(offset);
+  }
+  {
+    TimingLogger::ScopedSplit split("InitOatCodeDexFiles", timings);
+    offset = InitOatCodeDexFiles(offset);
+  }
   size_ = offset;
 
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
@@ -405,23 +425,23 @@
     size_t gc_map_size = gc_map.size() * sizeof(gc_map[0]);
     gc_map_offset = (gc_map_size == 0) ? 0 : offset;
 
-#if !defined(NDEBUG)
-    // We expect GC maps except when the class hasn't been verified or the method is native
-    ClassReference class_ref(&dex_file, class_def_index);
-    CompiledClass* compiled_class = compiler_driver_->GetCompiledClass(class_ref);
-    mirror::Class::Status status;
-    if (compiled_class != NULL) {
-      status = compiled_class->GetStatus();
-    } else if (verifier::MethodVerifier::IsClassRejected(class_ref)) {
-      status = mirror::Class::kStatusError;
-    } else {
-      status = mirror::Class::kStatusNotReady;
+    if (kIsDebugBuild) {
+      // We expect GC maps except when the class hasn't been verified or the method is native
+      ClassReference class_ref(&dex_file, class_def_index);
+      CompiledClass* compiled_class = compiler_driver_->GetCompiledClass(class_ref);
+      mirror::Class::Status status;
+      if (compiled_class != NULL) {
+        status = compiled_class->GetStatus();
+      } else if (verifier::MethodVerifier::IsClassRejected(class_ref)) {
+        status = mirror::Class::kStatusError;
+      } else {
+        status = mirror::Class::kStatusNotReady;
+      }
+      CHECK(gc_map_size != 0 || is_native || status < mirror::Class::kStatusVerified)
+          << &gc_map << " " << gc_map_size << " " << (is_native ? "true" : "false") << " "
+          << (status < mirror::Class::kStatusVerified) << " " << status << " "
+          << PrettyMethod(method_idx, dex_file);
     }
-    CHECK(gc_map_size != 0 || is_native || status < mirror::Class::kStatusVerified)
-        << &gc_map << " " << gc_map_size << " " << (is_native ? "true" : "false") << " "
-        << (status < mirror::Class::kStatusVerified) << " " << status << " "
-        << PrettyMethod(method_idx, dex_file);
-#endif
 
     // Deduplicate GC maps
     SafeMap<const std::vector<uint8_t>*, uint32_t>::iterator gc_map_iter =
@@ -448,11 +468,12 @@
 
   if (compiler_driver_->IsImage()) {
     ClassLinker* linker = Runtime::Current()->GetClassLinker();
-    mirror::DexCache* dex_cache = linker->FindDexCache(dex_file);
     // Unchecked as we hold mutator_lock_ on entry.
     ScopedObjectAccessUnchecked soa(Thread::Current());
+    SirtRef<mirror::DexCache> dex_cache(soa.Self(), linker->FindDexCache(dex_file));
+    SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
     mirror::ArtMethod* method = linker->ResolveMethod(dex_file, method_idx, dex_cache,
-                                                      NULL, NULL, invoke_type);
+                                                      class_loader, nullptr, invoke_type);
     CHECK(method != NULL);
     method->SetFrameSizeInBytes(frame_size_in_bytes);
     method->SetCoreSpillMask(core_spill_mask);
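
One change above replaces the #if !defined(NDEBUG) block around the GC-map sanity check with an if (kIsDebugBuild) guard, so the check is always compiled and type-checked but can be folded away in release builds. A small sketch of the idiom, using a hypothetical stand-in for the constant:

#include <cassert>
#include <cstddef>
#include <cstdio>

// Hypothetical stand-in for ART's kIsDebugBuild; only here to show the idiom.
#ifdef NDEBUG
static constexpr bool kIsDebugBuild = false;
#else
static constexpr bool kIsDebugBuild = true;
#endif

static void CheckGcMapInvariant(size_t gc_map_size, bool is_native) {
  if (kIsDebugBuild) {
    // Parsed and type-checked in every build, but dead-code-eliminated when
    // kIsDebugBuild is false - unlike a block hidden behind #if !defined(NDEBUG).
    assert(gc_map_size != 0 || is_native);
  }
}

int main() {
  CheckGcMapInvariant(16, false);
  std::printf("ok\n");
  return 0;
}
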
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 5d947cf..64275e6 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -67,7 +67,8 @@
             uint32_t image_file_location_oat_checksum,
             uint32_t image_file_location_oat_begin,
             const std::string& image_file_location,
-            const CompilerDriver* compiler);
+            const CompilerDriver* compiler,
+            TimingLogger* timings);
 
   const OatHeader& GetOatHeader() const {
     return *oat_header_;
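
The extra TimingLogger* parameter lets OatWriter attribute time to each of its Init phases via TimingLogger::ScopedSplit, which then shows up in the timing dump dex2oat prints when dump_timing is set. A rough sketch of the RAII shape of such a split, assuming a plain steady-clock timer rather than ART's actual TimingLogger API:

#include <chrono>
#include <iostream>
#include <string>
#include <utility>

// Illustrative only: a scoped "split" that prints the wall-clock time spent in
// a named phase when it goes out of scope. ART's ScopedSplit records into the
// logger instead of printing, but the RAII shape is the same.
class ScopedSplitSketch {
 public:
  explicit ScopedSplitSketch(std::string label)
      : label_(std::move(label)), start_(std::chrono::steady_clock::now()) {}
  ~ScopedSplitSketch() {
    auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::steady_clock::now() - start_).count();
    std::cout << label_ << ": " << ns << " ns\n";
  }
 private:
  std::string label_;
  std::chrono::steady_clock::time_point start_;
};

int main() {
  { ScopedSplitSketch split("InitOatHeader");   /* phase work here */ }
  { ScopedSplitSketch split("InitOatDexFiles"); /* phase work here */ }
  return 0;
}
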
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 3781921..8b232700 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -242,7 +242,7 @@
                                       bool image,
                                       UniquePtr<CompilerDriver::DescriptorSet>& image_classes,
                                       bool dump_stats,
-                                      base::TimingLogger& timings) {
+                                      TimingLogger& timings) {
     // SirtRef and ClassLoader creation needs to come after Runtime::Create
     jobject class_loader = NULL;
     Thread* self = Thread::Current();
@@ -280,6 +280,7 @@
     uint32_t image_file_location_oat_checksum = 0;
     uint32_t image_file_location_oat_data_begin = 0;
     if (!driver->IsImage()) {
+      TimingLogger::ScopedSplit split("Loading image checksum", &timings);
       gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
       image_file_location_oat_checksum = image_space->GetImageHeader().GetOatChecksum();
       image_file_location_oat_data_begin =
@@ -294,8 +295,10 @@
                          image_file_location_oat_checksum,
                          image_file_location_oat_data_begin,
                          image_file_location,
-                         driver.get());
+                         driver.get(),
+                         &timings);
 
+    TimingLogger::ScopedSplit split("Writing ELF", &timings);
     if (!driver->WriteElf(android_root, is_host, dex_files, oat_writer, oat_file)) {
       LOG(ERROR) << "Failed to write ELF file " << oat_file->GetPath();
       return NULL;
@@ -600,7 +603,7 @@
 }
 
 static int dex2oat(int argc, char** argv) {
-  base::TimingLogger timings("compiler", false, false);
+  TimingLogger timings("compiler", false, false);
 
   InitLogging(argv);
 
@@ -1091,7 +1094,7 @@
 
   if (is_host) {
     if (dump_timing || (dump_slow_timing && timings.GetTotalNs() > MsToNs(1000))) {
-      LOG(INFO) << Dumpable<base::TimingLogger>(timings);
+      LOG(INFO) << Dumpable<TimingLogger>(timings);
     }
     return EXIT_SUCCESS;
   }
@@ -1133,7 +1136,7 @@
   timings.EndSplit();
 
   if (dump_timing || (dump_slow_timing && timings.GetTotalNs() > MsToNs(1000))) {
-    LOG(INFO) << Dumpable<base::TimingLogger>(timings);
+    LOG(INFO) << Dumpable<TimingLogger>(timings);
   }
 
   // Everything was successfully written, do an explicit exit here to avoid running Runtime
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 8d4f3ce..936fb07 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -180,6 +180,44 @@
   return os;
 }
 
+struct FpRegister {
+  explicit FpRegister(uint32_t instr, uint16_t at_bit, uint16_t extra_at_bit) {
+    size = (instr >> 8) & 1;
+    uint32_t Vn = (instr >> at_bit) & 0xF;
+    uint32_t N = (instr >> extra_at_bit) & 1;
+    r = (size != 0 ? ((N << 4) | Vn) : ((Vn << 1) | N));
+  }
+  FpRegister(const FpRegister& other, uint32_t offset)
+      : size(other.size), r(other.r + offset) {}
+
+  uint32_t size;  // 0 = f32, 1 = f64
+  uint32_t r;
+};
+std::ostream& operator<<(std::ostream& os, const FpRegister& rhs) {
+  return os << ((rhs.size != 0) ? "d" : "s") << rhs.r;
+}
+
+struct FpRegisterRange {
+  explicit FpRegisterRange(uint32_t instr)
+      : first(instr, 12, 22), imm8(instr & 0xFF) {}
+  FpRegister first;
+  uint32_t imm8;
+};
+std::ostream& operator<<(std::ostream& os, const FpRegisterRange& rhs) {
+  os << "{" << rhs.first;
+  int count = (rhs.first.size != 0 ? ((rhs.imm8 + 1u) >> 1) : rhs.imm8);
+  if (count > 1) {
+    os << "-" << FpRegister(rhs.first, count - 1);
+  }
+  if (rhs.imm8 == 0) {
+    os << " (EMPTY)";
+  } else if (rhs.first.size != 0 && (rhs.imm8 & 1) != 0) {
+    os << rhs.first << " (HALF)";
+  }
+  os << "}";
+  return os;
+}
+
 void DisassemblerArm::DumpArm(std::ostream& os, const uint8_t* instr_ptr) {
   uint32_t instruction = ReadU32(instr_ptr);
   uint32_t cond = (instruction >> 28) & 0xf;
@@ -389,9 +427,9 @@
           args << Rt << "," << Rd << ", [" << Rn;
           const char *sign = U ? "+" : "-";
           if (P == 0 && W == 1) {
-            args << "], #" << sign << imm8;
+            args << "], #" << sign << (imm8 << 2);
           } else {
-            args << ", #" << sign << imm8 << "]";
+            args << ", #" << sign << (imm8 << 2) << "]";
             if (W == 1) {
               args << "!";
             }
@@ -616,57 +654,115 @@
         uint32_t op4 = (instr >> 4) & 0x1;
 
         if (coproc == 10 || coproc == 11) {   // 101x
-          if (op3 < 0x20 && (op3 >> 1) != 2) {     // 0xxxxx and not 00010x
-            // extension load/store instructions
-            int op = op3 & 0x1f;
+          if (op3 < 0x20 && (op3 & ~5) != 0) {     // 0xxxxx and not 000x0x
+            // Extension register load/store instructions
+            // |1111|110|00000|0000|1111|110|0|00000000|
+            // |5  2|1 9|87654|3  0|5  2|1 9|8|7      0|
+            // |----|---|-----|----|----|---|-|--------|
+            // |3322|222|22222|1111|1111|110|0|00000000|
+            // |1  8|7 5|4   0|9  6|5  2|1 9|8|7      0|
+            // |----|---|-----|----|----|---|-|--------|
+            // |1110|110|PUDWL| Rn | Vd |101|S|  imm8  |
             uint32_t P = (instr >> 24) & 1;
             uint32_t U = (instr >> 23) & 1;
-            uint32_t D = (instr >> 22) & 1;
             uint32_t W = (instr >> 21) & 1;
-            uint32_t S = (instr >> 8) & 1;
-            ArmRegister Rn(instr, 16);
-            uint32_t Vd = (instr >> 12) & 0xF;
-            uint32_t imm8 = instr & 0xFF;
-            uint32_t d = (S == 0 ? ((Vd << 1) | D) : (Vd | (D << 4)));
-            ArmRegister Rd(d, 0);
-
-            if (op == 8 || op == 12 || op == 10 || op == 14 ||
-                op == 18 || op == 22) {   // 01x00 or 01x10
-              // vector store multiple or vpush
-              if (P == 1 && U == 0 && W == 1 && Rn.r == 13) {
-                opcode << "vpush" << (S == 0 ? ".f64" : ".f32");
-                args << Rd << " .. " << (Rd.r + imm8);
-              } else {
-                opcode << "vstm" << (S == 0 ? ".f64" : ".f32");
-                args << Rn << ", " << Rd << " .. " << (Rd.r + imm8);
+            if (P == U && W == 1) {
+              opcode << "UNDEFINED";
+            } else {
+              uint32_t L = (instr >> 20) & 1;
+              uint32_t S = (instr >> 8) & 1;
+              ArmRegister Rn(instr, 16);
+              if (P == 1 && W == 0) {  // VLDR
+                FpRegister d(instr, 12, 22);
+                uint32_t imm8 = instr & 0xFF;
+                opcode << (L == 1 ? "vldr" : "vstr");
+                args << d << ", [" << Rn << ", #" << ((U == 1) ? "" : "-")
+                     << (imm8 << 2) << "]";
+              } else if (Rn.r == 13 && W == 1 && U == L) {  // VPUSH/VPOP
+                opcode << (L == 1 ? "vpop" : "vpush");
+                args << FpRegisterRange(instr);
+              } else {  // VLDM
+                opcode << (L == 1 ? "vldm" : "vstm");
+                args << Rn << ((W == 1) ? "!" : "") << ", "
+                     << FpRegisterRange(instr);
               }
-            } else if (op == 16 || op == 20 || op == 24 || op == 28) {
-              // 1xx00
-              // vector store register
-              opcode << "vstr" << (S == 0 ? ".f64" : ".f32");
-              args << Rd << ", [" << Rn << ", #" << imm8 << "]";
-            } else if (op == 17 || op == 21 || op == 25 || op == 29) {
-              // 1xx01
-              // vector load register
-               opcode << "vldr" << (S == 0 ? ".f64" : ".f32");
-               args << Rd << ", [" << Rn << ", #" << imm8 << "]";
-            } else if (op == 9 || op == 13 || op == 11 || op == 15 ||
-                op == 19 || op == 23 ) {    // 01x11 10x11
-              // vldm or vpop
-              if (P == 1 && U == 0 && W == 1 && Rn.r == 13) {
-                opcode << "vpop" << (S == 0 ? ".f64" : ".f32");
-                args <<  Rd << " .. " << (Rd.r + imm8);
-              } else {
-                opcode << "vldm" << (S == 0 ? ".f64" : ".f32");
-                args << Rn << ", " << Rd << " .. " << (Rd.r + imm8);
-              }
+              opcode << (S == 1 ? ".f64" : ".f32");
             }
           } else if ((op3 >> 1) == 2) {      // 00010x
-            // 64 bit transfers
+            if ((instr & 0xD0) == 0x10) {
+              // 64-bit transfers between ARM core and extension registers.
+              uint32_t L = (instr >> 20) & 1;
+              uint32_t S = (instr >> 8) & 1;
+              ArmRegister Rt2(instr, 16);
+              ArmRegister Rt(instr, 12);
+              FpRegister m(instr, 0, 5);
+              opcode << "vmov" << (S ? ".f64" : ".f32");
+              if (L == 1) {
+                args << Rt << ", " << Rt2 << ", ";
+              }
+              if (S) {
+                args << m;
+              } else {
+                args << m << ", " << FpRegister(m, 1);
+              }
+              if (L == 0) {
+                args << ", " << Rt << ", " << Rt2;
+              }
+              if (Rt.r == 15 || Rt.r == 13 || Rt2.r == 15 || Rt2.r == 13 ||
+                  (S == 0 && m.r == 31) || (L == 1 && Rt.r == Rt2.r)) {
+                args << " (UNPREDICTABLE)";
+              }
+            }
           } else if ((op3 >> 4) == 2 && op4 == 0) {     // 10xxxx, op = 0
             // fp data processing
           } else if ((op3 >> 4) == 2 && op4 == 1) {     // 10xxxx, op = 1
-            // 8,16,32 bit transfers
+            if (coproc == 10 && (op3 & 0xE) == 0) {
+              // VMOV (between ARM core register and single-precision register)
+              // |1111|1100|000|0 |0000|1111|1100|0|00|0|0000|
+              // |5   |1  8|7 5|4 |3  0|5  2|1  8|7|65|4|3  0|
+              // |----|----|---|- |----|----|----|-|--|-|----|
+              // |3322|2222|222|2 |1111|1111|1100|0|00|0|0000|
+              // |1  8|7  4|3 1|0 |9  6|5  2|1  8|7|65|4|3  0|
+              // |----|----|---|- |----|----|----|-|--|-|----|
+              // |1110|1110|000|op| Vn | Rt |1010|N|00|1|0000|
+              uint32_t op = op3 & 1;
+              ArmRegister Rt(instr, 12);
+              FpRegister n(instr, 16, 7);
+              opcode << "vmov.f32";
+              if (op) {
+                args << Rt << ", " << n;
+              } else {
+                args << n << ", " << Rt;
+              }
+              if (Rt.r == 13 || Rt.r == 15 || (instr & 0x6F) != 0) {
+                args << " (UNPREDICTABLE)";
+              }
+            } else if (coproc == 10 && op3 == 0x2F) {
+              // VMRS
+              // |1111|11000000|0000|1111|1100|000|0|0000|
+              // |5   |1      4|3  0|5  2|1  8|7 5|4|3  0|
+              // |----|--------|----|----|----|---|-|----|
+              // |3322|22222222|1111|1111|1100|000|0|0000|
+              // |1  8|7      0|9  6|5  2|1  8|7 5|4|3  0|
+              // |----|--------|----|----|----|---|-|----|
+              // |1110|11101111|reg | Rt |1010|000|1|0000| - last 7 0s are (0)
+              uint32_t spec_reg = (instr >> 16) & 0xF;
+              ArmRegister Rt(instr, 12);
+              opcode << "vmrs";
+              if (spec_reg == 1) {
+                if (Rt.r == 15) {
+                  args << "APSR_nzcv, FPSCR";
+                } else if (Rt.r == 13) {
+                  args << Rt << ", FPSCR (UNPREDICTABLE)";
+                } else {
+                  args << Rt << ", FPSCR";
+                }
+              } else {
+                args << "(PRIVILEGED)";
+              }
+            } else if (coproc == 11 && (op3 & 0x9) != 8) {
+              // VMOV (ARM core register to scalar or vice versa; 8/16/32-bit)
+            }
           }
         }
 
@@ -686,30 +782,19 @@
             uint32_t opc3 = (instr >> 6) & 0x3;
             if ((opc1 & 0xB) == 0xB) {  // 1x11
               // Other VFP data-processing instructions.
-              uint32_t D  = (instr >> 22) & 0x1;
-              uint32_t Vd = (instr >> 12) & 0xF;
               uint32_t sz = (instr >> 8) & 1;
-              uint32_t M  = (instr >> 5) & 1;
-              uint32_t Vm = instr & 0xF;
-              bool dp_operation = sz == 1;
+              FpRegister d(instr, 12, 22);
+              FpRegister m(instr, 0, 5);
               switch (opc2) {
                 case 0x1:  // Vneg/Vsqrt
                   //  1110 11101 D 11 0001 dddd 101s o1M0 mmmm
-                  opcode << (opc3 == 1 ? "vneg" : "vsqrt") << (dp_operation ? ".f64" : ".f32");
-                  if (dp_operation) {
-                    args << "f" << ((D << 4) | Vd) << ", " << "f" << ((M << 4) | Vm);
-                  } else {
-                    args << "f" << ((Vd << 1) | D) << ", " << "f" << ((Vm << 1) | M);
-                  }
+                  opcode << (opc3 == 1 ? "vneg" : "vsqrt") << (sz == 1 ? ".f64" : ".f32");
+                  args << d << ", " << m;
                   break;
                 case 0x4: case 0x5:  {  // Vector compare
                   // 1110 11101 D 11 0100 dddd 101 sE1M0 mmmm
-                  opcode << (opc3 == 1 ? "vcmp" : "vcmpe") << (dp_operation ? ".f64" : ".f32");
-                  if (dp_operation) {
-                    args << "f" << ((D << 4) | Vd) << ", " << "f" << ((M << 4) | Vm);
-                  } else {
-                    args << "f" << ((Vd << 1) | D) << ", " << "f" << ((Vm << 1) | M);
-                  }
+                  opcode << (opc3 == 1 ? "vcmp" : "vcmpe") << (sz == 1 ? ".f64" : ".f32");
+                  args << d << ", " << m;
                   break;
                 }
               }
@@ -720,18 +805,11 @@
           if ((instr & 0xFFBF0ED0) == 0xeeb10ac0) {  // Vsqrt
             //  1110 11101 D 11 0001 dddd 101S 11M0 mmmm
             //  1110 11101 0 11 0001 1101 1011 1100 1000 - eeb1dbc8
-            uint32_t D = (instr >> 22) & 1;
-            uint32_t Vd = (instr >> 12) & 0xF;
             uint32_t sz = (instr >> 8) & 1;
-            uint32_t M = (instr >> 5) & 1;
-            uint32_t Vm = instr & 0xF;
-            bool dp_operation = sz == 1;
-            opcode << "vsqrt" << (dp_operation ? ".f64" : ".f32");
-            if (dp_operation) {
-              args << "f" << ((D << 4) | Vd) << ", " << "f" << ((M << 4) | Vm);
-            } else {
-              args << "f" << ((Vd << 1) | D) << ", " << "f" << ((Vm << 1) | M);
-            }
+            FpRegister d(instr, 12, 22);
+            FpRegister m(instr, 0, 5);
+            opcode << "vsqrt" << (sz == 1 ? ".f64" : ".f32");
+            args << d << ", " << m;
           }
         }
       }
@@ -776,7 +854,7 @@
           } else if (op3 == 0x4) {
             opcode << "teq";
           } else if (op3 == 0x8) {
-            opcode << "cmw";
+            opcode << "cmn.w";
           } else {
             opcode << "cmp.w";
           }
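
The FpRegister helper introduced above centralizes how the disassembler assembles a VFP register number: bit 8 of the instruction selects single vs. double precision, and the extra D/N/M bit extends the 4-bit field at the low end for s-registers and at the high end for d-registers. A self-contained sketch of the same decode (the sample encoding is my own illustration, not taken from the patch):

#include <cstdint>
#include <iostream>

// Mirror of the FpRegister decode above: size comes from bit 8, and the 5-bit
// register number is built from a 4-bit field plus one "extra" bit whose
// position depends on the precision.
struct FpReg {
  uint32_t size;  // 0 = f32 (s-registers), 1 = f64 (d-registers)
  uint32_t r;
};

static FpReg DecodeFpReg(uint32_t instr, uint32_t at_bit, uint32_t extra_at_bit) {
  FpReg reg;
  reg.size = (instr >> 8) & 1;
  uint32_t v = (instr >> at_bit) & 0xF;
  uint32_t extra = (instr >> extra_at_bit) & 1;
  reg.r = (reg.size != 0) ? ((extra << 4) | v) : ((v << 1) | extra);
  return reg;
}

int main() {
  // 0xED901A02 should be "vldr s2, [r0, #8]" if I have the ARM encoding right;
  // the disassembler builds the destination with FpRegister(instr, 12, 22).
  FpReg d = DecodeFpReg(0xED901A02u, 12, 22);
  std::cout << (d.size != 0 ? "d" : "s") << d.r << "\n";  // prints "s2"
  return 0;
}
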
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index b9716d5..e219dd3 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -257,6 +257,9 @@
     os << "OAT DEX FILE:\n";
     os << StringPrintf("location: %s\n", oat_dex_file.GetDexFileLocation().c_str());
     os << StringPrintf("checksum: 0x%08x\n", oat_dex_file.GetDexFileLocationChecksum());
+
+    // Create the verifier early.
+
     std::string error_msg;
     UniquePtr<const DexFile> dex_file(oat_dex_file.OpenDexFile(&error_msg));
     if (dex_file.get() == NULL) {
@@ -377,8 +380,20 @@
                                  oat_method.GetCode() != NULL ? "..." : "");
       Indenter indent2_filter(indent1_os.rdbuf(), kIndentChar, kIndentBy1Count);
       std::ostream indent2_os(&indent2_filter);
-      DumpCode(indent2_os, oat_method, dex_method_idx, &dex_file, class_def, code_item,
-               method_access_flags);
+
+      Runtime* runtime = Runtime::Current();
+      if (runtime != nullptr) {
+        ScopedObjectAccess soa(Thread::Current());
+        SirtRef<mirror::DexCache> dex_cache(
+            soa.Self(), runtime->GetClassLinker()->FindDexCache(dex_file));
+        SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
+        verifier::MethodVerifier verifier(&dex_file, &dex_cache, &class_loader, &class_def, code_item,
+                                          dex_method_idx, nullptr, method_access_flags, true, true);
+        verifier.Verify();
+        DumpCode(indent2_os, &verifier, oat_method, code_item);
+      } else {
+        DumpCode(indent2_os, nullptr, oat_method, code_item);
+      }
     }
   }
 
@@ -566,24 +581,10 @@
     }
   }
 
-  void DumpVRegsAtDexPc(std::ostream& os,  const OatFile::OatMethod& oat_method,
-                        uint32_t dex_method_idx, const DexFile* dex_file,
-                        const DexFile::ClassDef& class_def, const DexFile::CodeItem* code_item,
-                        uint32_t method_access_flags, uint32_t dex_pc) {
-    static UniquePtr<verifier::MethodVerifier> verifier;
-    static const DexFile* verified_dex_file = NULL;
-    static uint32_t verified_dex_method_idx = DexFile::kDexNoIndex;
-    if (dex_file != verified_dex_file || verified_dex_method_idx != dex_method_idx) {
-      ScopedObjectAccess soa(Thread::Current());
-      mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file);
-      mirror::ClassLoader* class_loader = NULL;
-      verifier.reset(new verifier::MethodVerifier(dex_file, dex_cache, class_loader, &class_def,
-                                                  code_item, dex_method_idx, NULL,
-                                                  method_access_flags, true, true));
-      verifier->Verify();
-      verified_dex_file = dex_file;
-      verified_dex_method_idx = dex_method_idx;
-    }
+  void DumpVRegsAtDexPc(std::ostream& os, verifier::MethodVerifier* verifier,
+                        const OatFile::OatMethod& oat_method,
+                        const DexFile::CodeItem* code_item, uint32_t dex_pc) {
+    DCHECK(verifier != nullptr);
     std::vector<int32_t> kinds = verifier->DescribeVRegs(dex_pc);
     bool first = true;
     for (size_t reg = 0; reg < code_item->registers_size_; reg++) {
@@ -633,18 +634,16 @@
                     uint32_t method_access_flags) {
     if ((method_access_flags & kAccNative) == 0) {
       ScopedObjectAccess soa(Thread::Current());
-      mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file);
-      mirror::ClassLoader* class_loader = NULL;
+      SirtRef<mirror::DexCache> dex_cache(soa.Self(), Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file));
+      SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
       verifier::MethodVerifier::VerifyMethodAndDump(os, dex_method_idx, dex_file, dex_cache,
                                                     class_loader, &class_def, code_item, NULL,
                                                     method_access_flags);
     }
   }
 
-  void DumpCode(std::ostream& os,  const OatFile::OatMethod& oat_method,
-                uint32_t dex_method_idx, const DexFile* dex_file,
-                const DexFile::ClassDef& class_def, const DexFile::CodeItem* code_item,
-                uint32_t method_access_flags) {
+  void DumpCode(std::ostream& os, verifier::MethodVerifier* verifier,
+                const OatFile::OatMethod& oat_method, const DexFile::CodeItem* code_item) {
     const void* code = oat_method.GetCode();
     size_t code_size = oat_method.GetCodeSize();
     if (code == NULL || code_size == 0) {
@@ -653,16 +652,14 @@
     }
     const uint8_t* native_pc = reinterpret_cast<const uint8_t*>(code);
     size_t offset = 0;
-    const bool kDumpVRegs = (Runtime::Current() != NULL);
     while (offset < code_size) {
       DumpMappingAtOffset(os, oat_method, offset, false);
       offset += disassembler_->Dump(os, native_pc + offset);
       uint32_t dex_pc = DumpMappingAtOffset(os, oat_method, offset, true);
       if (dex_pc != DexFile::kDexNoIndex) {
         DumpGcMapAtNativePcOffset(os, oat_method, code_item, offset);
-        if (kDumpVRegs) {
-          DumpVRegsAtDexPc(os, oat_method, dex_method_idx, dex_file, class_def, code_item,
-                           method_access_flags, dex_pc);
+        if (verifier != nullptr) {
+          DumpVRegsAtDexPc(os, verifier, oat_method, code_item, dex_pc);
         }
       }
     }
@@ -715,14 +712,25 @@
         if (image_root_object->IsObjectArray()) {
           Indenter indent2_filter(indent1_os.rdbuf(), kIndentChar, kIndentBy1Count);
           std::ostream indent2_os(&indent2_filter);
-          // TODO: replace down_cast with AsObjectArray (g++ currently has a problem with this)
           mirror::ObjectArray<mirror::Object>* image_root_object_array
-              = down_cast<mirror::ObjectArray<mirror::Object>*>(image_root_object);
-          //  = image_root_object->AsObjectArray<Object>();
+              = image_root_object->AsObjectArray<mirror::Object>();
           for (int i = 0; i < image_root_object_array->GetLength(); i++) {
             mirror::Object* value = image_root_object_array->Get(i);
+            size_t run = 0;
+            for (int32_t j = i + 1; j < image_root_object_array->GetLength(); j++) {
+              if (value == image_root_object_array->Get(j)) {
+                run++;
+              } else {
+                break;
+              }
+            }
+            if (run == 0) {
+              indent2_os << StringPrintf("%d: ", i);
+            } else {
+              indent2_os << StringPrintf("%d to %zd: ", i, i + run);
+              i = i + run;
+            }
             if (value != NULL) {
-              indent2_os << i << ": ";
               PrettyObjectValue(indent2_os, value->GetClass(), value);
             } else {
               indent2_os << i << ": null\n";
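
The oatdump change above coalesces runs of identical image-root entries into a single "i to j:" line instead of printing every index. A small standalone sketch of that run-length grouping over plain integers:

#include <cstdio>
#include <vector>

// Consecutive identical entries are printed once as an "i to j:" range,
// matching the grouping added to the image-root dump above.
static void DumpWithRuns(const std::vector<int>& values) {
  for (size_t i = 0; i < values.size(); ++i) {
    size_t run = 0;
    for (size_t j = i + 1; j < values.size() && values[j] == values[i]; ++j) {
      ++run;
    }
    if (run == 0) {
      std::printf("%zu: %d\n", i, values[i]);
    } else {
      std::printf("%zu to %zu: %d\n", i, i + run, values[i]);
      i += run;  // skip the rest of the run
    }
  }
}

int main() {
  DumpWithRuns({7, 7, 7, 3, 5, 5});  // prints "0 to 2: 7", "3: 3", "4 to 5: 5"
  return 0;
}
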
diff --git a/runtime/Android.mk b/runtime/Android.mk
index bef4381..16f11c6 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -42,6 +42,7 @@
 	dex_instruction.cc \
 	elf_file.cc \
 	gc/allocator/dlmalloc.cc \
+	gc/allocator/rosalloc.cc \
 	gc/accounting/card_table.cc \
 	gc/accounting/gc_allocator.cc \
 	gc/accounting/heap_bitmap.cc \
@@ -50,11 +51,16 @@
 	gc/collector/garbage_collector.cc \
 	gc/collector/mark_sweep.cc \
 	gc/collector/partial_mark_sweep.cc \
+	gc/collector/semi_space.cc \
 	gc/collector/sticky_mark_sweep.cc \
 	gc/heap.cc \
+	gc/reference_queue.cc \
+	gc/space/bump_pointer_space.cc \
 	gc/space/dlmalloc_space.cc \
 	gc/space/image_space.cc \
 	gc/space/large_object_space.cc \
+	gc/space/malloc_space.cc \
+	gc/space/rosalloc_space.cc \
 	gc/space/space.cc \
 	hprof/hprof.cc \
 	image.cc \
@@ -141,6 +147,7 @@
 	arch/arm/registers_arm.cc \
 	arch/x86/registers_x86.cc \
 	arch/mips/registers_mips.cc \
+	arch/quick_alloc_entrypoints.cc \
 	entrypoints/entrypoint_utils.cc \
 	entrypoints/interpreter/interpreter_entrypoints.cc \
 	entrypoints/jni/jni_entrypoints.cc \
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 3dac636..5166d29 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -34,21 +34,6 @@
 extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*);
 extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
 
-// Alloc entrypoints.
-extern "C" void* art_quick_alloc_array(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_array_with_access_check(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_object(uint32_t type_idx, void* method);
-extern "C" void* art_quick_alloc_object_with_access_check(uint32_t type_idx, void* method);
-extern "C" void* art_quick_check_and_alloc_array(uint32_t, void*, int32_t);
-extern "C" void* art_quick_check_and_alloc_array_with_access_check(uint32_t, void*, int32_t);
-
-extern "C" void* art_quick_alloc_array_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_object_instrumented(uint32_t type_idx, void* method);
-extern "C" void* art_quick_alloc_object_with_access_check_instrumented(uint32_t type_idx, void* method);
-extern "C" void* art_quick_check_and_alloc_array_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_check_and_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
-
 // Cast entrypoints.
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
@@ -142,29 +127,7 @@
 extern "C" void art_quick_throw_null_pointer_exception();
 extern "C" void art_quick_throw_stack_overflow(void*);
 
-static bool quick_alloc_entry_points_instrumented = false;
-
-void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
-  quick_alloc_entry_points_instrumented = instrumented;
-}
-
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
-  if (quick_alloc_entry_points_instrumented) {
-    qpoints->pAllocArray = art_quick_alloc_array_instrumented;
-    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check_instrumented;
-    qpoints->pAllocObject = art_quick_alloc_object_instrumented;
-    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check_instrumented;
-    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array_instrumented;
-    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check_instrumented;
-  } else {
-    qpoints->pAllocArray = art_quick_alloc_array;
-    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
-    qpoints->pAllocObject = art_quick_alloc_object;
-    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
-    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
-    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
-  }
-}
+extern void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
 
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 9a853d0..61be14b 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -16,6 +16,8 @@
 
 #include "asm_support_arm.S"
 
+#include "arch/quick_alloc_entrypoints.S"
+
     /* Deliver the given exception */
     .extern artDeliverExceptionFromCode
     /* Deliver an exception pending on a thread */
@@ -69,12 +71,24 @@
 .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
     add sp, #4               @ bottom word holds Method*
     pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
+    .cfi_restore r8
+    .cfi_restore r10
+    .cfi_restore r11
     .cfi_adjust_cfa_offset -32
 .endm
 
 .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
     add sp, #4               @ bottom word holds Method*
     pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
+    .cfi_restore r8
+    .cfi_restore r10
+    .cfi_restore r11
     .cfi_adjust_cfa_offset -32
     bx  lr                   @ return
 .endm
@@ -86,7 +100,6 @@
 .macro SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
     push {r1-r3, r5-r8, r10-r11, lr}  @ 10 words of callee saves
     .save {r1-r3, r5-r8, r10-r11, lr}
-    .cfi_adjust_cfa_offset 40
     .cfi_rel_offset r1, 0
     .cfi_rel_offset r2, 4
     .cfi_rel_offset r3, 8
@@ -97,6 +110,7 @@
     .cfi_rel_offset r10, 28
     .cfi_rel_offset r11, 32
     .cfi_rel_offset lr, 36
+    .cfi_adjust_cfa_offset 40
     sub sp, #8                        @ 2 words of space, bottom word will hold Method*
     .pad #8
     .cfi_adjust_cfa_offset 8
@@ -105,6 +119,15 @@
 .macro RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
     add  sp, #8                      @ rewind sp
     pop {r1-r3, r5-r8, r10-r11, lr}  @ 10 words of callee saves
+    .cfi_restore r1
+    .cfi_restore r2
+    .cfi_restore r3
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
+    .cfi_restore r8
+    .cfi_restore r10
+    .cfi_restore r11
     .cfi_adjust_cfa_offset -48
 .endm
 
@@ -285,6 +308,11 @@
     ldr    ip, [sp, #24]                   @ load the result pointer
     strd   r0, [ip]                        @ store r0/r1 into result pointer
     pop    {r0, r4, r5, r9, r11, lr}       @ restore spill regs
+    .cfi_restore r0
+    .cfi_restore r4
+    .cfi_restore r5
+    .cfi_restore r9
+    .cfi_restore lr
     .cfi_adjust_cfa_offset -24
     bx     lr
 END art_quick_invoke_stub
@@ -413,6 +441,8 @@
     add sp, #4
     .cfi_adjust_cfa_offset -4
     pop {r0-r1, lr}
+    .cfi_restore r0
+    .cfi_restore r1
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
     mov r3, sp                      @ pass SP
@@ -689,6 +719,7 @@
     .cfi_rel_offset r9, 0
     bl     artSet64StaticFromCode        @ (field_idx, referrer, new_val, Thread*, SP)
     add    sp, #16                       @ release out args
+    .cfi_adjust_cfa_offset -16
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME   @ TODO: we can clearly save an add here
     RETURN_IF_RESULT_IS_ZERO
     DELIVER_PENDING_EXCEPTION
@@ -801,205 +832,42 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolve_string
 
-    /*
-     * Called by managed code to allocate an object
-     */
-    .extern artAllocObjectFromCode
-ENTRY art_quick_alloc_object
+// Macro to facilitate adding new allocation entrypoints.
+.macro TWO_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
     mov    r2, r9                     @ pass Thread::Current
     mov    r3, sp                     @ pass SP
-    bl     artAllocObjectFromCode     @ (uint32_t type_idx, Method* method, Thread*, SP)
+    bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*, SP)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
+    \return
     DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_object
+END \name
+.endm
 
-    .extern artAllocObjectFromCodeInstrumented
-ENTRY art_quick_alloc_object_instrumented
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r2, r9                     @ pass Thread::Current
-    mov    r3, sp                     @ pass SP
-    bl     artAllocObjectFromCodeInstrumented     @ (uint32_t type_idx, Method* method, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_object_instrumented
-
-    /*
-     * Called by managed code to allocate an object when the caller doesn't know whether it has
-     * access to the created type.
-     */
-    .extern artAllocObjectFromCodeWithAccessCheck
-ENTRY art_quick_alloc_object_with_access_check
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r2, r9                     @ pass Thread::Current
-    mov    r3, sp                     @ pass SP
-    bl     artAllocObjectFromCodeWithAccessCheck  @ (uint32_t type_idx, Method* method, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_object_with_access_check
-
-    .extern artAllocObjectFromCodeWithAccessCheckInstrumented
-ENTRY art_quick_alloc_object_with_access_check_instrumented
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r2, r9                     @ pass Thread::Current
-    mov    r3, sp                     @ pass SP
-    bl     artAllocObjectFromCodeWithAccessCheckInstrumented  @ (uint32_t type_idx, Method* method, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_object_with_access_check_instrumented
-
-    /*
-     * Called by managed code to allocate an array.
-     */
-    .extern artAllocArrayFromCode
-ENTRY art_quick_alloc_array
+// Macro to facilitate adding new array allocation entrypoints.
+.macro THREE_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
     mov    r3, r9                     @ pass Thread::Current
     mov    r12, sp
     str    r12, [sp, #-16]!           @ expand the frame and pass SP
     .pad #16
     .cfi_adjust_cfa_offset 16
-    @ artAllocArrayFromCode(uint32_t type_idx, Method* method, int32_t component_count, Thread*, SP)
-    bl     artAllocArrayFromCode
+    @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*, SP)
+    bl     \entrypoint
     add    sp, #16                    @ strip the extra frame
     .cfi_adjust_cfa_offset -16
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
+    \return
     DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_array
+END \name
+.endm
 
-    .extern artAllocArrayFromCodeInstrumented
-ENTRY art_quick_alloc_array_instrumented
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ artAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t component_count, Thread*, SP)
-    bl     artAllocArrayFromCodeInstrumented
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_array_instrumented
-
-    /*
-     * Called by managed code to allocate an array when the caller doesn't know whether it has
-     * access to the created type.
-     */
-    .extern artAllocArrayFromCodeWithAccessCheck
-ENTRY art_quick_alloc_array_with_access_check
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ artAllocArrayFromCodeWithAccessCheck(type_idx, method, component_count, Thread*, SP)
-    bl     artAllocArrayFromCodeWithAccessCheck
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_array_with_access_check
-
-    .extern artAllocArrayFromCodeWithAccessCheckInstrumented
-ENTRY art_quick_alloc_array_with_access_check_instrumented
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ artAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, component_count, Thread*, SP)
-    bl     artAllocArrayFromCodeWithAccessCheckInstrumented
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_alloc_array_with_access_check_instrumented
-
-    /*
-     * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
-     */
-    .extern artCheckAndAllocArrayFromCode
-ENTRY art_quick_check_and_alloc_array
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ artCheckAndAllocArrayFromCode(uint32_t type_idx, Method* method, int32_t count, Thread* , SP)
-    bl     artCheckAndAllocArrayFromCode
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_check_and_alloc_array
-
-    .extern artCheckAndAllocArrayFromCodeInstrumented
-ENTRY art_quick_check_and_alloc_array_instrumented
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ artCheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t count, Thread* , SP)
-    bl     artCheckAndAllocArrayFromCodeInstrumented
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_check_and_alloc_array_instrumented
-
-    /*
-     * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
-     */
-    .extern artCheckAndAllocArrayFromCodeWithAccessCheck
-ENTRY art_quick_check_and_alloc_array_with_access_check
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ artCheckAndAllocArrayFromCodeWithAccessCheck(type_idx, method, count, Thread* , SP)
-    bl     artCheckAndAllocArrayFromCodeWithAccessCheck
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_check_and_alloc_array_with_access_check
-
-    .extern artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
-ENTRY art_quick_check_and_alloc_array_with_access_check_instrumented
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
-    mov    r3, r9                     @ pass Thread::Current
-    mov    r12, sp
-    str    r12, [sp, #-16]!           @ expand the frame and pass SP
-    .pad #16
-    .cfi_adjust_cfa_offset 16
-    @ artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, count, Thread* , SP)
-    bl     artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
-    add    sp, #16                    @ strip the extra frame
-    .cfi_adjust_cfa_offset -16
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO
-    DELIVER_PENDING_EXCEPTION
-END art_quick_check_and_alloc_array_with_access_check_instrumented
+// Generate the allocation entrypoints for each allocator.
+GENERATE_ALL_ALLOC_ENTRYPOINTS
 
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
@@ -1078,11 +946,10 @@
     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
     add     sp, #16                @ skip r1-r3, 4 bytes padding.
     .cfi_adjust_cfa_offset -16
-    cbnz    r2, 1f                 @ success if no exception is pending
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    cbnz    r2, 1f                 @ success if no exception is pending
     bx    lr                       @ return on success
 1:
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_to_interpreter_bridge
 
@@ -1137,6 +1004,8 @@
     mov   r2, r0         @ link register saved by instrumentation
     mov   lr, r1         @ r1 is holding link register if we're to bounce to deoptimize
     pop   {r0, r1}       @ restore return value
+    .cfi_restore r0
+    .cfi_restore r1
     add sp, #32          @ remove callee save frame
     .cfi_adjust_cfa_offset -32
     bx    r2             @ return
@@ -1187,6 +1056,8 @@
     mov     r1,r10
     pop     {r9 - r10}
     .cfi_adjust_cfa_offset -8
+    .cfi_restore r9
+    .cfi_restore r10
     bx      lr
 END art_quick_mul_long
 
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 331a461..e1b441a 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -33,21 +33,6 @@
 extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*);
 extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
 
-// Alloc entrypoints.
-extern "C" void* art_quick_alloc_array(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_array_with_access_check(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_object(uint32_t type_idx, void* method);
-extern "C" void* art_quick_alloc_object_with_access_check(uint32_t type_idx, void* method);
-extern "C" void* art_quick_check_and_alloc_array(uint32_t, void*, int32_t);
-extern "C" void* art_quick_check_and_alloc_array_with_access_check(uint32_t, void*, int32_t);
-
-extern "C" void* art_quick_alloc_array_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_object_instrumented(uint32_t type_idx, void* method);
-extern "C" void* art_quick_alloc_object_with_access_check_instrumented(uint32_t type_idx, void* method);
-extern "C" void* art_quick_check_and_alloc_array_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_check_and_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
-
 // Cast entrypoints.
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
@@ -143,29 +128,7 @@
 extern "C" void art_quick_throw_null_pointer_exception();
 extern "C" void art_quick_throw_stack_overflow(void*);
 
-static bool quick_alloc_entry_points_instrumented = false;
-
-void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
-  quick_alloc_entry_points_instrumented = instrumented;
-}
-
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
-  if (quick_alloc_entry_points_instrumented) {
-    qpoints->pAllocArray = art_quick_alloc_array_instrumented;
-    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check_instrumented;
-    qpoints->pAllocObject = art_quick_alloc_object_instrumented;
-    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check_instrumented;
-    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array_instrumented;
-    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check_instrumented;
-  } else {
-    qpoints->pAllocArray = art_quick_alloc_array;
-    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
-    qpoints->pAllocObject = art_quick_alloc_object;
-    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
-    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
-    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
-  }
-}
+extern void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
 
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 451b1bb..2d1e87a 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -16,6 +16,8 @@
 
 #include "asm_support_mips.S"
 
+#include "arch/quick_alloc_entrypoints.S"
+
     .set noreorder
     .balign 4
 
@@ -89,28 +91,46 @@
 
 .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
     lw     $ra, 60($sp)
+    .cfi_restore 31
     lw     $s8, 56($sp)
+    .cfi_restore 30
     lw     $gp, 52($sp)
+    .cfi_restore 28
     lw     $s7, 48($sp)
+    .cfi_restore 23
     lw     $s6, 44($sp)
+    .cfi_restore 22
     lw     $s5, 40($sp)
+    .cfi_restore 21
     lw     $s4, 36($sp)
+    .cfi_restore 20
     lw     $s3, 32($sp)
+    .cfi_restore 19
     lw     $s2, 28($sp)
+    .cfi_restore 18
     addiu  $sp, $sp, 64
     .cfi_adjust_cfa_offset -64
 .endm
 
 .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
     lw     $ra, 60($sp)
+    .cfi_restore 31
     lw     $s8, 56($sp)
+    .cfi_restore 30
     lw     $gp, 52($sp)
+    .cfi_restore 28
     lw     $s7, 48($sp)
+    .cfi_restore 23
     lw     $s6, 44($sp)
+    .cfi_restore 22
     lw     $s5, 40($sp)
+    .cfi_restore 21
     lw     $s4, 36($sp)
+    .cfi_restore 20
     lw     $s3, 32($sp)
+    .cfi_restore 19
     lw     $s2, 28($sp)
+    .cfi_restore 18
     jr     $ra
     addiu  $sp, $sp, 64
     .cfi_adjust_cfa_offset -64
@@ -153,17 +173,29 @@
 
 .macro RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
     lw     $ra, 60($sp)
+    .cfi_restore 31
     lw     $s8, 56($sp)
+    .cfi_restore 30
     lw     $gp, 52($sp)
+    .cfi_restore 28
     lw     $s7, 48($sp)
+    .cfi_restore 23
     lw     $s6, 44($sp)
+    .cfi_restore 22
     lw     $s5, 40($sp)
+    .cfi_restore 21
     lw     $s4, 36($sp)
+    .cfi_restore 20
     lw     $s3, 32($sp)
+    .cfi_restore 19
     lw     $s2, 28($sp)
+    .cfi_restore 18
     lw     $a3, 12($sp)
+    .cfi_restore 7
     lw     $a2, 8($sp)
+    .cfi_restore 6
     lw     $a1, 4($sp)
+    .cfi_restore 5
     addiu  $sp, $sp, 64           # pop frame
     .cfi_adjust_cfa_offset -64
 .endm
@@ -201,7 +233,7 @@
     DELIVER_PENDING_EXCEPTION
 .endm
 
-.macro RETURN_IF_NONZERO
+.macro RETURN_IF_RESULT_IS_NON_ZERO
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
     beqz   $v0, 1f                       # success?
     nop
@@ -463,9 +495,13 @@
     sw    $zero, 0($sp)         # store NULL for method* at bottom of frame
     move  $sp, $fp              # restore the stack
     lw    $s0, 0($sp)
+    .cfi_restore 16
     lw    $s1, 4($sp)
+    .cfi_restore 17
     lw    $fp, 8($sp)
+    .cfi_restore 30
     lw    $ra, 12($sp)
+    .cfi_restore 31
     addiu $sp, $sp, 16
     .cfi_adjust_cfa_offset -16
     lw    $t0, 16($sp)          # get result pointer
@@ -655,7 +691,7 @@
     # artInitializeStaticStorageFromCode(uint32_t type_idx, Method* referrer, Thread*, $sp)
     jal     artInitializeStaticStorageFromCode
     move    $a3, $sp                            # pass $sp
-    RETURN_IF_NONZERO
+    RETURN_IF_RESULT_IS_NON_ZERO
 END art_quick_initialize_static_storage
 
     /*
@@ -669,7 +705,7 @@
     # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*, $sp)
     jal     artInitializeTypeFromCode
     move    $a3, $sp                           # pass $sp
-    RETURN_IF_NONZERO
+    RETURN_IF_RESULT_IS_NON_ZERO
 END art_quick_initialize_type
 
     /*
@@ -684,7 +720,7 @@
     # artInitializeTypeFromCode(uint32_t type_idx, Method* referrer, Thread*, $sp)
     jal     artInitializeTypeAndVerifyAccessFromCode
     move    $a3, $sp                           # pass $sp
-    RETURN_IF_NONZERO
+    RETURN_IF_RESULT_IS_NON_ZERO
 END art_quick_initialize_type_and_verify_access
 
     /*
@@ -868,156 +904,37 @@
     # artResolveStringFromCode(Method* referrer, uint32_t string_idx, Thread*, $sp)
     jal     artResolveStringFromCode
     move    $a3, $sp                  # pass $sp
-    RETURN_IF_NONZERO
+    RETURN_IF_RESULT_IS_NON_ZERO
 END art_quick_resolve_string
 
-    /*
-     * Called by managed code to allocate an object.
-     */
-    .extern artAllocObjectFromCode
-ENTRY art_quick_alloc_object
+
+// Macro to facilitate adding new allocation entrypoints.
+.macro TWO_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
     GENERATE_GLOBAL_POINTER
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
     move    $a2, rSELF                # pass Thread::Current
-    jal     artAllocObjectFromCode    # (uint32_t type_idx, Method* method, Thread*, $sp)
+    jal     \entrypoint
     move    $a3, $sp                  # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_object
+    \return
+END \name
+.endm
 
-    .extern artAllocObjectFromCodeInstrumented
-ENTRY art_quick_alloc_object_instrumented
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a2, rSELF                # pass Thread::Current
-    jal     artAllocObjectFromCodeInstrumented    # (uint32_t type_idx, Method* method, Thread*, $sp)
-    move    $a3, $sp                  # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_object_instrumented
-
-    /*
-     * Called by managed code to allocate an object when the caller doesn't know whether it has
-     * access to the created type.
-     */
-    .extern artAllocObjectFromCodeWithAccessCheck
-ENTRY art_quick_alloc_object_with_access_check
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a2, rSELF                # pass Thread::Current
-    jal     artAllocObjectFromCodeWithAccessCheck  # (uint32_t type_idx, Method* method, Thread*, $sp)
-    move    $a3, $sp                  # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_object_with_access_check
-
-    .extern artAllocObjectFromCodeWithAccessCheckInstrumented
-ENTRY art_quick_alloc_object_with_access_check_instrumented
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a2, rSELF                # pass Thread::Current
-    jal     artAllocObjectFromCodeWithAccessCheckInstrumented  # (uint32_t type_idx, Method* method, Thread*, $sp)
-    move    $a3, $sp                  # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_object_with_access_check_instrumented
-
-    /*
-     * Called by managed code to allocate an array.
-     */
-    .extern artAllocArrayFromCode
-ENTRY art_quick_alloc_array
+.macro THREE_ARG_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
     GENERATE_GLOBAL_POINTER
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
     move    $a3, rSELF                # pass Thread::Current
-    # artAllocArrayFromCode(uint32_t type_idx, Method* method, int32_t component_count, Thread*, $sp)
-    jal     artAllocArrayFromCode
+    jal     \entrypoint
     sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_array
+    \return
+END \name
+.endm
 
-    .extern artAllocArrayFromCodeInstrumented
-ENTRY art_quick_alloc_array_instrumented
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
-    # artAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t component_count, Thread*, $sp)
-    jal     artAllocArrayFromCodeInstrumented
-    sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_array_instrumented
-
-    /*
-     * Called by managed code to allocate an array when the caller doesn't know whether it has
-     * access to the created type.
-     */
-    .extern artAllocArrayFromCodeWithAccessCheck
-ENTRY art_quick_alloc_array_with_access_check
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
-    # artAllocArrayFromCodeWithAccessCheck(type_idx, method, component_count, Thread*, $sp)
-    jal     artAllocArrayFromCodeWithAccessCheck
-    sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_array_with_access_check
-
-    .extern artAllocArrayFromCodeWithAccessCheckInstrumented
-ENTRY art_quick_alloc_array_with_access_check_instrumented
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
-    # artAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, component_count, Thread*, $sp)
-    jal     artAllocArrayFromCodeWithAccessCheckInstrumented
-    sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_alloc_array_with_access_check_instrumented
-
-    /*
-     * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
-     */
-    .extern artCheckAndAllocArrayFromCode
-ENTRY art_quick_check_and_alloc_array
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
-    # artCheckAndAllocArrayFromCode(uint32_t type_idx, Method* method, int32_t count, Thread* , $sp)
-    jal     artCheckAndAllocArrayFromCode
-    sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_check_and_alloc_array
-
-    .extern artCheckAndAllocArrayFromCodeInstrumented
-ENTRY art_quick_check_and_alloc_array_instrumented
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
-    # artCheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t count, Thread* , $sp)
-    jal     artCheckAndAllocArrayFromCodeInstrumented
-    sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_check_and_alloc_array_instrumented
-
-    /*
-     * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
-     */
-    .extern artCheckAndAllocArrayFromCodeWithAccessCheck
-ENTRY art_quick_check_and_alloc_array_with_access_check
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
-    # artCheckAndAllocArrayFromCodeWithAccessCheck(type_idx, method, count, Thread* , $sp)
-    jal     artCheckAndAllocArrayFromCodeWithAccessCheck
-    sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_check_and_alloc_array_with_access_check
-
-    .extern artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
-ENTRY art_quick_check_and_alloc_array_with_access_check_instrumented
-    GENERATE_GLOBAL_POINTER
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
-    move    $a3, rSELF                # pass Thread::Current
-    # artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, count, Thread* , $sp)
-    jal     artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
-    sw      $sp, 16($sp)              # pass $sp
-    RETURN_IF_NONZERO
-END art_quick_check_and_alloc_array_with_access_check_instrumented
+// Generate the allocation entrypoints for each allocator.
+GENERATE_ALL_ALLOC_ENTRYPOINTS
 
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
new file mode 100644
index 0000000..bdadc51
--- /dev/null
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+.macro GENERATE_ALLOC_ENTRYPOINTS c_suffix, cxx_suffix
+// Called by managed code to allocate an object.
+TWO_ARG_DOWNCALL art_quick_alloc_object\c_suffix, artAllocObjectFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an object when the caller doesn't know whether it has access
+// to the created type.
+TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check\c_suffix, artAllocObjectFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an array.
+THREE_ARG_DOWNCALL art_quick_alloc_array\c_suffix, artAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an array when the caller doesn't know whether it has access
+// to the created type.
+THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check\c_suffix, artAllocArrayFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
+THREE_ARG_DOWNCALL art_quick_check_and_alloc_array\c_suffix, artCheckAndAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
+THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check\c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+.endm
+
+.macro GENERATE_ALL_ALLOC_ENTRYPOINTS
+GENERATE_ALLOC_ENTRYPOINTS
+GENERATE_ALLOC_ENTRYPOINTS _instrumented, Instrumented
+GENERATE_ALLOC_ENTRYPOINTS _bump_pointer, BumpPointer
+GENERATE_ALLOC_ENTRYPOINTS _bump_pointer_instrumented, BumpPointerInstrumented
+.endm
diff --git a/runtime/arch/quick_alloc_entrypoints.cc b/runtime/arch/quick_alloc_entrypoints.cc
new file mode 100644
index 0000000..192b124
--- /dev/null
+++ b/runtime/arch/quick_alloc_entrypoints.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "gc/heap.h"
+
+#define GENERATE_ENTRYPOINTS(suffix) \
+extern "C" void* art_quick_alloc_array##suffix(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_alloc_array_with_access_check##suffix(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_alloc_object##suffix(uint32_t type_idx, void* method); \
+extern "C" void* art_quick_alloc_object_with_access_check##suffix(uint32_t type_idx, void* method); \
+extern "C" void* art_quick_check_and_alloc_array##suffix(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_alloc_array##suffix##_instrumented(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_alloc_array_with_access_check##suffix##_instrumented(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_alloc_object##suffix##_instrumented(uint32_t type_idx, void* method); \
+extern "C" void* art_quick_alloc_object_with_access_check##suffix##_instrumented(uint32_t type_idx, void* method); \
+extern "C" void* art_quick_check_and_alloc_array##suffix##_instrumented(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented(uint32_t, void*, int32_t); \
+void SetQuickAllocEntryPoints##suffix(QuickEntryPoints* qpoints, bool instrumented) { \
+  if (instrumented) { \
+    qpoints->pAllocArray = art_quick_alloc_array##suffix##_instrumented; \
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check##suffix##_instrumented; \
+    qpoints->pAllocObject = art_quick_alloc_object##suffix##_instrumented; \
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix##_instrumented; \
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix##_instrumented; \
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented; \
+  } else { \
+    qpoints->pAllocArray = art_quick_alloc_array##suffix; \
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check##suffix; \
+    qpoints->pAllocObject = art_quick_alloc_object##suffix; \
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix; \
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix; \
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix; \
+  } \
+}
+
+namespace art {
+
+// Generate the entrypoint functions.
+GENERATE_ENTRYPOINTS();
+GENERATE_ENTRYPOINTS(_bump_pointer);
+
+static bool entry_points_instrumented = false;
+static gc::AllocatorType entry_points_allocator = kMovingCollector ?
+    gc::kAllocatorTypeBumpPointer : gc::kAllocatorTypeFreeList;
+
+void SetQuickAllocEntryPointsAllocator(gc::AllocatorType allocator) {
+  entry_points_allocator = allocator;
+}
+
+void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
+  entry_points_instrumented = instrumented;
+}
+
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
+  switch (entry_points_allocator) {
+    case gc::kAllocatorTypeFreeList: {
+      SetQuickAllocEntryPoints(qpoints, entry_points_instrumented);
+      break;
+    }
+    case gc::kAllocatorTypeBumpPointer: {
+      SetQuickAllocEntryPoints_bump_pointer(qpoints, entry_points_instrumented);
+      break;
+    }
+    default: {
+      LOG(FATAL) << "Unimplemented";
+    }
+  }
+}
+
+}  // namespace art
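For orientation, a minimal sketch of how runtime code might drive these new hooks, assuming the same includes as the new file and a caller that already owns a QuickEntryPoints instance; the function name here is illustrative only, and the declarations are repeated so the sketch is self-contained:

#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/heap.h"

namespace art {

// Defined in runtime/arch/quick_alloc_entrypoints.cc (signatures as above).
void SetQuickAllocEntryPointsAllocator(gc::AllocatorType allocator);
void SetQuickAllocEntryPointsInstrumented(bool instrumented);
void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);

// Illustrative only: switch the quick allocation entrypoints to the
// instrumented bump-pointer allocator.
void ExampleSwitchToInstrumentedBumpPointer(QuickEntryPoints* qpoints) {
  SetQuickAllocEntryPointsAllocator(gc::kAllocatorTypeBumpPointer);
  SetQuickAllocEntryPointsInstrumented(true);
  // Dispatches to SetQuickAllocEntryPoints_bump_pointer(qpoints, true).
  ResetQuickAllocEntryPoints(qpoints);
}

}  // namespace art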
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 99b0dd5..6a67079 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -32,21 +32,6 @@
 extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*);
 extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
 
-// Alloc entrypoints.
-extern "C" void* art_quick_alloc_array(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_array_with_access_check(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_object(uint32_t type_idx, void* method);
-extern "C" void* art_quick_alloc_object_with_access_check(uint32_t type_idx, void* method);
-extern "C" void* art_quick_check_and_alloc_array(uint32_t, void*, int32_t);
-extern "C" void* art_quick_check_and_alloc_array_with_access_check(uint32_t, void*, int32_t);
-
-extern "C" void* art_quick_alloc_array_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_alloc_object_instrumented(uint32_t type_idx, void* method);
-extern "C" void* art_quick_alloc_object_with_access_check_instrumented(uint32_t type_idx, void* method);
-extern "C" void* art_quick_check_and_alloc_array_instrumented(uint32_t, void*, int32_t);
-extern "C" void* art_quick_check_and_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
-
 // Cast entrypoints.
 extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
                                                 const mirror::Class* ref_class);
@@ -125,29 +110,7 @@
 extern "C" void art_quick_throw_null_pointer_exception();
 extern "C" void art_quick_throw_stack_overflow(void*);
 
-static bool quick_alloc_entry_points_instrumented = false;
-
-void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
-  quick_alloc_entry_points_instrumented = instrumented;
-}
-
-void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
-  if (quick_alloc_entry_points_instrumented) {
-    qpoints->pAllocArray = art_quick_alloc_array_instrumented;
-    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check_instrumented;
-    qpoints->pAllocObject = art_quick_alloc_object_instrumented;
-    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check_instrumented;
-    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array_instrumented;
-    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check_instrumented;
-  } else {
-    qpoints->pAllocArray = art_quick_alloc_array;
-    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
-    qpoints->pAllocObject = art_quick_alloc_object;
-    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
-    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
-    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
-  }
-}
+extern void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
 
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 6fe4993..9679471 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -16,14 +16,21 @@
 
 #include "asm_support_x86.S"
 
+#include "arch/quick_alloc_entrypoints.S"
+
+// For x86, the CFA is esp+4, the address above the pushed return address on the stack.
+
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveAll)
      */
 MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
+    .cfi_rel_offset edi, -8
     PUSH esi
+    .cfi_rel_offset esi, -12
     PUSH ebp
+    .cfi_rel_offset ebp, -16
     subl  MACRO_LITERAL(16), %esp  // Grow stack by 4 words, bottom word will hold Method*
     .cfi_adjust_cfa_offset 16
 END_MACRO
@@ -34,8 +41,11 @@
      */
 MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME)
     PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
+    .cfi_rel_offset edi, -8
     PUSH esi
+    .cfi_rel_offset esi, -12
     PUSH ebp
+    .cfi_rel_offset ebp, -16
     subl  MACRO_LITERAL(16), %esp  // Grow stack by 4 words, bottom word will hold Method*
     .cfi_adjust_cfa_offset 16
 END_MACRO
@@ -43,8 +53,11 @@
 MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
     addl MACRO_LITERAL(16), %esp  // Unwind stack up to return address
     POP ebp  // Restore callee saves (ebx is saved/restored by the upcall)
+    .cfi_restore ebp
     POP esi
+    .cfi_restore esi
     POP edi
+    .cfi_restore edi
     .cfi_adjust_cfa_offset -28
 END_MACRO
 
@@ -54,23 +67,36 @@
      */
 MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
     PUSH edi  // Save callee saves
+    .cfi_rel_offset edi, -8
     PUSH esi
+    .cfi_rel_offset esi, -12
     PUSH ebp
+    .cfi_rel_offset ebp, -16
     PUSH ebx  // Save args
+    .cfi_rel_offset ebx, -20
     PUSH edx
+    .cfi_rel_offset edx, -24
     PUSH ecx
+    .cfi_rel_offset ecx, -28
     PUSH eax   // Align stack, eax will be clobbered by Method*
+    .cfi_rel_offset eax, -32
 END_MACRO
 
 MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME)
     addl MACRO_LITERAL(4), %esp  // Remove padding
     .cfi_adjust_cfa_offset -4
     POP ecx  // Restore args except eax
+    .cfi_restore ecx
     POP edx
+    .cfi_restore edx
     POP ebx
+    .cfi_restore ebx
     POP ebp  // Restore callee saves
+    .cfi_restore ebp
     POP esi
+    .cfi_restore esi
     POP edi
+    .cfi_restore edi
 END_MACRO
 
     /*
@@ -188,12 +214,19 @@
     // Set up the callee save frame to conform with Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
     // return address
     PUSH edi
+    .cfi_rel_offset edi, -8
     PUSH esi
+    .cfi_rel_offset esi, -12
     PUSH ebp
-    PUSH ebx
+    .cfi_rel_offset ebp, -16
+    PUSH ebx  // Save args
+    .cfi_rel_offset ebx, -20
     PUSH edx
+    .cfi_rel_offset edx, -24
     PUSH ecx
-    PUSH eax   // <-- callee save Method* to go here
+    .cfi_rel_offset ecx, -28
+    PUSH eax    // <-- callee save Method* to go here
+    .cfi_rel_offset eax, -32
     movl %esp, %edx  // remember SP
     // Outgoing argument set up
     subl MACRO_LITERAL(12), %esp  // alignment padding
@@ -209,11 +242,16 @@
     movl %edx, %edi               // save code pointer in EDI
     addl MACRO_LITERAL(36), %esp  // Pop arguments skip eax
     .cfi_adjust_cfa_offset -36
-    POP ecx                       // Restore args
+    POP ecx  // Restore args except eax
+    .cfi_restore ecx
     POP edx
+    .cfi_restore edx
     POP ebx
-    POP ebp  // Restore callee saves.
+    .cfi_restore ebx
+    POP ebp  // Restore callee saves
+    .cfi_restore ebp
     POP esi
+    .cfi_restore esi
     // Swap EDI callee save with code pointer.
     xchgl %edi, (%esp)
     testl %eax, %eax              // Branch forward if exception pending.
@@ -248,7 +286,9 @@
      */
 DEFINE_FUNCTION art_quick_invoke_stub
     PUSH ebp                      // save ebp
+    .cfi_rel_offset ebp, -8
     PUSH ebx                      // save ebx
+    .cfi_rel_offset ebx, -12
     mov %esp, %ebp                // copy value of stack pointer into base pointer
     .cfi_def_cfa_register ebp
     mov 20(%ebp), %ebx            // get arg array size
@@ -269,8 +309,11 @@
     mov 12(%esp), %ebx            // copy arg3 into ebx
     call *METHOD_CODE_OFFSET(%eax) // call the method
     mov %ebp, %esp                // restore stack pointer
+    .cfi_def_cfa_register esp
     POP ebx                       // pop ebx
+    .cfi_restore ebx
     POP ebp                       // pop ebp
+    .cfi_restore ebp
     mov 20(%esp), %ecx            // get result pointer
     cmpl LITERAL(68), 24(%esp)    // test if result type char == 'D'
     je return_double_quick
@@ -360,7 +403,7 @@
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
-MACRO0(RETURN_IF_EAX_NOT_ZERO)
+MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
     testl %eax, %eax               // eax == 0 ?
     jz  1f                         // if eax == 0 goto 1
     ret                            // return
@@ -385,24 +428,13 @@
     DELIVER_PENDING_EXCEPTION
 END_MACRO
 
-TWO_ARG_DOWNCALL art_quick_alloc_object, artAllocObjectFromCode, RETURN_IF_EAX_NOT_ZERO
-TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check, artAllocObjectFromCodeWithAccessCheck, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_alloc_array, artAllocArrayFromCode, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check, artAllocArrayFromCodeWithAccessCheck, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_check_and_alloc_array, artCheckAndAllocArrayFromCode, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check, artCheckAndAllocArrayFromCodeWithAccessCheck, RETURN_IF_EAX_NOT_ZERO
+// Generate the allocation entrypoints for each allocator.
+GENERATE_ALL_ALLOC_ENTRYPOINTS
 
-TWO_ARG_DOWNCALL art_quick_alloc_object_instrumented, artAllocObjectFromCodeInstrumented, RETURN_IF_EAX_NOT_ZERO
-TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check_instrumented, artAllocObjectFromCodeWithAccessCheckInstrumented, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_alloc_array_instrumented, artAllocArrayFromCodeInstrumented, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check_instrumented, artAllocArrayFromCodeWithAccessCheckInstrumented, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_instrumented, artCheckAndAllocArrayFromCodeInstrumented, RETURN_IF_EAX_NOT_ZERO
-THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check_instrumented, artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented, RETURN_IF_EAX_NOT_ZERO
-
-TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_EAX_NOT_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_EAX_NOT_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_EAX_NOT_ZERO
-TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_EAX_NOT_ZERO
+TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 
 TWO_ARG_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
 
@@ -495,7 +527,9 @@
 DEFINE_FUNCTION art_quick_check_cast
     PUSH eax                     // alignment padding
     PUSH ecx                     // pass arg2 - obj->klass
+    .cfi_rel_offset ecx, -12
     PUSH eax                     // pass arg1 - checked class
+    .cfi_rel_offset eax, -16
     call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
     testl %eax, %eax
     jz 1f                         // jump forward if not assignable
@@ -504,7 +538,9 @@
     ret
 1:
     POP eax                       // pop arguments
+    .cfi_restore eax
     POP ecx
+    .cfi_restore ecx
     addl LITERAL(4), %esp
     .cfi_adjust_cfa_offset -12
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
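To make the new x86 CFI annotations easier to audit, the offsets used by the kRefsAndArgs save sequence can be summarized as constants relative to the CFA (esp+4 at entry, per the comment added at the top of the file); this is purely illustrative and mirrors the .cfi_rel_offset directives above:

// CFA-relative slots written by SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME: the
// return address sits at CFA-4 and each PUSH moves four bytes further down.
constexpr int kEdiCfaOffset = -8;
constexpr int kEsiCfaOffset = -12;
constexpr int kEbpCfaOffset = -16;
constexpr int kEbxCfaOffset = -20;
constexpr int kEdxCfaOffset = -24;
constexpr int kEcxCfaOffset = -28;
constexpr int kEaxCfaOffset = -32;  // slot that later holds the callee-save Method*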
diff --git a/runtime/barrier_test.cc b/runtime/barrier_test.cc
index 298ae56..91fc143 100644
--- a/runtime/barrier_test.cc
+++ b/runtime/barrier_test.cc
@@ -66,7 +66,7 @@
 // Check that barrier wait and barrier increment work.
 TEST_F(BarrierTest, CheckWait) {
   Thread* self = Thread::Current();
-  ThreadPool thread_pool(num_threads);
+  ThreadPool thread_pool("Barrier test thread pool", num_threads);
   Barrier barrier(0);
   AtomicInteger count1(0);
   AtomicInteger count2(0);
@@ -121,7 +121,7 @@
 // Check that barrier pass through works.
 TEST_F(BarrierTest, CheckPass) {
   Thread* self = Thread::Current();
-  ThreadPool thread_pool(num_threads);
+  ThreadPool thread_pool("Barrier test thread pool", num_threads);
   Barrier barrier(0);
   AtomicInteger count(0);
   const int32_t num_tasks = num_threads * 4;
diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h
index 0345266..7c09999 100644
--- a/runtime/base/histogram-inl.h
+++ b/runtime/base/histogram-inl.h
@@ -39,6 +39,13 @@
   BucketiseValue(value);
 }
 
+template <class Value> inline Histogram<Value>::Histogram(const char* name)
+    : kAdjust(0),
+      kInitialBucketCount(0),
+      name_(name),
+      max_buckets_(0) {
+}
+
 template <class Value>
 inline Histogram<Value>::Histogram(const char* name, Value initial_bucket_width,
                                    size_t max_buckets)
@@ -162,28 +169,30 @@
 
   double per_0 = (1.0 - interval) / 2.0;
   double per_1 = per_0 + interval;
-  os << Name() << ":\t";
   TimeUnit unit = GetAppropriateTimeUnit(Mean() * kAdjust);
+  os << Name() << ":\tSum: ";
+  os << PrettyDuration(Sum() * kAdjust) << " ";
   os << (interval * 100) << "% C.I. " << FormatDuration(Percentile(per_0, data) * kAdjust, unit);
   os << "-" << FormatDuration(Percentile(per_1, data) * kAdjust, unit) << " ";
   os << "Avg: " << FormatDuration(Mean() * kAdjust, unit) << " Max: ";
   os << FormatDuration(Max() * kAdjust, unit) << "\n";
 }
 
-template <class Value> inline void Histogram<Value>::CreateHistogram(CumulativeData& out_data) {
+template <class Value>
+inline void Histogram<Value>::CreateHistogram(CumulativeData* out_data) const {
   DCHECK_GT(sample_size_, 0ull);
-  out_data.freq_.clear();
-  out_data.perc_.clear();
+  out_data->freq_.clear();
+  out_data->perc_.clear();
   uint64_t accumulated = 0;
-  out_data.freq_.push_back(accumulated);
-  out_data.perc_.push_back(0.0);
+  out_data->freq_.push_back(accumulated);
+  out_data->perc_.push_back(0.0);
   for (size_t idx = 0; idx < frequency_.size(); idx++) {
     accumulated += frequency_[idx];
-    out_data.freq_.push_back(accumulated);
-    out_data.perc_.push_back(static_cast<double>(accumulated) / static_cast<double>(sample_size_));
+    out_data->freq_.push_back(accumulated);
+    out_data->perc_.push_back(static_cast<double>(accumulated) / static_cast<double>(sample_size_));
   }
-  DCHECK_EQ(out_data.freq_.back(), sample_size_);
-  DCHECK_LE(std::abs(out_data.perc_.back() - 1.0), 0.001);
+  DCHECK_EQ(out_data->freq_.back(), sample_size_);
+  DCHECK_LE(std::abs(out_data->perc_.back() - 1.0), 0.001);
 }
 
 template <class Value>
diff --git a/runtime/base/histogram.h b/runtime/base/histogram.h
index 2a02cf4..a7d51e2 100644
--- a/runtime/base/histogram.h
+++ b/runtime/base/histogram.h
@@ -40,6 +40,10 @@
     std::vector<double> perc_;
   };
 
+  // Used by the cumulative timing logger to search the histogram set for an existing split
+  // with the same name using CumulativeLogger::HistogramComparator.
+  explicit Histogram(const char* name);
+  // This is the expected constructor when creating new Histograms.
   Histogram(const char* name, Value initial_bucket_width, size_t max_buckets = 100);
   void AddValue(Value);
   // Builds the cumulative distribution function from the frequency data.
@@ -47,7 +51,7 @@
   // cumulative_freq[i] = sum(frequency[j] : 0 < j < i )
   // Accumulative summation of percentiles; which is the frequency / SampleSize
   // cumulative_perc[i] = sum(frequency[j] / SampleSize : 0 < j < i )
-  void CreateHistogram(CumulativeData& data);
+  void CreateHistogram(CumulativeData* data) const;
   // Reset the cumulative values, next time CreateHistogram is called it will recreate the cache.
   void Reset();
   double Mean() const;
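A short usage sketch of the revised interface, modeled on the tests further below; the values and the function name are illustrative, and only the Histogram calls are taken from this header:

#include <iostream>

#include "base/histogram-inl.h"

void ExampleHistogramUsage() {
  // New-style construction: name, initial bucket width, max bucket count.
  art::Histogram<uint64_t> hist("ExampleSplit", 50, 100);
  hist.AddValue(15);
  hist.AddValue(212);
  // CreateHistogram now takes a pointer argument and is const.
  art::Histogram<uint64_t>::CumulativeData data;
  hist.CreateHistogram(&data);
  double median = hist.Percentile(0.50, data);
  // PrintConfidenceIntervals output now begins with "Sum: ...".
  hist.PrintConfidenceIntervals(std::cout, 0.99, data);
  (void)median;
}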
diff --git a/runtime/base/histogram_test.cc b/runtime/base/histogram_test.cc
index 534440c..966b97f 100644
--- a/runtime/base/histogram_test.cc
+++ b/runtime/base/histogram_test.cc
@@ -85,7 +85,7 @@
   hist->AddValue(145);
   hist->AddValue(155);
 
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   PerValue = hist->Percentile(0.50, data);
   EXPECT_EQ(875, static_cast<int>(PerValue * 10));
 }
@@ -117,12 +117,12 @@
   hist->AddValue(200);
   hist->AddValue(205);
   hist->AddValue(212);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   PerValue = hist->Percentile(0.50, data);
 
   std::string text;
   std::stringstream stream;
-  std::string expected("UpdateRange:\t99% C.I. 15us-212us Avg: 126.380us Max: 212us\n");
+  std::string expected("UpdateRange:\tSum: 2.654ms 99% C.I. 15us-212us Avg: 126.380us Max: 212us\n");
   hist->PrintConfidenceIntervals(stream, 0.99, data);
 
   EXPECT_EQ(expected, stream.str());
@@ -132,7 +132,6 @@
 
 TEST(Histtest, Reset) {
   UniquePtr<Histogram<uint64_t> > hist(new Histogram<uint64_t>("Reset", 5));
-  Histogram<uint64_t>::CumulativeData data;
 
   double PerValue;
   hist->AddValue(0);
@@ -160,12 +159,13 @@
   hist->AddValue(200);
   hist->AddValue(205);
   hist->AddValue(212);
-  hist->CreateHistogram(data);
+  Histogram<uint64_t>::CumulativeData data;
+  hist->CreateHistogram(&data);
   PerValue = hist->Percentile(0.50, data);
 
   std::string text;
   std::stringstream stream;
-  std::string expected("Reset:\t99% C.I. 15us-212us Avg: 126.380us Max: 212us\n");
+  std::string expected("Reset:\tSum: 2.654ms 99% C.I. 15us-212us Avg: 126.380us Max: 212us\n");
   hist->PrintConfidenceIntervals(stream, 0.99, data);
 
   EXPECT_EQ(expected, stream.str());
@@ -185,7 +185,7 @@
   hist->AddValue(68);
   hist->AddValue(75);
   hist->AddValue(93);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   hist->AddValue(110);
   hist->AddValue(121);
   hist->AddValue(132);
@@ -194,17 +194,17 @@
   hist->AddValue(155);
   hist->AddValue(163);
   hist->AddValue(168);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   hist->AddValue(175);
   hist->AddValue(182);
   hist->AddValue(193);
   hist->AddValue(200);
   hist->AddValue(205);
   hist->AddValue(212);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   PerValue = hist->Percentile(0.50, data);
   std::stringstream stream;
-  std::string expected("MultipleCreateHist:\t99% C.I. 15us-212us Avg: 126.380us Max: 212us\n");
+  std::string expected("MultipleCreateHist:\tSum: 2.654ms 99% C.I. 15us-212us Avg: 126.380us Max: 212us\n");
   hist->PrintConfidenceIntervals(stream, 0.99, data);
 
   EXPECT_EQ(expected, stream.str());
@@ -217,9 +217,9 @@
   Histogram<uint64_t>::CumulativeData data;
 
   hist->AddValue(1);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   std::stringstream stream;
-  std::string expected = "SingleValue:\t99% C.I. 1us-1us Avg: 1us Max: 1us\n";
+  std::string expected = "SingleValue:\tSum: 1us 99% C.I. 1us-1us Avg: 1us Max: 1us\n";
   hist->PrintConfidenceIntervals(stream, 0.99, data);
   EXPECT_EQ(expected, stream.str());
 }
@@ -234,7 +234,7 @@
   for (uint64_t idx = 0ull; idx < 150ull; idx++) {
     hist->AddValue(0);
   }
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   per_995 = hist->Percentile(0.995, data);
   EXPECT_EQ(per_995, 0);
   hist->Reset();
@@ -243,7 +243,7 @@
       hist->AddValue(val);
     }
   }
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   per_005 = hist->Percentile(0.005, data);
   per_995 = hist->Percentile(0.995, data);
   EXPECT_EQ(1, per_005);
@@ -260,9 +260,9 @@
     }
   }
   hist->AddValue(10000);
-  hist->CreateHistogram(data);
+  hist->CreateHistogram(&data);
   std::stringstream stream;
-  std::string expected = "SpikyValues:\t99% C.I. 0.089us-2541.825us Avg: 95.033us Max: 10000us\n";
+  std::string expected = "SpikyValues:\tSum: 14.350ms 99% C.I. 0.089us-2541.825us Avg: 95.033us Max: 10000us\n";
   hist->PrintConfidenceIntervals(stream, 0.99, data);
   EXPECT_EQ(expected, stream.str());
 }
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 3d842a0..3aabc8d 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -19,6 +19,7 @@
 #include "base/mutex.h"
 #include "runtime.h"
 #include "thread-inl.h"
+#include "UniquePtr.h"
 #include "utils.h"
 
 namespace art {
@@ -28,20 +29,21 @@
 unsigned int gAborting = 0;
 
 static LogSeverity gMinimumLogSeverity = INFO;
-static std::string* gCmdLine = NULL;
-static std::string* gProgramInvocationName = NULL;
-static std::string* gProgramInvocationShortName = NULL;
+static UniquePtr<std::string> gCmdLine;
+static UniquePtr<std::string> gProgramInvocationName;
+static UniquePtr<std::string> gProgramInvocationShortName;
 
 const char* GetCmdLine() {
-  return (gCmdLine != NULL) ? gCmdLine->c_str() : NULL;
+  return (gCmdLine.get() != nullptr) ? gCmdLine->c_str() : nullptr;
 }
 
 const char* ProgramInvocationName() {
-  return (gProgramInvocationName != NULL) ? gProgramInvocationName->c_str() : "art";
+  return (gProgramInvocationName.get() != nullptr) ? gProgramInvocationName->c_str() : "art";
 }
 
 const char* ProgramInvocationShortName() {
-  return (gProgramInvocationShortName != NULL) ? gProgramInvocationShortName->c_str() : "art";
+  return (gProgramInvocationShortName.get() != nullptr) ? gProgramInvocationShortName->c_str()
+                                                        : "art";
 }
 
 // Configure logging based on ANDROID_LOG_TAGS environment variable.
@@ -53,7 +55,7 @@
 // and a letter indicating the minimum priority level we're expected to log.
 // This can be used to reveal or conceal logs with specific tags.
 void InitLogging(char* argv[]) {
-  if (gCmdLine != NULL) {
+  if (gCmdLine.get() != nullptr) {
     return;
   }
   // TODO: Move this to a more obvious InitART...
@@ -63,17 +65,18 @@
   // but we don't have that luxury on the Mac, and there are a couple of argv[0] variants that are
   // commonly used.
   if (argv != NULL) {
-    gCmdLine = new std::string(argv[0]);
+    gCmdLine.reset(new std::string(argv[0]));
     for (size_t i = 1; argv[i] != NULL; ++i) {
       gCmdLine->append(" ");
       gCmdLine->append(argv[i]);
     }
-    gProgramInvocationName = new std::string(argv[0]);
+    gProgramInvocationName.reset(new std::string(argv[0]));
     const char* last_slash = strrchr(argv[0], '/');
-    gProgramInvocationShortName = new std::string((last_slash != NULL) ? last_slash + 1 : argv[0]);
+    gProgramInvocationShortName.reset(new std::string((last_slash != NULL) ? last_slash + 1
+                                                                           : argv[0]));
   } else {
     // TODO: fall back to /proc/self/cmdline when argv is NULL on Linux
-    gCmdLine = new std::string("<unset>");
+    gCmdLine.reset(new std::string("<unset>"));
   }
   const char* tags = getenv("ANDROID_LOG_TAGS");
   if (tags == NULL) {
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index c0cfee2..29b3981 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -130,7 +130,7 @@
   // TODO: tighten this check.
   if (kDebugLocking) {
     Runtime* runtime = Runtime::Current();
-    CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDown() ||
+    CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDownLocked() ||
           level == kDefaultMutexLevel  || level == kRuntimeShutdownLock ||
           level == kThreadListLock || level == kLoggingLock || level == kAbortLock);
   }
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 249f031..ec79c55 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -266,9 +266,8 @@
 Mutex::~Mutex() {
 #if ART_USE_FUTEXES
   if (state_ != 0) {
-    MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
     Runtime* runtime = Runtime::Current();
-    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown();
+    bool shutting_down = runtime == nullptr || runtime->IsShuttingDown(Thread::Current());
     LOG(shutting_down ? WARNING : FATAL) << "destroying mutex with owner: " << exclusive_owner_;
   } else {
     CHECK_EQ(exclusive_owner_, 0U)  << "unexpectedly found an owner on unlocked mutex " << name_;
@@ -283,7 +282,7 @@
     // TODO: should we just not log at all if shutting down? this could be the logging mutex!
     MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
     Runtime* runtime = Runtime::Current();
-    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown();
+    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDownLocked();
     PLOG(shutting_down ? WARNING : FATAL) << "pthread_mutex_destroy failed for " << name_;
   }
 #endif
@@ -454,7 +453,7 @@
     // TODO: should we just not log at all if shutting down? this could be the logging mutex!
     MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
     Runtime* runtime = Runtime::Current();
-    bool shutting_down = runtime == NULL || runtime->IsShuttingDown();
+    bool shutting_down = runtime == NULL || runtime->IsShuttingDownLocked();
     PLOG(shutting_down ? WARNING : FATAL) << "pthread_rwlock_destroy failed for " << name_;
   }
 #endif
@@ -641,9 +640,8 @@
 ConditionVariable::~ConditionVariable() {
 #if ART_USE_FUTEXES
   if (num_waiters_!= 0) {
-    MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
     Runtime* runtime = Runtime::Current();
-    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown();
+    bool shutting_down = runtime == nullptr || runtime->IsShuttingDown(Thread::Current());
     LOG(shutting_down ? WARNING : FATAL) << "ConditionVariable::~ConditionVariable for " << name_
         << " called with " << num_waiters_ << " waiters.";
   }
@@ -655,7 +653,7 @@
     errno = rc;
     MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
     Runtime* runtime = Runtime::Current();
-    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown();
+    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDownLocked();
     PLOG(shutting_down ? WARNING : FATAL) << "pthread_cond_destroy failed for " << name_;
   }
 #endif
diff --git a/runtime/base/timing_logger.cc b/runtime/base/timing_logger.cc
index 6df1126..c8dee6d 100644
--- a/runtime/base/timing_logger.cc
+++ b/runtime/base/timing_logger.cc
@@ -39,7 +39,7 @@
 }
 
 CumulativeLogger::~CumulativeLogger() {
-  STLDeleteValues(&histograms_);
+  STLDeleteElements(&histograms_);
 }
 
 void CumulativeLogger::SetName(const std::string& name) {
@@ -57,7 +57,7 @@
 void CumulativeLogger::Reset() {
   MutexLock mu(Thread::Current(), lock_);
   iterations_ = 0;
-  STLDeleteValues(&histograms_);
+  STLDeleteElements(&histograms_);
 }
 
 uint64_t CumulativeLogger::GetTotalNs() const {
@@ -67,60 +67,72 @@
 uint64_t CumulativeLogger::GetTotalTime() const {
   MutexLock mu(Thread::Current(), lock_);
   uint64_t total = 0;
-  for (CumulativeLogger::HistogramsIterator it = histograms_.begin(), end = histograms_.end();
-       it != end; ++it) {
-    total += it->second->Sum();
+  for (Histogram<uint64_t>* histogram : histograms_) {
+    total += histogram->Sum();
   }
   return total;
 }
 
-void CumulativeLogger::AddLogger(const base::TimingLogger &logger) {
+void CumulativeLogger::AddLogger(const TimingLogger &logger) {
   MutexLock mu(Thread::Current(), lock_);
-  const base::TimingLogger::SplitTimings& splits = logger.GetSplits();
-  for (base::TimingLogger::SplitTimingsIterator it = splits.begin(), end = splits.end();
-       it != end; ++it) {
-    base::TimingLogger::SplitTiming split = *it;
+  const TimingLogger::SplitTimings& splits = logger.GetSplits();
+  for (auto it = splits.begin(), end = splits.end(); it != end; ++it) {
+    TimingLogger::SplitTiming split = *it;
     uint64_t split_time = split.first;
     const char* split_name = split.second;
     AddPair(split_name, split_time);
   }
 }
 
+size_t CumulativeLogger::GetIterations() const {
+  MutexLock mu(Thread::Current(), lock_);
+  return iterations_;
+}
+
 void CumulativeLogger::Dump(std::ostream &os) {
   MutexLock mu(Thread::Current(), lock_);
   DumpHistogram(os);
 }
 
-void CumulativeLogger::AddPair(const std::string &label, uint64_t delta_time) {
+void CumulativeLogger::AddPair(const std::string& label, uint64_t delta_time) {
   // Convert delta time to microseconds so that we don't overflow our counters.
   delta_time /= kAdjust;
 
-  if (histograms_.find(label) == histograms_.end()) {
-    // TODO: Shoud this be a defined constant so we we know out of which orifice 16 and 100 were picked?
-    const size_t max_buckets = Runtime::Current()->GetHeap()->IsLowMemoryMode() ? 16 : 100;
-    // TODO: Should this be a defined constant so we know 50 of WTF?
-    histograms_[label] = new Histogram<uint64_t>(label.c_str(), 50, max_buckets);
+  Histogram<uint64_t>* histogram;
+  Histogram<uint64_t> dummy(label.c_str());
+  auto it = histograms_.find(&dummy);
+  if (it == histograms_.end()) {
+    const size_t max_buckets = Runtime::Current()->GetHeap()->IsLowMemoryMode() ?
+        kLowMemoryBucketCount : kDefaultBucketCount;
+    histogram = new Histogram<uint64_t>(label.c_str(), kInitialBucketSize, max_buckets);
+    histograms_.insert(histogram);
+  } else {
+    histogram = *it;
   }
-  histograms_[label]->AddValue(delta_time);
+  histogram->AddValue(delta_time);
 }
 
+class CompareHistogramByTimeSpentDeclining {
+ public:
+  bool operator()(const Histogram<uint64_t>* a, const Histogram<uint64_t>* b) const {
+    return a->Sum() > b->Sum();
+  }
+};
+
 void CumulativeLogger::DumpHistogram(std::ostream &os) {
   os << "Start Dumping histograms for " << iterations_ << " iterations"
      << " for " << name_ << "\n";
-  for (CumulativeLogger::HistogramsIterator it = histograms_.begin(), end = histograms_.end();
-       it != end; ++it) {
+  std::set<Histogram<uint64_t>*, CompareHistogramByTimeSpentDeclining>
+      sorted_histograms(histograms_.begin(), histograms_.end());
+  for (Histogram<uint64_t>* histogram : sorted_histograms) {
     Histogram<uint64_t>::CumulativeData cumulative_data;
-    it->second->CreateHistogram(cumulative_data);
-    it->second->PrintConfidenceIntervals(os, 0.99, cumulative_data);
-    // Reset cumulative values to save memory. We don't expect DumpHistogram to be called often, so
-    // it is not performance critical.
+    // We don't expect DumpHistogram to be called often, so it is not performance critical.
+    histogram->CreateHistogram(&cumulative_data);
+    histogram->PrintConfidenceIntervals(os, 0.99, cumulative_data);
   }
   os << "Done Dumping histograms \n";
 }
 
-
-namespace base {
-
 TimingLogger::TimingLogger(const char* name, bool precise, bool verbose)
     : name_(name), precise_(precise), verbose_(verbose), current_split_(NULL) {
 }
@@ -131,33 +143,35 @@
 }
 
 void TimingLogger::StartSplit(const char* new_split_label) {
-  DCHECK(new_split_label != NULL) << "Starting split (" << new_split_label << ") with null label.";
-  TimingLogger::ScopedSplit* explicit_scoped_split = new TimingLogger::ScopedSplit(new_split_label, this);
+  DCHECK(new_split_label != nullptr) << "Starting split with null label.";
+  TimingLogger::ScopedSplit* explicit_scoped_split =
+      new TimingLogger::ScopedSplit(new_split_label, this);
   explicit_scoped_split->explicit_ = true;
 }
 
 void TimingLogger::EndSplit() {
-  CHECK(current_split_ != NULL) << "Ending a non-existent split.";
-  DCHECK(current_split_->label_ != NULL);
-  DCHECK(current_split_->explicit_ == true) << "Explicitly ending scoped split: " << current_split_->label_;
-
+  CHECK(current_split_ != nullptr) << "Ending a non-existent split.";
+  DCHECK(current_split_->label_ != nullptr);
+  DCHECK(current_split_->explicit_ == true)
+      << "Explicitly ending scoped split: " << current_split_->label_;
   delete current_split_;
+  // TODO: current_split_ = nullptr;
 }
 
 // Ends the current split and starts the one given by the label.
 void TimingLogger::NewSplit(const char* new_split_label) {
-  CHECK(current_split_ != NULL) << "Inserting a new split (" << new_split_label
-                                << ") into a non-existent split.";
-  DCHECK(new_split_label != NULL) << "New split (" << new_split_label << ") with null label.";
-
-  current_split_->TailInsertSplit(new_split_label);
+  if (current_split_ == nullptr) {
+    StartSplit(new_split_label);
+  } else {
+    DCHECK(new_split_label != nullptr) << "New split (" << new_split_label << ") with null label.";
+    current_split_->TailInsertSplit(new_split_label);
+  }
 }
 
 uint64_t TimingLogger::GetTotalNs() const {
   uint64_t total_ns = 0;
-  for (base::TimingLogger::SplitTimingsIterator it = splits_.begin(), end = splits_.end();
-       it != end; ++it) {
-    base::TimingLogger::SplitTiming split = *it;
+  for (auto it = splits_.begin(), end = splits_.end(); it != end; ++it) {
+    TimingLogger::SplitTiming split = *it;
     total_ns += split.first;
   }
   return total_ns;
@@ -166,9 +180,8 @@
 void TimingLogger::Dump(std::ostream &os) const {
   uint64_t longest_split = 0;
   uint64_t total_ns = 0;
-  for (base::TimingLogger::SplitTimingsIterator it = splits_.begin(), end = splits_.end();
-       it != end; ++it) {
-    base::TimingLogger::SplitTiming split = *it;
+  for (auto it = splits_.begin(), end = splits_.end(); it != end; ++it) {
+    TimingLogger::SplitTiming split = *it;
     uint64_t split_time = split.first;
     longest_split = std::max(longest_split, split_time);
     total_ns += split_time;
@@ -177,9 +190,8 @@
   TimeUnit tu = GetAppropriateTimeUnit(longest_split);
   uint64_t divisor = GetNsToTimeUnitDivisor(tu);
   // Print formatted splits.
-  for (base::TimingLogger::SplitTimingsIterator it = splits_.begin(), end = splits_.end();
-       it != end; ++it) {
-    base::TimingLogger::SplitTiming split = *it;
+  for (auto it = splits_.begin(), end = splits_.end(); it != end; ++it) {
+    const TimingLogger::SplitTiming& split = *it;
     uint64_t split_time = split.first;
     if (!precise_ && divisor >= 1000) {
       // Make the fractional part 0.
@@ -226,7 +238,7 @@
     LOG(INFO) << "End: " << label_ << " " << PrettyDuration(split_time);
   }
 
-  // If one or more enclosed explcitly started splits are not terminated we can
+  // If one or more enclosed explicitly started splits are not terminated we can
   // either fail or "unwind" the stack of splits in the timing logger to 'this'
   // (by deleting the intervening scoped splits). This implements the latter.
   TimingLogger::ScopedSplit* current = timing_logger_->current_split_;
@@ -288,5 +300,4 @@
   ATRACE_BEGIN(label_);
 }
 
-}  // namespace base
 }  // namespace art
diff --git a/runtime/base/timing_logger.h b/runtime/base/timing_logger.h
index 07d1ee0..c1ff0a3 100644
--- a/runtime/base/timing_logger.h
+++ b/runtime/base/timing_logger.h
@@ -21,15 +21,12 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 
+#include <set>
 #include <string>
 #include <vector>
-#include <map>
 
 namespace art {
-
-namespace base {
-  class TimingLogger;
-}  // namespace base
+class TimingLogger;
 
 class CumulativeLogger {
  public:
@@ -44,18 +41,27 @@
   // Allow the name to be modified, particularly when the cumulative logger is a field within a
   // parent class that is unable to determine the "name" of a sub-class.
   void SetName(const std::string& name);
-  void AddLogger(const base::TimingLogger& logger) LOCKS_EXCLUDED(lock_);
+  void AddLogger(const TimingLogger& logger) LOCKS_EXCLUDED(lock_);
+  size_t GetIterations() const;
 
  private:
-  typedef std::map<std::string, Histogram<uint64_t> *> Histograms;
-  typedef std::map<std::string, Histogram<uint64_t> *>::const_iterator HistogramsIterator;
+  class HistogramComparator {
+   public:
+    bool operator()(const Histogram<uint64_t>* a, const Histogram<uint64_t>* b) const {
+      return a->Name() < b->Name();
+    }
+  };
+
+  static constexpr size_t kLowMemoryBucketCount = 16;
+  static constexpr size_t kDefaultBucketCount = 100;
+  static constexpr size_t kInitialBucketSize = 50;  // 50 microseconds.
 
   void AddPair(const std::string &label, uint64_t delta_time)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
   void DumpHistogram(std::ostream &os) EXCLUSIVE_LOCKS_REQUIRED(lock_);
   uint64_t GetTotalTime() const;
   static const uint64_t kAdjust = 1000;
-  Histograms histograms_ GUARDED_BY(lock_);
+  std::set<Histogram<uint64_t>*, HistogramComparator> histograms_ GUARDED_BY(lock_);
   std::string name_;
   const std::string lock_name_;
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -64,19 +70,17 @@
   DISALLOW_COPY_AND_ASSIGN(CumulativeLogger);
 };
 
-namespace base {
-
-
 // A timing logger that knows when a split starts for the purposes of logging tools, like systrace.
 class TimingLogger {
  public:
   // Splits are nanosecond times and split names.
   typedef std::pair<uint64_t, const char*> SplitTiming;
   typedef std::vector<SplitTiming> SplitTimings;
-  typedef std::vector<SplitTiming>::const_iterator SplitTimingsIterator;
 
   explicit TimingLogger(const char* name, bool precise, bool verbose);
-
+  ~TimingLogger() {
+    // TODO: DCHECK(current_split_ == nullptr) << "Forgot to end split: " << current_split_->label_;
+  }
   // Clears current splits and labels.
   void Reset();
 
@@ -142,7 +146,7 @@
   friend class ScopedSplit;
  protected:
   // The name of the timing logger.
-  const char* name_;
+  const char* const name_;
 
   // Do we want to print the exactly recorded split (true) or round down to the time unit being
   // used (false).
@@ -161,7 +165,6 @@
   DISALLOW_COPY_AND_ASSIGN(TimingLogger);
 };
 
-}  // namespace base
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_TIMING_LOGGER_H_
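A sketch of how the two loggers compose after the namespace flattening, assuming a CumulativeLogger constructed elsewhere and using only calls visible in this header and in timing_logger.cc; the function and split names are illustrative:

#include <iostream>

#include "base/timing_logger.h"

void ExamplePhaseTiming(art::CumulativeLogger* cumulative) {
  // base::TimingLogger is now simply art::TimingLogger.
  art::TimingLogger timings("ExamplePhase", false /* precise */, false /* verbose */);
  timings.StartSplit("Mark");
  // ... timed work ...
  timings.NewSplit("Sweep");  // NewSplit now also works as the first split.
  timings.EndSplit();
  cumulative->AddLogger(timings);  // folds each split into the histogram set
  cumulative->Dump(std::cout);
}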
diff --git a/runtime/base/timing_logger_test.cc b/runtime/base/timing_logger_test.cc
index 8f28e48..03cc9cc 100644
--- a/runtime/base/timing_logger_test.cc
+++ b/runtime/base/timing_logger_test.cc
@@ -26,13 +26,13 @@
 
 TEST_F(TimingLoggerTest, StartEnd) {
   const char* split1name = "First Split";
-  base::TimingLogger timings("StartEnd", true, false);
+  TimingLogger timings("StartEnd", true, false);
 
   timings.StartSplit(split1name);
 
   timings.EndSplit();  // Ends split1.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(1U, splits.size());
   EXPECT_STREQ(splits[0].second, split1name);
@@ -43,7 +43,7 @@
   const char* split1name = "First Split";
   const char* split2name = "Second Split";
   const char* split3name = "Third Split";
-  base::TimingLogger timings("StartNewEnd", true, false);
+  TimingLogger timings("StartNewEnd", true, false);
 
   timings.StartSplit(split1name);
 
@@ -53,7 +53,7 @@
 
   timings.EndSplit();  // Ends split3.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(3U, splits.size());
   EXPECT_STREQ(splits[0].second, split1name);
@@ -67,7 +67,7 @@
   const char* split3name = "Third Split";
   const char* split4name = "Fourth Split";
   const char* split5name = "Fifth Split";
-  base::TimingLogger timings("StartNewEndNested", true, false);
+  TimingLogger timings("StartNewEndNested", true, false);
 
   timings.StartSplit(split1name);
 
@@ -85,7 +85,7 @@
 
   timings.EndSplit();  // Ends split2.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(5U, splits.size());
   EXPECT_STREQ(splits[0].second, split1name);
@@ -101,25 +101,25 @@
   const char* innersplit1 = "Inner Split 1";
   const char* innerinnersplit1 = "Inner Inner Split 1";
   const char* innersplit2 = "Inner Split 2";
-  base::TimingLogger timings("Scoped", true, false);
+  TimingLogger timings("Scoped", true, false);
 
   {
-      base::TimingLogger::ScopedSplit outer(outersplit, &timings);
+      TimingLogger::ScopedSplit outer(outersplit, &timings);
 
       {
-          base::TimingLogger::ScopedSplit inner1(innersplit1, &timings);
+          TimingLogger::ScopedSplit inner1(innersplit1, &timings);
 
           {
-              base::TimingLogger::ScopedSplit innerinner1(innerinnersplit1, &timings);
+              TimingLogger::ScopedSplit innerinner1(innerinnersplit1, &timings);
           }  // Ends innerinnersplit1.
       }  // Ends innersplit1.
 
       {
-          base::TimingLogger::ScopedSplit inner2(innersplit2, &timings);
+          TimingLogger::ScopedSplit inner2(innersplit2, &timings);
       }  // Ends innersplit2.
   }  // Ends outersplit.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(4U, splits.size());
   EXPECT_STREQ(splits[0].second, innerinnersplit1);
@@ -134,12 +134,12 @@
   const char* innersplit = "Inner Split";
   const char* innerinnersplit1 = "Inner Inner Split 1";
   const char* innerinnersplit2 = "Inner Inner Split 2";
-  base::TimingLogger timings("Scoped", true, false);
+  TimingLogger timings("Scoped", true, false);
 
   timings.StartSplit(outersplit);
 
   {
-      base::TimingLogger::ScopedSplit inner(innersplit, &timings);
+      TimingLogger::ScopedSplit inner(innersplit, &timings);
 
       timings.StartSplit(innerinnersplit1);
 
@@ -148,7 +148,7 @@
 
   timings.EndSplit();  // Ends outersplit.
 
-  const base::TimingLogger::SplitTimings& splits = timings.GetSplits();
+  const TimingLogger::SplitTimings& splits = timings.GetSplits();
 
   EXPECT_EQ(4U, splits.size());
   EXPECT_STREQ(splits[0].second, innerinnersplit1);
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 54cbfe6..a84e18a 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -205,7 +205,7 @@
         // If java_object is a weak global ref whose referent has been cleared,
         // obj will be NULL.  Otherwise, obj should always be non-NULL
         // and valid.
-        if (!Runtime::Current()->GetHeap()->IsHeapAddress(obj)) {
+        if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(obj)) {
           Runtime::Current()->GetHeap()->DumpSpaces();
           JniAbortF(function_name_, "field operation on invalid %s: %p",
                     ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(), java_object);
@@ -242,7 +242,7 @@
   void CheckInstanceFieldID(jobject java_object, jfieldID fid)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::Object* o = soa_.Decode<mirror::Object*>(java_object);
-    if (o == NULL || !Runtime::Current()->GetHeap()->IsHeapAddress(o)) {
+    if (o == NULL || !Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "field operation on invalid %s: %p",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(), java_object);
@@ -455,7 +455,8 @@
           mirror::Class* c = reinterpret_cast<mirror::Class*>(Thread::Current()->DecodeJObject(jc));
           if (c == NULL) {
             msg += "NULL";
-          } else if (c == kInvalidIndirectRefObject || !Runtime::Current()->GetHeap()->IsHeapAddress(c)) {
+          } else if (c == kInvalidIndirectRefObject ||
+              !Runtime::Current()->GetHeap()->IsValidObjectAddress(c)) {
             StringAppendF(&msg, "INVALID POINTER:%p", jc);
           } else if (!c->IsClass()) {
             msg += "INVALID NON-CLASS OBJECT OF TYPE:" + PrettyTypeOf(c);
@@ -621,7 +622,7 @@
     }
 
     mirror::Object* obj = soa_.Decode<mirror::Object*>(java_object);
-    if (!Runtime::Current()->GetHeap()->IsHeapAddress(obj)) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(obj)) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "%s is an invalid %s: %p (%p)",
                 what, ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(), java_object, obj);
@@ -675,7 +676,7 @@
     }
 
     mirror::Array* a = soa_.Decode<mirror::Array*>(java_array);
-    if (!Runtime::Current()->GetHeap()->IsHeapAddress(a)) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(a)) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "jarray is an invalid %s: %p (%p)",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(java_array)).c_str(), java_array, a);
@@ -696,7 +697,7 @@
       return NULL;
     }
     mirror::ArtField* f = soa_.DecodeField(fid);
-    if (!Runtime::Current()->GetHeap()->IsHeapAddress(f) || !f->IsArtField()) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(f) || !f->IsArtField()) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "invalid jfieldID: %p", fid);
       return NULL;
@@ -710,7 +711,7 @@
       return NULL;
     }
     mirror::ArtMethod* m = soa_.DecodeMethod(mid);
-    if (!Runtime::Current()->GetHeap()->IsHeapAddress(m) || !m->IsArtMethod()) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(m) || !m->IsArtMethod()) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "invalid jmethodID: %p", mid);
       return NULL;
@@ -731,7 +732,7 @@
     }
 
     mirror::Object* o = soa_.Decode<mirror::Object*>(java_object);
-    if (!Runtime::Current()->GetHeap()->IsHeapAddress(o)) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       // TODO: when we remove work_around_app_jni_bugs, this should be impossible.
       JniAbortF(function_name_, "native code passing in reference to invalid %s: %p",
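
The check_jni.cc hunks above replace Heap::IsHeapAddress() with Heap::IsValidObjectAddress() in every checked-JNI validation. A hedged fragment showing the resulting check inside the existing ScopedCheck methods, using only calls that already appear in these hunks (the error text is illustrative):

mirror::Object* obj = soa_.Decode<mirror::Object*>(java_object);
if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(obj)) {
  // Dump the heap spaces to aid debugging, then raise a CheckJNI abort.
  Runtime::Current()->GetHeap()->DumpSpaces();
  JniAbortF(function_name_, "invalid reference: %p", java_object);
}
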
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index ad568b1..0436435 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -18,20 +18,21 @@
 #define ART_RUNTIME_CLASS_LINKER_INL_H_
 
 #include "class_linker.h"
-
 #include "mirror/art_field.h"
+#include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "mirror/iftable.h"
 #include "mirror/object_array.h"
+#include "sirt_ref.h"
 
 namespace art {
 
 inline mirror::String* ClassLinker::ResolveString(uint32_t string_idx,
-                                           const mirror::ArtMethod* referrer) {
+                                                  const mirror::ArtMethod* referrer) {
   mirror::String* resolved_string = referrer->GetDexCacheStrings()->Get(string_idx);
   if (UNLIKELY(resolved_string == NULL)) {
     mirror::Class* declaring_class = referrer->GetDeclaringClass();
-    mirror::DexCache* dex_cache = declaring_class->GetDexCache();
+    SirtRef<mirror::DexCache> dex_cache(Thread::Current(), declaring_class->GetDexCache());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_string = ResolveString(dex_file, string_idx, dex_cache);
   }
@@ -43,8 +44,9 @@
   mirror::Class* resolved_type = referrer->GetDexCacheResolvedTypes()->Get(type_idx);
   if (UNLIKELY(resolved_type == NULL)) {
     mirror::Class* declaring_class = referrer->GetDeclaringClass();
-    mirror::DexCache* dex_cache = declaring_class->GetDexCache();
-    mirror::ClassLoader* class_loader = declaring_class->GetClassLoader();
+    Thread* self = Thread::Current();
+    SirtRef<mirror::DexCache> dex_cache(self, declaring_class->GetDexCache());
+    SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_type = ResolveType(dex_file, type_idx, dex_cache, class_loader);
   }
@@ -53,10 +55,12 @@
 
 inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, const mirror::ArtField* referrer) {
   mirror::Class* declaring_class = referrer->GetDeclaringClass();
-  mirror::DexCache* dex_cache = declaring_class->GetDexCache();
-  mirror::Class* resolved_type = dex_cache->GetResolvedType(type_idx);
+  mirror::DexCache* dex_cache_ptr = declaring_class->GetDexCache();
+  mirror::Class* resolved_type = dex_cache_ptr->GetResolvedType(type_idx);
   if (UNLIKELY(resolved_type == NULL)) {
-    mirror::ClassLoader* class_loader = declaring_class->GetClassLoader();
+    Thread* self = Thread::Current();
+    SirtRef<mirror::DexCache> dex_cache(self, dex_cache_ptr);
+    SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_type = ResolveType(dex_file, type_idx, dex_cache, class_loader);
   }
@@ -70,8 +74,9 @@
       referrer->GetDexCacheResolvedMethods()->Get(method_idx);
   if (UNLIKELY(resolved_method == NULL || resolved_method->IsRuntimeMethod())) {
     mirror::Class* declaring_class = referrer->GetDeclaringClass();
-    mirror::DexCache* dex_cache = declaring_class->GetDexCache();
-    mirror::ClassLoader* class_loader = declaring_class->GetClassLoader();
+    Thread* self = Thread::Current();
+    SirtRef<mirror::DexCache> dex_cache(self, declaring_class->GetDexCache());
+    SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_method = ResolveMethod(dex_file, method_idx, dex_cache, class_loader, referrer, type);
   }
@@ -81,12 +86,13 @@
 inline mirror::ArtField* ClassLinker::ResolveField(uint32_t field_idx,
                                                    const mirror::ArtMethod* referrer,
                                                    bool is_static) {
+  mirror::Class* declaring_class = referrer->GetDeclaringClass();
   mirror::ArtField* resolved_field =
-      referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx);
+      declaring_class->GetDexCache()->GetResolvedField(field_idx);
   if (UNLIKELY(resolved_field == NULL)) {
-    mirror::Class* declaring_class = referrer->GetDeclaringClass();
-    mirror::DexCache* dex_cache = declaring_class->GetDexCache();
-    mirror::ClassLoader* class_loader = declaring_class->GetClassLoader();
+    Thread* self = Thread::Current();
+    SirtRef<mirror::DexCache> dex_cache(self, declaring_class->GetDexCache());

+    SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_field = ResolveField(dex_file, field_idx, dex_cache, class_loader, is_static);
   }
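
The class_linker-inl.h hunks above wrap the DexCache and ClassLoader in SirtRef<> handles before calling the slow-path resolvers, instead of passing raw mirror pointers. A minimal sketch of that pattern, reusing the names from the hunks (declaring_class and type_idx come from the surrounding resolver):

Thread* self = Thread::Current();
// SirtRef registers the object in the thread's stack indirect reference table,
// keeping it visible to the GC across any allocation or suspension below.
SirtRef<mirror::DexCache> dex_cache(self, declaring_class->GetDexCache());
SirtRef<mirror::ClassLoader> class_loader(self, declaring_class->GetClassLoader());
const DexFile& dex_file = *dex_cache->GetDexFile();
mirror::Class* resolved_type = ResolveType(dex_file, type_idx, dex_cache, class_loader);
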
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 184e5d4..500cb59 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -170,20 +170,6 @@
   "[Ljava/lang/StackTraceElement;",
 };
 
-ClassLinker* ClassLinker::CreateFromCompiler(const std::vector<const DexFile*>& boot_class_path,
-                                             InternTable* intern_table) {
-  CHECK_NE(boot_class_path.size(), 0U);
-  UniquePtr<ClassLinker> class_linker(new ClassLinker(intern_table));
-  class_linker->InitFromCompiler(boot_class_path);
-  return class_linker.release();
-}
-
-ClassLinker* ClassLinker::CreateFromImage(InternTable* intern_table) {
-  UniquePtr<ClassLinker> class_linker(new ClassLinker(intern_table));
-  class_linker->InitFromImage();
-  return class_linker.release();
-}
-
 ClassLinker::ClassLinker(InternTable* intern_table)
     // dex_lock_ is recursive as it may be used in stack dumping.
     : dex_lock_("ClassLinker dex lock", kDefaultMutexLevel),
@@ -211,14 +197,15 @@
   // java_lang_Class comes first, it's needed for AllocClass
   Thread* self = Thread::Current();
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  SirtRef<mirror::Class>
-      java_lang_Class(self,
-                      down_cast<mirror::Class*>(heap->AllocObject(self, NULL,
-                                                                  sizeof(mirror::ClassClass))));
+  // The GC can't handle an object with a null class since we can't get the size of this object.
+  heap->IncrementDisableGC(self);
+  SirtRef<mirror::Class> java_lang_Class(self, down_cast<mirror::Class*>(
+      heap->AllocNonMovableObject<true>(self, nullptr, sizeof(mirror::ClassClass))));
   CHECK(java_lang_Class.get() != NULL);
   mirror::Class::SetClassClass(java_lang_Class.get());
   java_lang_Class->SetClass(java_lang_Class.get());
   java_lang_Class->SetClassSize(sizeof(mirror::ClassClass));
+  heap->DecrementDisableGC(self);
   // AllocClass(mirror::Class*) can now be used
 
   // Class[] is used for reflection support.
@@ -251,7 +238,8 @@
   java_lang_String->SetStatus(mirror::Class::kStatusResolved, self);
 
   // Create storage for root classes, save away our work so far (requires descriptors).
-  class_roots_ = mirror::ObjectArray<mirror::Class>::Alloc(self, object_array_class.get(), kClassRootsMax);
+  class_roots_ = mirror::ObjectArray<mirror::Class>::Alloc(self, object_array_class.get(),
+                                                           kClassRootsMax);
   CHECK(class_roots_ != NULL);
   SetClassRoot(kJavaLangClass, java_lang_Class.get());
   SetClassRoot(kJavaLangObject, java_lang_Object.get());
@@ -401,7 +389,7 @@
   array_iftable_->SetInterface(1, java_io_Serializable);
 
   // Sanity check Class[] and Object[]'s interfaces.
-  ClassHelper kh(class_array_class.get(), this);
+  ClassHelper kh(class_array_class.get());
   CHECK_EQ(java_lang_Cloneable, kh.GetDirectInterface(0));
   CHECK_EQ(java_io_Serializable, kh.GetDirectInterface(1));
   kh.ChangeClass(object_array_class.get());
@@ -487,7 +475,7 @@
       FindSystemClass("Ljava/lang/ref/FinalizerReference;");
 
   mirror::ArtField* pendingNext = java_lang_ref_Reference->GetInstanceField(0);
-  FieldHelper fh(pendingNext, this);
+  FieldHelper fh(pendingNext);
   CHECK_STREQ(fh.GetName(), "pendingNext");
   CHECK_STREQ(fh.GetTypeDescriptor(), "Ljava/lang/ref/Reference;");
 
@@ -1043,6 +1031,7 @@
   VLOG(startup) << "ClassLinker::InitFromImage entering";
   CHECK(!init_done_);
 
+  Thread* self = Thread::Current();
   gc::Heap* heap = Runtime::Current()->GetHeap();
   gc::space::ImageSpace* space = heap->GetImageSpace();
   dex_cache_image_class_lookup_required_ = true;
@@ -1059,9 +1048,10 @@
   mirror::ObjectArray<mirror::DexCache>* dex_caches =
       dex_caches_object->AsObjectArray<mirror::DexCache>();
 
-  mirror::ObjectArray<mirror::Class>* class_roots =
-      space->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->AsObjectArray<mirror::Class>();
-  class_roots_ = class_roots;
+  SirtRef<mirror::ObjectArray<mirror::Class> > class_roots(
+      self,
+      space->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->AsObjectArray<mirror::Class>());
+  class_roots_ = class_roots.get();
 
   // Special case of setting up the String class early so that we can test arbitrary objects
   // as being Strings or not
@@ -1069,7 +1059,6 @@
 
   CHECK_EQ(oat_file.GetOatHeader().GetDexFileCount(),
            static_cast<uint32_t>(dex_caches->GetLength()));
-  Thread* self = Thread::Current();
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
     SirtRef<mirror::DexCache> dex_cache(self, dex_caches->Get(i));
     const std::string& dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8());
@@ -1096,13 +1085,12 @@
   // Set entry point to interpreter if in InterpretOnly mode.
   if (Runtime::Current()->GetInstrumentation()->InterpretOnly()) {
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    heap->FlushAllocStack();
-    heap->GetLiveBitmap()->Walk(InitFromImageInterpretOnlyCallback, this);
+    heap->VisitObjects(InitFromImageInterpretOnlyCallback, this);
   }
 
   // reinit class_roots_
   mirror::Class::SetClassClass(class_roots->Get(kJavaLangClass));
-  class_roots_ = class_roots;
+  class_roots_ = class_roots.get();
 
   // reinit array_iftable_ from any array class instance, they should be ==
   array_iftable_ = GetClassRoot(kObjectArrayClass)->GetIfTable();
@@ -1192,7 +1180,6 @@
   }
 }
 
-
 ClassLinker::~ClassLinker() {
   mirror::Class::ResetClass();
   mirror::String::ResetClass();
@@ -1214,10 +1201,10 @@
 
 mirror::DexCache* ClassLinker::AllocDexCache(Thread* self, const DexFile& dex_file) {
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  mirror::Class* dex_cache_class = GetClassRoot(kJavaLangDexCache);
-  SirtRef<mirror::DexCache> dex_cache(self,
-                              down_cast<mirror::DexCache*>(heap->AllocObject(self, dex_cache_class,
-                                                                dex_cache_class->GetObjectSize())));
+  SirtRef<mirror::Class> dex_cache_class(self, GetClassRoot(kJavaLangDexCache));
+  SirtRef<mirror::DexCache> dex_cache(
+      self, down_cast<mirror::DexCache*>(
+          heap->AllocObject<true>(self, dex_cache_class.get(), dex_cache_class->GetObjectSize())));
   if (dex_cache.get() == NULL) {
     return NULL;
   }
@@ -1253,13 +1240,8 @@
     return NULL;
   }
 
-  dex_cache->Init(&dex_file,
-                  location.get(),
-                  strings.get(),
-                  types.get(),
-                  methods.get(),
-                  fields.get(),
-                  initialized_static_storage.get());
+  dex_cache->Init(&dex_file, location.get(), strings.get(), types.get(), methods.get(),
+                  fields.get(), initialized_static_storage.get());
   return dex_cache.get();
 }
 
@@ -1267,7 +1249,7 @@
                                        size_t class_size) {
   DCHECK_GE(class_size, sizeof(mirror::Class));
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  mirror::Object* k = heap->AllocObject(self, java_lang_Class, class_size);
+  mirror::Object* k = heap->AllocNonMovableObject<true>(self, java_lang_Class, class_size);
   if (UNLIKELY(k == NULL)) {
     CHECK(self->IsExceptionPending());  // OOME.
     return NULL;
@@ -1285,18 +1267,19 @@
 }
 
 mirror::ArtField* ClassLinker::AllocArtField(Thread* self) {
-  return down_cast<mirror::ArtField*>(GetClassRoot(kJavaLangReflectArtField)->AllocObject(self));
+  return down_cast<mirror::ArtField*>(
+      GetClassRoot(kJavaLangReflectArtField)->AllocNonMovableObject(self));
 }
 
 mirror::ArtMethod* ClassLinker::AllocArtMethod(Thread* self) {
-  return down_cast<mirror::ArtMethod*>(GetClassRoot(kJavaLangReflectArtMethod)->AllocObject(self));
+  return down_cast<mirror::ArtMethod*>(
+      GetClassRoot(kJavaLangReflectArtMethod)->AllocNonMovableObject(self));
 }
 
-mirror::ObjectArray<mirror::StackTraceElement>* ClassLinker::AllocStackTraceElementArray(Thread* self,
-                                                                                         size_t length) {
-  return mirror::ObjectArray<mirror::StackTraceElement>::Alloc(self,
-                                                               GetClassRoot(kJavaLangStackTraceElementArrayClass),
-                                                               length);
+mirror::ObjectArray<mirror::StackTraceElement>* ClassLinker::AllocStackTraceElementArray(
+    Thread* self, size_t length) {
+  return mirror::ObjectArray<mirror::StackTraceElement>::Alloc(
+      self, GetClassRoot(kJavaLangStackTraceElementArrayClass), length);
 }
 
 static mirror::Class* EnsureResolved(Thread* self, mirror::Class* klass)
@@ -1332,10 +1315,12 @@
 }
 
 mirror::Class* ClassLinker::FindSystemClass(const char* descriptor) {
-  return FindClass(descriptor, NULL);
+  SirtRef<mirror::ClassLoader> class_loader(Thread::Current(), nullptr);
+  return FindClass(descriptor, class_loader);
 }
 
-mirror::Class* ClassLinker::FindClass(const char* descriptor, mirror::ClassLoader* class_loader) {
+mirror::Class* ClassLinker::FindClass(const char* descriptor,
+                                      SirtRef<mirror::ClassLoader>& class_loader) {
   DCHECK_NE(*descriptor, '\0') << "descriptor is empty string";
   Thread* self = Thread::Current();
   DCHECK(self != NULL);
@@ -1346,20 +1331,19 @@
     return FindPrimitiveClass(descriptor[0]);
   }
   // Find the class in the loaded classes table.
-  mirror::Class* klass = LookupClass(descriptor, class_loader);
+  mirror::Class* klass = LookupClass(descriptor, class_loader.get());
   if (klass != NULL) {
     return EnsureResolved(self, klass);
   }
   // Class is not yet loaded.
   if (descriptor[0] == '[') {
     return CreateArrayClass(descriptor, class_loader);
-
-  } else if (class_loader == NULL) {
+  } else if (class_loader.get() == nullptr) {
     DexFile::ClassPathEntry pair = DexFile::FindInClassPath(descriptor, boot_class_path_);
     if (pair.second != NULL) {
-      return DefineClass(descriptor, NULL, *pair.first, *pair.second);
+      SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
+      return DefineClass(descriptor, class_loader, *pair.first, *pair.second);
     }
-
   } else if (Runtime::Current()->UseCompileTimeClassPath()) {
     // First try the boot class path, we check the descriptor first to avoid an unnecessary
     // throw of a NoClassDefFoundError.
@@ -1372,7 +1356,8 @@
     const std::vector<const DexFile*>* class_path;
     {
       ScopedObjectAccessUnchecked soa(self);
-      ScopedLocalRef<jobject> jclass_loader(soa.Env(), soa.AddLocalReference<jobject>(class_loader));
+      ScopedLocalRef<jobject> jclass_loader(soa.Env(),
+                                            soa.AddLocalReference<jobject>(class_loader.get()));
       class_path = &Runtime::Current()->GetCompileTimeClassPath(jclass_loader.get());
     }
 
@@ -1384,7 +1369,7 @@
   } else {
     ScopedObjectAccessUnchecked soa(self->GetJniEnv());
     ScopedLocalRef<jobject> class_loader_object(soa.Env(),
-                                                soa.AddLocalReference<jobject>(class_loader));
+                                                soa.AddLocalReference<jobject>(class_loader.get()));
     std::string class_name_string(DescriptorToDot(descriptor));
     ScopedLocalRef<jobject> result(soa.Env(), NULL);
     {
@@ -1418,7 +1403,7 @@
 }
 
 mirror::Class* ClassLinker::DefineClass(const char* descriptor,
-                                        mirror::ClassLoader* class_loader,
+                                        SirtRef<mirror::ClassLoader>& class_loader,
                                         const DexFile& dex_file,
                                         const DexFile::ClassDef& dex_class_def) {
   Thread* self = Thread::Current();
@@ -1449,7 +1434,7 @@
     return NULL;
   }
   klass->SetDexCache(FindDexCache(dex_file));
-  LoadClass(dex_file, dex_class_def, klass, class_loader);
+  LoadClass(dex_file, dex_class_def, klass, class_loader.get());
   // Check for a pending exception during load
   if (self->IsExceptionPending()) {
     klass->SetStatus(mirror::Class::kStatusError, self);
@@ -1457,14 +1442,12 @@
   }
   ObjectLock lock(self, klass.get());
   klass->SetClinitThreadId(self->GetTid());
-  {
-    // Add the newly loaded class to the loaded classes table.
-    mirror::Class* existing = InsertClass(descriptor, klass.get(), Hash(descriptor));
-    if (existing != NULL) {
-      // We failed to insert because we raced with another thread. Calling EnsureResolved may cause
-      // this thread to block.
-      return EnsureResolved(self, existing);
-    }
+  // Add the newly loaded class to the loaded classes table.
+  mirror::Class* existing = InsertClass(descriptor, klass.get(), Hash(descriptor));
+  if (existing != NULL) {
+    // We failed to insert because we raced with another thread. Calling EnsureResolved may cause
+    // this thread to block.
+    return EnsureResolved(self, existing);
   }
   // Finish loading (if necessary) by finding parents
   CHECK(!klass->IsLoaded());
@@ -1476,7 +1459,9 @@
   CHECK(klass->IsLoaded());
   // Link the class (if necessary)
   CHECK(!klass->IsResolved());
-  if (!LinkClass(klass, NULL, self)) {
+  // TODO: Use fast jobjects?
+  SirtRef<mirror::ObjectArray<mirror::Class> > interfaces(self, nullptr);
+  if (!LinkClass(self, klass, interfaces)) {
     // Linking failed.
     klass->SetStatus(mirror::Class::kStatusError, self);
     return NULL;
@@ -2083,7 +2068,7 @@
 //
 // Returns NULL with an exception raised on failure.
 mirror::Class* ClassLinker::CreateArrayClass(const char* descriptor,
-                                             mirror::ClassLoader* class_loader) {
+                                             SirtRef<mirror::ClassLoader>& class_loader) {
   // Identify the underlying component type
   CHECK_EQ('[', descriptor[0]);
   mirror::Class* component_type = FindClass(descriptor + 1, class_loader);
@@ -2109,7 +2094,7 @@
   // because we effectively do this lookup again when we add the new
   // class to the hash table --- necessary because of possible races with
   // other threads.)
-  if (class_loader != component_type->GetClassLoader()) {
+  if (class_loader.get() != component_type->GetClassLoader()) {
     mirror::Class* new_class = LookupClass(descriptor, component_type->GetClassLoader());
     if (new_class != NULL) {
       return new_class;
@@ -2266,11 +2251,10 @@
 bool ClassLinker::RemoveClass(const char* descriptor, const mirror::ClassLoader* class_loader) {
   size_t hash = Hash(descriptor);
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  ClassHelper kh;
   for (auto it = class_table_.lower_bound(hash), end = class_table_.end(); it != end && it->first == hash;
        ++it) {
     mirror::Class* klass = it->second;
-    kh.ChangeClass(klass);
+    ClassHelper kh(klass);
     if ((klass->GetClassLoader() == class_loader) &&
         (strcmp(descriptor, kh.GetDescriptor()) == 0)) {
       class_table_.erase(it);
@@ -2313,18 +2297,17 @@
 mirror::Class* ClassLinker::LookupClassFromTableLocked(const char* descriptor,
                                                        const mirror::ClassLoader* class_loader,
                                                        size_t hash) {
-  ClassHelper kh(NULL, this);
   auto end = class_table_.end();
   for (auto it = class_table_.lower_bound(hash); it != end && it->first == hash; ++it) {
     mirror::Class* klass = it->second;
-    kh.ChangeClass(klass);
+    ClassHelper kh(klass);
     if ((klass->GetClassLoader() == class_loader) &&
         (strcmp(descriptor, kh.GetDescriptor()) == 0)) {
       if (kIsDebugBuild) {
         // Check for duplicates in the table.
         for (++it; it != end && it->first == hash; ++it) {
           mirror::Class* klass2 = it->second;
-          kh.ChangeClass(klass2);
+          ClassHelper kh(klass2);
           CHECK(!((klass2->GetClassLoader() == class_loader) &&
                   (strcmp(descriptor, kh.GetDescriptor()) == 0)))
               << PrettyClass(klass) << " " << klass << " " << klass->GetClassLoader() << " "
@@ -2354,14 +2337,13 @@
   const char* old_no_suspend_cause =
       self->StartAssertNoThreadSuspension("Moving image classes to class table");
   mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches();
-  ClassHelper kh(NULL, this);
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
     mirror::DexCache* dex_cache = dex_caches->Get(i);
     mirror::ObjectArray<mirror::Class>* types = dex_cache->GetResolvedTypes();
     for (int32_t j = 0; j < types->GetLength(); j++) {
       mirror::Class* klass = types->Get(j);
       if (klass != NULL) {
-        kh.ChangeClass(klass);
+        ClassHelper kh(klass);
         DCHECK(klass->GetClassLoader() == NULL);
         const char* descriptor = kh.GetDescriptor();
         size_t hash = Hash(descriptor);
@@ -2429,11 +2411,10 @@
   }
   size_t hash = Hash(descriptor);
   ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  ClassHelper kh(NULL, this);
   for (auto it = class_table_.lower_bound(hash), end = class_table_.end();
       it != end && it->first == hash; ++it) {
     mirror::Class* klass = it->second;
-    kh.ChangeClass(klass);
+    ClassHelper kh(klass);
     if (strcmp(descriptor, kh.GetDescriptor()) == 0) {
       result.push_back(klass);
     }
@@ -2687,12 +2668,10 @@
 static void CheckProxyMethod(mirror::ArtMethod* method,
                              SirtRef<mirror::ArtMethod>& prototype);
 
-mirror::Class* ClassLinker::CreateProxyClass(mirror::String* name,
-                                             mirror::ObjectArray<mirror::Class>* interfaces,
-                                             mirror::ClassLoader* loader,
-                                             mirror::ObjectArray<mirror::ArtMethod>* methods,
-                                             mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >* throws) {
-  Thread* self = Thread::Current();
+mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccess& soa, jstring name,
+                                             jobjectArray interfaces, jobject loader,
+                                             jobjectArray methods, jobjectArray throws) {
+  Thread* self = soa.Self();
   SirtRef<mirror::Class> klass(self, AllocClass(self, GetClassRoot(kJavaLangClass),
                                                 sizeof(mirror::SynthesizedProxyClass)));
   if (klass.get() == NULL) {
@@ -2702,9 +2681,9 @@
   DCHECK(klass->GetClass() != NULL);
   klass->SetObjectSize(sizeof(mirror::Proxy));
   klass->SetAccessFlags(kAccClassIsProxy | kAccPublic | kAccFinal);
-  klass->SetClassLoader(loader);
+  klass->SetClassLoader(soa.Decode<mirror::ClassLoader*>(loader));
   DCHECK_EQ(klass->GetPrimitiveType(), Primitive::kPrimNot);
-  klass->SetName(name);
+  klass->SetName(soa.Decode<mirror::String*>(name));
   mirror::Class* proxy_class = GetClassRoot(kJavaLangReflectProxy);
   klass->SetDexCache(proxy_class->GetDexCache());
   klass->SetStatus(mirror::Class::kStatusIdx, self);
@@ -2742,8 +2721,7 @@
 
   // Proxies have 1 direct method, the constructor
   {
-    mirror::ObjectArray<mirror::ArtMethod>* directs =
-      AllocArtMethodArray(self, 1);
+    mirror::ObjectArray<mirror::ArtMethod>* directs = AllocArtMethodArray(self, 1);
     if (UNLIKELY(directs == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return NULL;
@@ -2757,11 +2735,11 @@
     klass->SetDirectMethod(0, constructor);
   }
 
-  // Create virtual method using specified prototypes
-  size_t num_virtual_methods = methods->GetLength();
+  // Create virtual methods using the specified prototypes.
+  size_t num_virtual_methods =
+      soa.Decode<mirror::ObjectArray<mirror::ArtMethod>*>(methods)->GetLength();
   {
-    mirror::ObjectArray<mirror::ArtMethod>* virtuals =
-        AllocArtMethodArray(self, num_virtual_methods);
+    mirror::ObjectArray<mirror::ArtMethod>* virtuals = AllocArtMethodArray(self, num_virtual_methods);
     if (UNLIKELY(virtuals == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return NULL;
@@ -2769,7 +2747,9 @@
     klass->SetVirtualMethods(virtuals);
   }
   for (size_t i = 0; i < num_virtual_methods; ++i) {
-    SirtRef<mirror::ArtMethod> prototype(self, methods->Get(i));
+    mirror::ObjectArray<mirror::ArtMethod>* decoded_methods =
+        soa.Decode<mirror::ObjectArray<mirror::ArtMethod>*>(methods);
+    SirtRef<mirror::ArtMethod> prototype(self, decoded_methods->Get(i));
     mirror::ArtMethod* clone = CreateProxyMethod(self, klass, prototype);
     if (UNLIKELY(clone == NULL)) {
       CHECK(self->IsExceptionPending());  // OOME.
@@ -2785,13 +2765,15 @@
   {
     ObjectLock lock(self, klass.get());  // Must hold lock on object when resolved.
     // Link the fields and virtual methods, creating vtable and iftables
-    if (!LinkClass(klass, interfaces, self)) {
+    SirtRef<mirror::ObjectArray<mirror::Class> > sirt_interfaces(
+        self, soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
+    if (!LinkClass(self, klass, sirt_interfaces)) {
       klass->SetStatus(mirror::Class::kStatusError, self);
       return NULL;
     }
 
-    interfaces_sfield->SetObject(klass.get(), interfaces);
-    throws_sfield->SetObject(klass.get(), throws);
+    interfaces_sfield->SetObject(klass.get(), soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
+    throws_sfield->SetObject(klass.get(), soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >*>(throws));
     klass->SetStatus(mirror::Class::kStatusInitialized, self);
   }
 
@@ -2800,22 +2782,25 @@
     CHECK(klass->GetIFields() == NULL);
     CheckProxyConstructor(klass->GetDirectMethod(0));
     for (size_t i = 0; i < num_virtual_methods; ++i) {
-      SirtRef<mirror::ArtMethod> prototype(self, methods->Get(i));
+      mirror::ObjectArray<mirror::ArtMethod>* decoded_methods =
+          soa.Decode<mirror::ObjectArray<mirror::ArtMethod>*>(methods);
+      SirtRef<mirror::ArtMethod> prototype(self, decoded_methods->Get(i));
       CheckProxyMethod(klass->GetVirtualMethod(i), prototype);
     }
 
+    mirror::String* decoded_name = soa.Decode<mirror::String*>(name);
     std::string interfaces_field_name(StringPrintf("java.lang.Class[] %s.interfaces",
-                                                   name->ToModifiedUtf8().c_str()));
+                                                   decoded_name->ToModifiedUtf8().c_str()));
     CHECK_EQ(PrettyField(klass->GetStaticField(0)), interfaces_field_name);
 
     std::string throws_field_name(StringPrintf("java.lang.Class[][] %s.throws",
-                                               name->ToModifiedUtf8().c_str()));
+                                               decoded_name->ToModifiedUtf8().c_str()));
     CHECK_EQ(PrettyField(klass->GetStaticField(1)), throws_field_name);
 
     mirror::SynthesizedProxyClass* synth_proxy_class =
         down_cast<mirror::SynthesizedProxyClass*>(klass.get());
-    CHECK_EQ(synth_proxy_class->GetInterfaces(), interfaces);
-    CHECK_EQ(synth_proxy_class->GetThrows(), throws);
+    CHECK_EQ(synth_proxy_class->GetInterfaces(), soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
+    CHECK_EQ(synth_proxy_class->GetThrows(), soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >*>(throws));
   }
   std::string descriptor(GetDescriptorForProxy(klass.get()));
   mirror::Class* existing = InsertClass(descriptor.c_str(), klass.get(), Hash(descriptor.c_str()));
@@ -2977,6 +2962,10 @@
   return true;
 }
 
+bool ClassLinker::IsInitialized() const {
+  return init_done_;
+}
+
 bool ClassLinker::InitializeClass(mirror::Class* klass, bool can_init_statics,
                                   bool can_init_parents) {
   // see JLS 3rd edition, 12.4.2 "Detailed Initialization Procedure" for the locking protocol
@@ -3084,7 +3073,9 @@
     const DexFile::ClassDef* dex_class_def = kh.GetClassDef();
     CHECK(dex_class_def != NULL);
     const DexFile& dex_file = kh.GetDexFile();
-    EncodedStaticFieldValueIterator it(dex_file, kh.GetDexCache(), klass->GetClassLoader(),
+    SirtRef<mirror::ClassLoader> class_loader(self, klass->GetClassLoader());
+    SirtRef<mirror::DexCache> dex_cache(self, kh.GetDexCache());
+    EncodedStaticFieldValueIterator it(dex_file, &dex_cache, &class_loader,
                                        this, *dex_class_def);
     if (it.HasNext()) {
       CHECK(can_init_statics);
@@ -3196,12 +3187,11 @@
       }
     }
   }
-  mirror::IfTable* iftable = klass->GetIfTable();
   for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
-    mirror::Class* interface = iftable->GetInterface(i);
+    mirror::Class* interface = klass->GetIfTable()->GetInterface(i);
     if (klass->GetClassLoader() != interface->GetClassLoader()) {
       for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
-        const mirror::ArtMethod* method = iftable->GetMethodArray(i)->Get(j);
+        const mirror::ArtMethod* method = klass->GetIfTable()->GetMethodArray(i)->Get(j);
         if (!IsSameMethodSignatureInDifferentClassContexts(method, interface,
                                                            method->GetDeclaringClass())) {
           ThrowLinkageError(klass, "Class %s method %s resolves differently in interface %s",
@@ -3259,11 +3249,14 @@
   if (klass1 == klass2) {
     return true;
   }
-  mirror::Class* found1 = FindClass(descriptor, klass1->GetClassLoader());
+  Thread* self = Thread::Current();
+  SirtRef<mirror::ClassLoader> class_loader1(self, klass1->GetClassLoader());
+  mirror::Class* found1 = FindClass(descriptor, class_loader1);
   if (found1 == NULL) {
     Thread::Current()->ClearException();
   }
-  mirror::Class* found2 = FindClass(descriptor, klass2->GetClassLoader());
+  SirtRef<mirror::ClassLoader> class_loader2(self, klass2->GetClassLoader());
+  mirror::Class* found2 = FindClass(descriptor, class_loader2);
   if (found2 == NULL) {
     Thread::Current()->ClearException();
   }
@@ -3285,17 +3278,20 @@
 }
 
 void ClassLinker::ConstructFieldMap(const DexFile& dex_file, const DexFile::ClassDef& dex_class_def,
-                                    mirror::Class* c, SafeMap<uint32_t, mirror::ArtField*>& field_map) {
-  mirror::ClassLoader* cl = c->GetClassLoader();
+                                    mirror::Class* c,
+                                    SafeMap<uint32_t, mirror::ArtField*>& field_map) {
   const byte* class_data = dex_file.GetClassData(dex_class_def);
   ClassDataItemIterator it(dex_file, class_data);
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, c->GetDexCache());
+  SirtRef<mirror::ClassLoader> class_loader(self, c->GetClassLoader());
   for (size_t i = 0; it.HasNextStaticField(); i++, it.Next()) {
-    field_map.Put(i, ResolveField(dex_file, it.GetMemberIndex(), c->GetDexCache(), cl, true));
+    field_map.Put(i, ResolveField(dex_file, it.GetMemberIndex(), dex_cache, class_loader, true));
   }
 }
 
-bool ClassLinker::LinkClass(SirtRef<mirror::Class>& klass,
-                            mirror::ObjectArray<mirror::Class>* interfaces, Thread* self) {
+bool ClassLinker::LinkClass(Thread* self, SirtRef<mirror::Class>& klass,
+                            SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces) {
   CHECK_EQ(mirror::Class::kStatusLoaded, klass->GetStatus());
   if (!LinkSuperClass(klass)) {
     return false;
@@ -3419,7 +3415,7 @@
 
 // Populate the class vtable and itable. Compute return type indices.
 bool ClassLinker::LinkMethods(SirtRef<mirror::Class>& klass,
-                              mirror::ObjectArray<mirror::Class>* interfaces) {
+                              SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces) {
   if (klass->IsInterface()) {
     // No vtable.
     size_t count = klass->NumVirtualMethods();
@@ -3453,15 +3449,13 @@
       return false;
     }
     // See if any of our virtual methods override the superclass.
-    MethodHelper local_mh(NULL, this);
-    MethodHelper super_mh(NULL, this);
     for (size_t i = 0; i < klass->NumVirtualMethods(); ++i) {
       mirror::ArtMethod* local_method = klass->GetVirtualMethodDuringLinking(i);
-      local_mh.ChangeMethod(local_method);
+      MethodHelper local_mh(local_method);
       size_t j = 0;
       for (; j < actual_count; ++j) {
         mirror::ArtMethod* super_method = vtable->Get(j);
-        super_mh.ChangeMethod(super_method);
+        MethodHelper super_mh(super_method);
         if (local_mh.HasSameNameAndSignature(&super_mh)) {
           if (klass->CanAccessMember(super_method->GetDeclaringClass(), super_method->GetAccessFlags())) {
             if (super_method->IsFinal()) {
@@ -3525,7 +3519,7 @@
 }
 
 bool ClassLinker::LinkInterfaceMethods(SirtRef<mirror::Class>& klass,
-                                       mirror::ObjectArray<mirror::Class>* interfaces) {
+                                       SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces) {
   // Set the imt table to be all conflicts by default.
   klass->SetImTable(Runtime::Current()->GetDefaultImt());
   size_t super_ifcount;
@@ -3535,11 +3529,13 @@
     super_ifcount = 0;
   }
   size_t ifcount = super_ifcount;
-  ClassHelper kh(klass.get(), this);
-  uint32_t num_interfaces = interfaces == NULL ? kh.NumDirectInterfaces() : interfaces->GetLength();
+  ClassHelper kh(klass.get());
+  uint32_t num_interfaces =
+      interfaces.get() == nullptr ? kh.NumDirectInterfaces() : interfaces->GetLength();
   ifcount += num_interfaces;
   for (size_t i = 0; i < num_interfaces; i++) {
-    mirror::Class* interface = interfaces == NULL ? kh.GetDirectInterface(i) : interfaces->Get(i);
+    mirror::Class* interface =
+        interfaces.get() == nullptr ? kh.GetDirectInterface(i) : interfaces->Get(i);
     ifcount += interface->GetIfTableCount();
   }
   if (ifcount == 0) {
@@ -3580,7 +3576,8 @@
   // Flatten the interface inheritance hierarchy.
   size_t idx = super_ifcount;
   for (size_t i = 0; i < num_interfaces; i++) {
-    mirror::Class* interface = interfaces == NULL ? kh.GetDirectInterface(i) : interfaces->Get(i);
+    mirror::Class* interface =
+        interfaces.get() == nullptr ? kh.GetDirectInterface(i) : interfaces->Get(i);
     DCHECK(interface != NULL);
     if (!interface->IsInterface()) {
       ClassHelper ih(interface);
@@ -3643,20 +3640,21 @@
     return false;
   }
   std::vector<mirror::ArtMethod*> miranda_list;
-  MethodHelper vtable_mh(NULL, this);
-  MethodHelper interface_mh(NULL, this);
+  MethodHelper vtable_mh(NULL);
+  MethodHelper interface_mh(NULL);
   for (size_t i = 0; i < ifcount; ++i) {
     mirror::Class* interface = iftable->GetInterface(i);
     size_t num_methods = interface->NumVirtualMethods();
     if (num_methods > 0) {
-      mirror::ObjectArray<mirror::ArtMethod>* method_array =
-          AllocArtMethodArray(self, num_methods);
-      if (UNLIKELY(method_array == NULL)) {
+      SirtRef<mirror::ObjectArray<mirror::ArtMethod> >
+          method_array(self, AllocArtMethodArray(self, num_methods));
+      if (UNLIKELY(method_array.get() == nullptr)) {
         CHECK(self->IsExceptionPending());  // OOME.
         return false;
       }
-      iftable->SetMethodArray(i, method_array);
-      mirror::ObjectArray<mirror::ArtMethod>* vtable = klass->GetVTableDuringLinking();
+      iftable->SetMethodArray(i, method_array.get());
+      SirtRef<mirror::ObjectArray<mirror::ArtMethod> > vtable(self,
+                                                              klass->GetVTableDuringLinking());
       for (size_t j = 0; j < num_methods; ++j) {
         mirror::ArtMethod* interface_method = interface->GetVirtualMethod(j);
         interface_mh.ChangeMethod(interface_method);
@@ -3709,10 +3707,7 @@
               CHECK(self->IsExceptionPending());  // OOME.
               return false;
             }
-#ifdef MOVING_GARBAGE_COLLECTOR
             // TODO: If a methods move then the miranda_list may hold stale references.
-            UNIMPLEMENTED(FATAL);
-#endif
             miranda_list.push_back(miranda_method.get());
           }
           method_array->Set(j, miranda_method.get());
@@ -3791,17 +3786,16 @@
 }
 
 struct LinkFieldsComparator {
-  explicit LinkFieldsComparator(FieldHelper* fh)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : fh_(fh) {}
+  explicit LinkFieldsComparator() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  }
   // No thread safety analysis as will be called from STL. Checked lock held in constructor.
   bool operator()(const mirror::ArtField* field1, const mirror::ArtField* field2)
       NO_THREAD_SAFETY_ANALYSIS {
     // First come reference fields, then 64-bit, and finally 32-bit
-    fh_->ChangeField(field1);
-    Primitive::Type type1 = fh_->GetTypeAsPrimitiveType();
-    fh_->ChangeField(field2);
-    Primitive::Type type2 = fh_->GetTypeAsPrimitiveType();
+    FieldHelper fh1(field1);
+    Primitive::Type type1 = fh1.GetTypeAsPrimitiveType();
+    FieldHelper fh2(field2);
+    Primitive::Type type2 = fh2.GetTypeAsPrimitiveType();
     bool isPrimitive1 = type1 != Primitive::kPrimNot;
     bool isPrimitive2 = type2 != Primitive::kPrimNot;
     bool is64bit1 = isPrimitive1 && (type1 == Primitive::kPrimLong || type1 == Primitive::kPrimDouble);
@@ -3813,14 +3807,10 @@
     }
 
     // same basic group? then sort by string.
-    fh_->ChangeField(field1);
-    const char* name1 = fh_->GetName();
-    fh_->ChangeField(field2);
-    const char* name2 = fh_->GetName();
+    const char* name1 = fh1.GetName();
+    const char* name2 = fh2.GetName();
     return strcmp(name1, name2) < 0;
   }
-
-  FieldHelper* fh_;
 };
 
 bool ClassLinker::LinkFields(SirtRef<mirror::Class>& klass, bool is_static) {
@@ -3855,17 +3845,15 @@
     CHECK(f != NULL);
     grouped_and_sorted_fields.push_back(f);
   }
-  FieldHelper fh(NULL, this);
-  std::sort(grouped_and_sorted_fields.begin(),
-            grouped_and_sorted_fields.end(),
-            LinkFieldsComparator(&fh));
+  std::sort(grouped_and_sorted_fields.begin(), grouped_and_sorted_fields.end(),
+            LinkFieldsComparator());
 
   // References should be at the front.
   size_t current_field = 0;
   size_t num_reference_fields = 0;
   for (; current_field < num_fields; current_field++) {
     mirror::ArtField* field = grouped_and_sorted_fields.front();
-    fh.ChangeField(field);
+    FieldHelper fh(field);
     Primitive::Type type = fh.GetTypeAsPrimitiveType();
     bool isPrimitive = type != Primitive::kPrimNot;
     if (isPrimitive) {
@@ -3884,7 +3872,7 @@
   if (current_field != num_fields && !IsAligned<8>(field_offset.Uint32Value())) {
     for (size_t i = 0; i < grouped_and_sorted_fields.size(); i++) {
       mirror::ArtField* field = grouped_and_sorted_fields[i];
-      fh.ChangeField(field);
+      FieldHelper fh(field);
       Primitive::Type type = fh.GetTypeAsPrimitiveType();
       CHECK(type != Primitive::kPrimNot);  // should only be working on primitive types
       if (type == Primitive::kPrimLong || type == Primitive::kPrimDouble) {
@@ -3906,7 +3894,7 @@
   while (!grouped_and_sorted_fields.empty()) {
     mirror::ArtField* field = grouped_and_sorted_fields.front();
     grouped_and_sorted_fields.pop_front();
-    fh.ChangeField(field);
+    FieldHelper fh(field);
     Primitive::Type type = fh.GetTypeAsPrimitiveType();
     CHECK(type != Primitive::kPrimNot);  // should only be working on primitive types
     fields->Set(current_field, field);
@@ -3920,11 +3908,11 @@
 
   // We lie to the GC about the java.lang.ref.Reference.referent field, so it doesn't scan it.
   if (!is_static &&
-      (strcmp("Ljava/lang/ref/Reference;", ClassHelper(klass.get(), this).GetDescriptor()) == 0)) {
+      (strcmp("Ljava/lang/ref/Reference;", ClassHelper(klass.get()).GetDescriptor()) == 0)) {
     // We know there are no non-reference fields in the Reference classes, and we know
     // that 'referent' is alphabetically last, so this is easy...
     CHECK_EQ(num_reference_fields, num_fields);
-    fh.ChangeField(fields->Get(num_fields - 1));
+    FieldHelper fh(fields->Get(num_fields - 1));
     CHECK_STREQ(fh.GetName(), "referent");
     --num_reference_fields;
   }
@@ -3942,10 +3930,10 @@
                     << " offset=" << field->GetField32(MemberOffset(mirror::ArtField::OffsetOffset()),
                                                        false);
       }
-      fh.ChangeField(field);
+      FieldHelper fh(field);
       Primitive::Type type = fh.GetTypeAsPrimitiveType();
       bool is_primitive = type != Primitive::kPrimNot;
-      if ((strcmp("Ljava/lang/ref/Reference;", ClassHelper(klass.get(), this).GetDescriptor()) == 0)
+      if ((strcmp("Ljava/lang/ref/Reference;", ClassHelper(klass.get()).GetDescriptor()) == 0)
           && (strcmp("referent", fh.GetName()) == 0)) {
         is_primitive = true;  // We lied above, so we have to expect a lie here.
       }
@@ -3970,7 +3958,7 @@
   } else {
     klass->SetNumReferenceInstanceFields(num_reference_fields);
     if (!klass->IsVariableSize()) {
-      DCHECK_GE(size, sizeof(mirror::Object)) << ClassHelper(klass.get(), this).GetDescriptor();
+      DCHECK_GE(size, sizeof(mirror::Object)) << ClassHelper(klass.get()).GetDescriptor();
       size_t previous_size = klass->GetObjectSize();
       if (previous_size != 0) {
         // Make sure that we didn't originally have an incorrect size.
@@ -4034,9 +4022,9 @@
   }
 }
 
-mirror::String* ClassLinker::ResolveString(const DexFile& dex_file,
-                                           uint32_t string_idx, mirror::DexCache* dex_cache) {
-  DCHECK(dex_cache != NULL);
+mirror::String* ClassLinker::ResolveString(const DexFile& dex_file, uint32_t string_idx,
+                                           SirtRef<mirror::DexCache>& dex_cache) {
+  DCHECK(dex_cache.get() != nullptr);
   mirror::String* resolved = dex_cache->GetResolvedString(string_idx);
   if (resolved != NULL) {
     return resolved;
@@ -4048,11 +4036,18 @@
   return string;
 }
 
-mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file,
-                                        uint16_t type_idx,
-                                        mirror::DexCache* dex_cache,
-                                        mirror::ClassLoader* class_loader) {
-  DCHECK(dex_cache != NULL);
+mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file, uint16_t type_idx,
+                                        const mirror::Class* referrer) {
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, referrer->GetDexCache());
+  SirtRef<mirror::ClassLoader> class_loader(self, referrer->GetClassLoader());
+  return ResolveType(dex_file, type_idx, dex_cache, class_loader);
+}
+
+mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file, uint16_t type_idx,
+                                        SirtRef<mirror::DexCache>& dex_cache,
+                                        SirtRef<mirror::ClassLoader>& class_loader) {
+  DCHECK(dex_cache.get() != NULL);
   mirror::Class* resolved = dex_cache->GetResolvedType(type_idx);
   if (resolved == NULL) {
     const char* descriptor = dex_file.StringByTypeIdx(type_idx);
@@ -4082,11 +4077,11 @@
 
 mirror::ArtMethod* ClassLinker::ResolveMethod(const DexFile& dex_file,
                                               uint32_t method_idx,
-                                              mirror::DexCache* dex_cache,
-                                              mirror::ClassLoader* class_loader,
+                                              SirtRef<mirror::DexCache>& dex_cache,
+                                              SirtRef<mirror::ClassLoader>& class_loader,
                                               const mirror::ArtMethod* referrer,
                                               InvokeType type) {
-  DCHECK(dex_cache != NULL);
+  DCHECK(dex_cache.get() != NULL);
   // Check for hit in the dex cache.
   mirror::ArtMethod* resolved = dex_cache->GetResolvedMethod(method_idx);
   if (resolved != NULL && !resolved->IsRuntimeMethod()) {
@@ -4104,15 +4099,15 @@
   switch (type) {
     case kDirect:  // Fall-through.
     case kStatic:
-      resolved = klass->FindDirectMethod(dex_cache, method_idx);
+      resolved = klass->FindDirectMethod(dex_cache.get(), method_idx);
       break;
     case kInterface:
-      resolved = klass->FindInterfaceMethod(dex_cache, method_idx);
+      resolved = klass->FindInterfaceMethod(dex_cache.get(), method_idx);
       DCHECK(resolved == NULL || resolved->GetDeclaringClass()->IsInterface());
       break;
     case kSuper:  // Fall-through.
     case kVirtual:
-      resolved = klass->FindVirtualMethod(dex_cache, method_idx);
+      resolved = klass->FindVirtualMethod(dex_cache.get(), method_idx);
       break;
     default:
       LOG(FATAL) << "Unreachable - invocation type: " << type;
@@ -4227,12 +4222,11 @@
   }
 }
 
-mirror::ArtField* ClassLinker::ResolveField(const DexFile& dex_file,
-                                            uint32_t field_idx,
-                                            mirror::DexCache* dex_cache,
-                                            mirror::ClassLoader* class_loader,
+mirror::ArtField* ClassLinker::ResolveField(const DexFile& dex_file, uint32_t field_idx,
+                                            SirtRef<mirror::DexCache>& dex_cache,
+                                            SirtRef<mirror::ClassLoader>& class_loader,
                                             bool is_static) {
-  DCHECK(dex_cache != NULL);
+  DCHECK(dex_cache.get() != nullptr);
   mirror::ArtField* resolved = dex_cache->GetResolvedField(field_idx);
   if (resolved != NULL) {
     return resolved;
@@ -4245,9 +4239,9 @@
   }
 
   if (is_static) {
-    resolved = klass->FindStaticField(dex_cache, field_idx);
+    resolved = klass->FindStaticField(dex_cache.get(), field_idx);
   } else {
-    resolved = klass->FindInstanceField(dex_cache, field_idx);
+    resolved = klass->FindInstanceField(dex_cache.get(), field_idx);
   }
 
   if (resolved == NULL) {
@@ -4269,9 +4263,9 @@
 
 mirror::ArtField* ClassLinker::ResolveFieldJLS(const DexFile& dex_file,
                                                uint32_t field_idx,
-                                               mirror::DexCache* dex_cache,
-                                               mirror::ClassLoader* class_loader) {
-  DCHECK(dex_cache != NULL);
+                                               SirtRef<mirror::DexCache>& dex_cache,
+                                               SirtRef<mirror::ClassLoader>& class_loader) {
+  DCHECK(dex_cache.get() != nullptr);
   mirror::ArtField* resolved = dex_cache->GetResolvedField(field_idx);
   if (resolved != NULL) {
     return resolved;
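
With the class_linker.cc changes above, FindClass(), DefineClass() and the Resolve*() entry points take SirtRef<>& parameters, so callers construct the handles themselves. A hedged sketch of the new calling convention (class_linker stands in for Runtime::Current()->GetClassLinker()):

Thread* self = Thread::Current();
// A null class loader selects the boot class path, as in FindSystemClass().
SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
mirror::Class* klass = class_linker->FindClass("Ljava/lang/String;", class_loader);
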
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 473370d..4e2cc06 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -25,6 +25,7 @@
 #include "base/mutex.h"
 #include "dex_file.h"
 #include "gtest/gtest.h"
+#include "jni.h"
 #include "root_visitor.h"
 #include "oat_file.h"
 
@@ -45,6 +46,7 @@
 
 class InternTable;
 class ObjectLock;
+class ScopedObjectAccess;
 template<class T> class SirtRef;
 
 typedef bool (ClassVisitor)(mirror::Class* c, void* arg);
@@ -56,29 +58,31 @@
   // (non-marker) interfaces.
   static constexpr size_t kImtSize = 64;
 
-  // Creates the class linker by bootstrapping from dex files.
-  static ClassLinker* CreateFromCompiler(const std::vector<const DexFile*>& boot_class_path,
-                                         InternTable* intern_table)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  // Creates the class linker from an image.
-  static ClassLinker* CreateFromImage(InternTable* intern_table)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
+  explicit ClassLinker(InternTable* intern_table);
   ~ClassLinker();
 
+  // Initialize class linker by bootstrapping from dex files.
+  void InitFromCompiler(const std::vector<const DexFile*>& boot_class_path)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Initialize class linker from one or more images.
+  void InitFromImage() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   bool IsInBootClassPath(const char* descriptor);
 
   // Finds a class by its descriptor, loading it if necessary.
   // If class_loader is null, searches boot_class_path_.
-  mirror::Class* FindClass(const char* descriptor, mirror::ClassLoader* class_loader)
+  mirror::Class* FindClass(const char* descriptor, SirtRef<mirror::ClassLoader>& class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::Class* FindSystemClass(const char* descriptor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Returns true if the class linker is initialized.
+  bool IsInitialized() const;
+
   // Define a new class based on a ClassDef from a DexFile
-  mirror::Class* DefineClass(const char* descriptor, mirror::ClassLoader* class_loader,
+  mirror::Class* DefineClass(const char* descriptor, SirtRef<mirror::ClassLoader>& class_loader,
                              const DexFile& dex_file, const DexFile::ClassDef& dex_class_def)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -122,7 +126,7 @@
   // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache.
   mirror::String* ResolveString(const DexFile& dex_file, uint32_t string_idx,
-                                mirror::DexCache* dex_cache)
+                                SirtRef<mirror::DexCache>& dex_cache)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a Type with the given index from the DexFile, storing the
@@ -130,12 +134,7 @@
   // target DexCache and ClassLoader to use for resolution.
   mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx,
                              const mirror::Class* referrer)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return ResolveType(dex_file,
-                       type_idx,
-                       referrer->GetDexCache(),
-                       referrer->GetClassLoader());
-  }
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a Type with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
@@ -150,10 +149,9 @@
   // result in DexCache. The ClassLoader is used to search for the
   // type, since it may be referenced from but not contained within
   // the given DexFile.
-  mirror::Class* ResolveType(const DexFile& dex_file,
-                             uint16_t type_idx,
-                             mirror::DexCache* dex_cache,
-                             mirror::ClassLoader* class_loader)
+  mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx,
+                             SirtRef<mirror::DexCache>& dex_cache,
+                             SirtRef<mirror::ClassLoader>& class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a method with a given ID from the DexFile, storing the
@@ -163,8 +161,8 @@
   // virtual method.
   mirror::ArtMethod* ResolveMethod(const DexFile& dex_file,
                                    uint32_t method_idx,
-                                   mirror::DexCache* dex_cache,
-                                   mirror::ClassLoader* class_loader,
+                                   SirtRef<mirror::DexCache>& dex_cache,
+                                   SirtRef<mirror::ClassLoader>& class_loader,
                                    const mirror::ArtMethod* referrer,
                                    InvokeType type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -184,8 +182,8 @@
   // field.
   mirror::ArtField* ResolveField(const DexFile& dex_file,
                                  uint32_t field_idx,
-                                 mirror::DexCache* dex_cache,
-                                 mirror::ClassLoader* class_loader,
+                                 SirtRef<mirror::DexCache>& dex_cache,
+                                 SirtRef<mirror::ClassLoader>& class_loader,
                                  bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -195,8 +193,8 @@
   // field resolution semantics are followed.
   mirror::ArtField* ResolveFieldJLS(const DexFile& dex_file,
                                     uint32_t field_idx,
-                                    mirror::DexCache* dex_cache,
-                                    mirror::ClassLoader* class_loader)
+                                    SirtRef<mirror::DexCache>& dex_cache,
+                                    SirtRef<mirror::ClassLoader>& class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get shorty from method index without resolution. Used to do handlerization.
@@ -314,10 +312,8 @@
   void ResolveMethodExceptionHandlerTypes(const DexFile& dex_file, mirror::ArtMethod* klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::Class* CreateProxyClass(mirror::String* name, mirror::ObjectArray<mirror::Class>* interfaces,
-                                  mirror::ClassLoader* loader,
-                                  mirror::ObjectArray<mirror::ArtMethod>* methods,
-                                  mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >* throws)
+  mirror::Class* CreateProxyClass(ScopedObjectAccess& soa, jstring name, jobjectArray interfaces,
+                                  jobject loader, jobjectArray methods, jobjectArray throws)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   std::string GetDescriptorForProxy(const mirror::Class* proxy_class)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -364,18 +360,13 @@
       LOCKS_EXCLUDED(Locks::classlinker_classes_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
- private:
-  explicit ClassLinker(InternTable*);
+  // Special code to allocate an ArtMethod; use this instead of class->AllocObject.
+  mirror::ArtMethod* AllocArtMethod(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+ private:
   const OatFile::OatMethod GetOatMethodFor(const mirror::ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Initialize class linker by bootstraping from dex files
-  void InitFromCompiler(const std::vector<const DexFile*>& boot_class_path)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  // Initialize class linker from one or more images.
-  void InitFromImage() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   OatFile& GetImageOatFile(gc::space::ImageSpace* space)
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -393,7 +384,6 @@
   mirror::DexCache* AllocDexCache(Thread* self, const DexFile& dex_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::ArtField* AllocArtField(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::ArtMethod* AllocArtMethod(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::Class* CreatePrimitiveClass(Thread* self, Primitive::Type type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -401,7 +391,8 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 
-  mirror::Class* CreateArrayClass(const char* descriptor, mirror::ClassLoader* class_loader)
+  mirror::Class* CreateArrayClass(const char* descriptor,
+                                  SirtRef<mirror::ClassLoader>& class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void AppendToBootClassPath(const DexFile& dex_file)
@@ -458,8 +449,8 @@
                                                      const mirror::Class* klass2)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkClass(SirtRef<mirror::Class>& klass, mirror::ObjectArray<mirror::Class>* interfaces,
-                 Thread* self)
+  bool LinkClass(Thread* self, SirtRef<mirror::Class>& klass,
+                 SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkSuperClass(SirtRef<mirror::Class>& klass)
@@ -468,14 +459,15 @@
   bool LoadSuperAndInterfaces(SirtRef<mirror::Class>& klass, const DexFile& dex_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool LinkMethods(SirtRef<mirror::Class>& klass, mirror::ObjectArray<mirror::Class>* interfaces)
+  bool LinkMethods(SirtRef<mirror::Class>& klass,
+                   SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkVirtualMethods(SirtRef<mirror::Class>& klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkInterfaceMethods(SirtRef<mirror::Class>& klass,
-                            mirror::ObjectArray<mirror::Class>* interfaces)
+                            SirtRef<mirror::ObjectArray<mirror::Class> >& interfaces)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkStaticFields(SirtRef<mirror::Class>& klass)
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index a52b680..b8bc474 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -95,7 +95,8 @@
                         const std::string& component_type,
                         mirror::ClassLoader* class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::Class* array = class_linker_->FindClass(array_descriptor.c_str(), class_loader);
+    SirtRef<mirror::ClassLoader> loader(Thread::Current(), class_loader);
+    mirror::Class* array = class_linker_->FindClass(array_descriptor.c_str(), loader);
     ClassHelper array_component_ch(array->GetComponentType());
     EXPECT_STREQ(component_type.c_str(), array_component_ch.GetDescriptor());
     EXPECT_EQ(class_loader, array->GetClassLoader());
@@ -647,12 +648,12 @@
   ScopedObjectAccess soa(Thread::Current());
   SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("Nested")));
 
-  mirror::Class* outer = class_linker_->FindClass("LNested;", class_loader.get());
+  mirror::Class* outer = class_linker_->FindClass("LNested;", class_loader);
   ASSERT_TRUE(outer != NULL);
   EXPECT_EQ(0U, outer->NumVirtualMethods());
   EXPECT_EQ(1U, outer->NumDirectMethods());
 
-  mirror::Class* inner = class_linker_->FindClass("LNested$Inner;", class_loader.get());
+  mirror::Class* inner = class_linker_->FindClass("LNested$Inner;", class_loader);
   ASSERT_TRUE(inner != NULL);
   EXPECT_EQ(0U, inner->NumVirtualMethods());
   EXPECT_EQ(1U, inner->NumDirectMethods());
@@ -711,7 +712,7 @@
 
   SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("MyClass")));
   AssertNonExistentClass("LMyClass;");
-  mirror::Class* MyClass = class_linker_->FindClass("LMyClass;", class_loader.get());
+  mirror::Class* MyClass = class_linker_->FindClass("LMyClass;", class_loader);
   kh.ChangeClass(MyClass);
   ASSERT_TRUE(MyClass != NULL);
   ASSERT_TRUE(MyClass->GetClass() != NULL);
@@ -809,29 +810,30 @@
   // Validate that the "value" field is always the 0th field in each of java.lang's box classes.
   // This lets UnboxPrimitive avoid searching for the field by name at runtime.
   ScopedObjectAccess soa(Thread::Current());
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
   mirror::Class* c;
-  c = class_linker_->FindClass("Ljava/lang/Boolean;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Boolean;", class_loader);
   FieldHelper fh(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
-  c = class_linker_->FindClass("Ljava/lang/Byte;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Byte;", class_loader);
   fh.ChangeField(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
-  c = class_linker_->FindClass("Ljava/lang/Character;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Character;", class_loader);
   fh.ChangeField(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
-  c = class_linker_->FindClass("Ljava/lang/Double;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Double;", class_loader);
   fh.ChangeField(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
-  c = class_linker_->FindClass("Ljava/lang/Float;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Float;", class_loader);
   fh.ChangeField(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
-  c = class_linker_->FindClass("Ljava/lang/Integer;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Integer;", class_loader);
   fh.ChangeField(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
-  c = class_linker_->FindClass("Ljava/lang/Long;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Long;", class_loader);
   fh.ChangeField(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
-  c = class_linker_->FindClass("Ljava/lang/Short;", NULL);
+  c = class_linker_->FindClass("Ljava/lang/Short;", class_loader);
   fh.ChangeField(c->GetIFields()->Get(0));
   EXPECT_STREQ("value", fh.GetName());
 }
@@ -840,8 +842,8 @@
   ScopedObjectAccess soa(Thread::Current());
   SirtRef<mirror::ClassLoader> class_loader_1(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("MyClass")));
   SirtRef<mirror::ClassLoader> class_loader_2(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("MyClass")));
-  mirror::Class* MyClass_1 = class_linker_->FindClass("LMyClass;", class_loader_1.get());
-  mirror::Class* MyClass_2 = class_linker_->FindClass("LMyClass;", class_loader_2.get());
+  mirror::Class* MyClass_1 = class_linker_->FindClass("LMyClass;", class_loader_1);
+  mirror::Class* MyClass_2 = class_linker_->FindClass("LMyClass;", class_loader_2);
   EXPECT_TRUE(MyClass_1 != NULL);
   EXPECT_TRUE(MyClass_2 != NULL);
   EXPECT_NE(MyClass_1, MyClass_2);
@@ -850,7 +852,7 @@
 TEST_F(ClassLinkerTest, StaticFields) {
   ScopedObjectAccess soa(Thread::Current());
   SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("Statics")));
-  mirror::Class* statics = class_linker_->FindClass("LStatics;", class_loader.get());
+  mirror::Class* statics = class_linker_->FindClass("LStatics;", class_loader);
   class_linker_->EnsureInitialized(statics, true, true);
 
   // Static final primitives that are initialized by a compile-time constant
@@ -932,11 +934,11 @@
 TEST_F(ClassLinkerTest, Interfaces) {
   ScopedObjectAccess soa(Thread::Current());
   SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(LoadDex("Interfaces")));
-  mirror::Class* I = class_linker_->FindClass("LInterfaces$I;", class_loader.get());
-  mirror::Class* J = class_linker_->FindClass("LInterfaces$J;", class_loader.get());
-  mirror::Class* K = class_linker_->FindClass("LInterfaces$K;", class_loader.get());
-  mirror::Class* A = class_linker_->FindClass("LInterfaces$A;", class_loader.get());
-  mirror::Class* B = class_linker_->FindClass("LInterfaces$B;", class_loader.get());
+  mirror::Class* I = class_linker_->FindClass("LInterfaces$I;", class_loader);
+  mirror::Class* J = class_linker_->FindClass("LInterfaces$J;", class_loader);
+  mirror::Class* K = class_linker_->FindClass("LInterfaces$K;", class_loader);
+  mirror::Class* A = class_linker_->FindClass("LInterfaces$A;", class_loader);
+  mirror::Class* B = class_linker_->FindClass("LInterfaces$B;", class_loader);
   EXPECT_TRUE(I->IsAssignableFrom(A));
   EXPECT_TRUE(J->IsAssignableFrom(A));
   EXPECT_TRUE(J->IsAssignableFrom(K));
@@ -995,8 +997,7 @@
   SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(jclass_loader));
   const DexFile* dex_file = Runtime::Current()->GetCompileTimeClassPath(jclass_loader)[0];
   CHECK(dex_file != NULL);
-
-  mirror::Class* klass = class_linker_->FindClass("LStaticsFromCode;", class_loader.get());
+  mirror::Class* klass = class_linker_->FindClass("LStaticsFromCode;", class_loader);
   mirror::ArtMethod* clinit = klass->FindClassInitializer();
   mirror::ArtMethod* getS0 = klass->FindDirectMethod("getS0", "()Ljava/lang/Object;");
   const DexFile::StringId* string_id = dex_file->FindStringId("LStaticsFromCode;");
@@ -1049,10 +1050,9 @@
 
 TEST_F(ClassLinkerTest, ClassRootDescriptors) {
   ScopedObjectAccess soa(Thread::Current());
-  ClassHelper kh;
   for (int i = 0; i < ClassLinker::kClassRootsMax; i++) {
     mirror::Class* klass = class_linker_->GetClassRoot(ClassLinker::ClassRoot(i));
-    kh.ChangeClass(klass);
+    ClassHelper kh(klass);
     EXPECT_TRUE(kh.GetDescriptor() != NULL);
     EXPECT_STREQ(kh.GetDescriptor(),
                  class_linker_->GetClassRootDescriptor(ClassLinker::ClassRoot(i))) << " i = " << i;
diff --git a/runtime/common_test.h b/runtime/common_test.h
index 643ed1d..57cf71a 100644
--- a/runtime/common_test.h
+++ b/runtime/common_test.h
@@ -237,7 +237,6 @@
 // input 'str' is a comma separated list of feature names.  Parse it and
 // return the InstructionSetFeatures object.
 static InstructionSetFeatures ParseFeatureList(std::string str) {
-  LOG(INFO) << "Parsing features " << str;
   InstructionSetFeatures result;
   typedef std::vector<std::string> FeatureList;
   FeatureList features;
@@ -569,7 +568,8 @@
   void CompileClass(mirror::ClassLoader* class_loader, const char* class_name)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string class_descriptor(DotToDescriptor(class_name));
-    mirror::Class* klass = class_linker_->FindClass(class_descriptor.c_str(), class_loader);
+    SirtRef<mirror::ClassLoader> loader(Thread::Current(), class_loader);
+    mirror::Class* klass = class_linker_->FindClass(class_descriptor.c_str(), loader);
     CHECK(klass != NULL) << "Class not found " << class_name;
     for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
       CompileMethod(klass->GetDirectMethod(i));
@@ -581,16 +581,15 @@
 
   void CompileMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK(method != NULL);
-    base::TimingLogger timings("CommonTest::CompileMethod", false, false);
+    TimingLogger timings("CommonTest::CompileMethod", false, false);
     timings.StartSplit("CompileOne");
     compiler_driver_->CompileOne(method, timings);
     MakeExecutable(method);
+    timings.EndSplit();
   }
 
-  void CompileDirectMethod(mirror::ClassLoader* class_loader,
-                           const char* class_name,
-                           const char* method_name,
-                           const char* signature)
+  void CompileDirectMethod(SirtRef<mirror::ClassLoader>& class_loader, const char* class_name,
+                           const char* method_name, const char* signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string class_descriptor(DotToDescriptor(class_name));
     mirror::Class* klass = class_linker_->FindClass(class_descriptor.c_str(), class_loader);
@@ -601,10 +600,8 @@
     CompileMethod(method);
   }
 
-  void CompileVirtualMethod(mirror::ClassLoader* class_loader,
-                            const char* class_name,
-                            const char* method_name,
-                            const char* signature)
+  void CompileVirtualMethod(SirtRef<mirror::ClassLoader>& class_loader, const char* class_name,
+                            const char* method_name, const char* signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string class_descriptor(DotToDescriptor(class_name));
     mirror::Class* klass = class_linker_->FindClass(class_descriptor.c_str(), class_loader);
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index bdcf6ac..6c4d130 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -95,20 +95,6 @@
   return os;
 }
 
-struct SingleStepControl {
-  // Are we single-stepping right now?
-  bool is_active;
-  Thread* thread;
-
-  JDWP::JdwpStepSize step_size;
-  JDWP::JdwpStepDepth step_depth;
-
-  const mirror::ArtMethod* method;
-  int32_t line_number;  // Or -1 for native methods.
-  std::set<uint32_t> dex_pcs;
-  int stack_depth;
-};
-
 class DebugInstrumentationListener : public instrumentation::InstrumentationListener {
  public:
   DebugInstrumentationListener() {}
@@ -121,26 +107,26 @@
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
     }
-    Dbg::PostLocationEvent(method, 0, this_object, Dbg::kMethodEntry);
+    Dbg::PostLocationEvent(method, 0, this_object, Dbg::kMethodEntry, nullptr);
   }
 
   virtual void MethodExited(Thread* thread, mirror::Object* this_object,
                             const mirror::ArtMethod* method,
                             uint32_t dex_pc, const JValue& return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    UNUSED(return_value);
     if (method->IsNative()) {
       // TODO: post location events is a suspension point and native method entry stubs aren't.
       return;
     }
-    Dbg::PostLocationEvent(method, dex_pc, this_object, Dbg::kMethodExit);
+    Dbg::PostLocationEvent(method, dex_pc, this_object, Dbg::kMethodExit, &return_value);
   }
 
-  virtual void MethodUnwind(Thread* thread, const mirror::ArtMethod* method,
-                            uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
+                            const mirror::ArtMethod* method, uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // We're not recorded to listen to this kind of event, so complain.
     LOG(ERROR) << "Unexpected method unwind event in debugger " << PrettyMethod(method)
-        << " " << dex_pc;
+               << " " << dex_pc;
   }
 
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
@@ -192,7 +178,6 @@
 
 // Breakpoints and single-stepping.
 static std::vector<Breakpoint> gBreakpoints GUARDED_BY(Locks::breakpoint_lock_);
-static SingleStepControl gSingleStepControl GUARDED_BY(Locks::breakpoint_lock_);
 
 static bool IsBreakpoint(const mirror::ArtMethod* m, uint32_t dex_pc)
     LOCKS_EXCLUDED(Locks::breakpoint_lock_)
@@ -1118,7 +1103,7 @@
   if (c == NULL) {
     return status;
   }
-  new_array = gRegistry->Add(mirror::Array::Alloc(Thread::Current(), c, length));
+  new_array = gRegistry->Add(mirror::Array::Alloc<true>(Thread::Current(), c, length));
   return JDWP::ERR_NONE;
 }
 
@@ -1133,38 +1118,26 @@
 
 static JDWP::FieldId ToFieldId(const mirror::ArtField* f)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#ifdef MOVING_GARBAGE_COLLECTOR
-  UNIMPLEMENTED(FATAL);
-#else
+  CHECK(!kMovingFields);
   return static_cast<JDWP::FieldId>(reinterpret_cast<uintptr_t>(f));
-#endif
 }
 
 static JDWP::MethodId ToMethodId(const mirror::ArtMethod* m)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#ifdef MOVING_GARBAGE_COLLECTOR
-  UNIMPLEMENTED(FATAL);
-#else
+  CHECK(!kMovingMethods);
   return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(m));
-#endif
 }
 
 static mirror::ArtField* FromFieldId(JDWP::FieldId fid)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#ifdef MOVING_GARBAGE_COLLECTOR
-  UNIMPLEMENTED(FATAL);
-#else
+  CHECK(!kMovingFields);
   return reinterpret_cast<mirror::ArtField*>(static_cast<uintptr_t>(fid));
-#endif
 }
 
 static mirror::ArtMethod* FromMethodId(JDWP::MethodId mid)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#ifdef MOVING_GARBAGE_COLLECTOR
-  UNIMPLEMENTED(FATAL);
-#else
+  CHECK(!kMovingMethods);
   return reinterpret_cast<mirror::ArtMethod*>(static_cast<uintptr_t>(mid));
-#endif
 }
 
 static void SetLocation(JDWP::JdwpLocation& location, mirror::ArtMethod* m, uint32_t dex_pc)
@@ -1205,41 +1178,37 @@
   return accessFlags;
 }
 
-static const uint16_t kEclipseWorkaroundSlot = 1000;
-
 /*
- * Eclipse appears to expect that the "this" reference is in slot zero.
- * If it's not, the "variables" display will show two copies of "this",
- * possibly because it gets "this" from SF.ThisObject and then displays
- * all locals with nonzero slot numbers.
- *
- * So, we remap the item in slot 0 to 1000, and remap "this" to zero.  On
- * SF.GetValues / SF.SetValues we map them back.
- *
- * TODO: jdb uses the value to determine whether a variable is a local or an argument,
- * by checking whether it's less than the number of arguments. To make that work, we'd
- * have to "mangle" all the arguments to come first, not just the implicit argument 'this'.
+ * Circularly shifts registers so that arguments come first. Debuggers
+ * expect slots to begin with arguments, but dex code places them at
+ * the end.
  */
-static uint16_t MangleSlot(uint16_t slot, const char* name) {
-  uint16_t newSlot = slot;
-  if (strcmp(name, "this") == 0) {
-    newSlot = 0;
-  } else if (slot == 0) {
-    newSlot = kEclipseWorkaroundSlot;
+static uint16_t MangleSlot(uint16_t slot, mirror::ArtMethod* m)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
+  uint16_t ins_size = code_item->ins_size_;
+  uint16_t locals_size = code_item->registers_size_ - ins_size;
+  if (slot >= locals_size) {
+    return slot - locals_size;
+  } else {
+    return slot + ins_size;
   }
-  return newSlot;
 }
 
+/*
+ * Circularly shifts registers so that arguments come last. Reverts
+ * slots to dex style argument placement.
+ */
 static uint16_t DemangleSlot(uint16_t slot, mirror::ArtMethod* m)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (slot == kEclipseWorkaroundSlot) {
-    return 0;
-  } else if (slot == 0) {
-    const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
-    CHECK(code_item != NULL) << PrettyMethod(m);
-    return code_item->registers_size_ - code_item->ins_size_;
+  const DexFile::CodeItem* code_item = MethodHelper(m).GetCodeItem();
+  uint16_t ins_size = code_item->ins_size_;
+  uint16_t locals_size = code_item->registers_size_ - ins_size;
+  if (slot < ins_size) {
+    return slot + locals_size;
+  } else {
+    return slot - ins_size;
   }
-  return slot;
 }
 
 JDWP::JdwpError Dbg::OutputDeclaredFields(JDWP::RefTypeId class_id, bool with_generic, JDWP::ExpandBuf* pReply) {
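As a worked illustration of the circular shift performed by the MangleSlot/DemangleSlot pair rewritten in the hunk above, the following standalone sketch (hypothetical sizes, not part of the patch) maps a method with registers_size = 5 and ins_size = 2 and checks that the two functions are inverses:

#include <cassert>
#include <cstdint>

// Hypothetical layout: 5 dex registers, of which the last 2 are the ins (arguments).
static const uint16_t kRegistersSize = 5;
static const uint16_t kInsSize = 2;
static const uint16_t kLocalsSize = kRegistersSize - kInsSize;  // locals are v0..v2

// Same arithmetic as the patched MangleSlot: ins move to the front, locals follow.
uint16_t Mangle(uint16_t slot) {
  return (slot >= kLocalsSize) ? (slot - kLocalsSize) : (slot + kInsSize);
}

// Same arithmetic as the patched DemangleSlot: the inverse rotation.
uint16_t Demangle(uint16_t slot) {
  return (slot < kInsSize) ? (slot + kLocalsSize) : (slot - kInsSize);
}

int main() {
  for (uint16_t v = 0; v < kRegistersSize; ++v) {
    assert(Demangle(Mangle(v)) == v);        // The rotation round-trips.
  }
  assert(Mangle(3) == 0 && Mangle(4) == 1);  // Arguments surface first to the debugger.
  assert(Mangle(0) == 2 && Mangle(2) == 4);  // Locals are shifted up by ins_size.
  return 0;
}

With these sizes, dex registers v3 and v4 hold the arguments and surface to the debugger as slots 0 and 1, while locals v0..v2 become slots 2..4.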
@@ -1358,16 +1327,18 @@
 
 void Dbg::OutputVariableTable(JDWP::RefTypeId, JDWP::MethodId method_id, bool with_generic, JDWP::ExpandBuf* pReply) {
   struct DebugCallbackContext {
+    mirror::ArtMethod* method;
     JDWP::ExpandBuf* pReply;
     size_t variable_count;
     bool with_generic;
 
-    static void Callback(void* context, uint16_t slot, uint32_t startAddress, uint32_t endAddress, const char* name, const char* descriptor, const char* signature) {
+    static void Callback(void* context, uint16_t slot, uint32_t startAddress, uint32_t endAddress, const char* name, const char* descriptor, const char* signature)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       DebugCallbackContext* pContext = reinterpret_cast<DebugCallbackContext*>(context);
 
-      VLOG(jdwp) << StringPrintf("    %2zd: %d(%d) '%s' '%s' '%s' actual slot=%d mangled slot=%d", pContext->variable_count, startAddress, endAddress - startAddress, name, descriptor, signature, slot, MangleSlot(slot, name));
+      VLOG(jdwp) << StringPrintf("    %2zd: %d(%d) '%s' '%s' '%s' actual slot=%d mangled slot=%d", pContext->variable_count, startAddress, endAddress - startAddress, name, descriptor, signature, slot, MangleSlot(slot, pContext->method));
 
-      slot = MangleSlot(slot, name);
+      slot = MangleSlot(slot, pContext->method);
 
       expandBufAdd8BE(pContext->pReply, startAddress);
       expandBufAddUtf8String(pContext->pReply, name);
@@ -1395,6 +1366,7 @@
   expandBufAdd4BE(pReply, 0);
 
   DebugCallbackContext context;
+  context.method = m;
   context.pReply = pReply;
   context.variable_count = 0;
   context.with_generic = with_generic;
@@ -1405,6 +1377,13 @@
   JDWP::Set4BE(expandBufGetBuffer(pReply) + variable_count_offset, context.variable_count);
 }
 
+void Dbg::OutputMethodReturnValue(JDWP::MethodId method_id, const JValue* return_value,
+                                  JDWP::ExpandBuf* pReply) {
+  mirror::ArtMethod* m = FromMethodId(method_id);
+  JDWP::JdwpTag tag = BasicTagFromDescriptor(MethodHelper(m).GetShorty());
+  OutputJValue(tag, return_value, pReply);
+}
+
 JDWP::JdwpError Dbg::GetBytecodes(JDWP::RefTypeId, JDWP::MethodId method_id,
                                   std::vector<uint8_t>& bytecodes)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -1473,25 +1452,18 @@
   }
 
   JDWP::JdwpTag tag = BasicTagFromDescriptor(FieldHelper(f).GetTypeDescriptor());
-
-  if (IsPrimitiveTag(tag)) {
-    expandBufAdd1(pReply, tag);
-    if (tag == JDWP::JT_BOOLEAN || tag == JDWP::JT_BYTE) {
-      expandBufAdd1(pReply, f->Get32(o));
-    } else if (tag == JDWP::JT_CHAR || tag == JDWP::JT_SHORT) {
-      expandBufAdd2BE(pReply, f->Get32(o));
-    } else if (tag == JDWP::JT_FLOAT || tag == JDWP::JT_INT) {
-      expandBufAdd4BE(pReply, f->Get32(o));
-    } else if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) {
-      expandBufAdd8BE(pReply, f->Get64(o));
-    } else {
-      LOG(FATAL) << "Unknown tag: " << tag;
-    }
+  JValue field_value;
+  if (tag == JDWP::JT_VOID) {
+    LOG(FATAL) << "Unknown tag: " << tag;
+  } else if (!IsPrimitiveTag(tag)) {
+    field_value.SetL(f->GetObject(o));
+  } else if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) {
+    field_value.SetJ(f->Get64(o));
   } else {
-    mirror::Object* value = f->GetObject(o);
-    expandBufAdd1(pReply, TagFromObject(value));
-    expandBufAddObjectId(pReply, gRegistry->Add(value));
+    field_value.SetI(f->Get32(o));
   }
+  Dbg::OutputJValue(tag, &field_value, pReply);
+
   return JDWP::ERR_NONE;
 }
 
@@ -1569,6 +1541,27 @@
   return s->ToModifiedUtf8();
 }
 
+void Dbg::OutputJValue(JDWP::JdwpTag tag, const JValue* return_value, JDWP::ExpandBuf* pReply) {
+  if (IsPrimitiveTag(tag)) {
+    expandBufAdd1(pReply, tag);
+    if (tag == JDWP::JT_BOOLEAN || tag == JDWP::JT_BYTE) {
+      expandBufAdd1(pReply, return_value->GetI());
+    } else if (tag == JDWP::JT_CHAR || tag == JDWP::JT_SHORT) {
+      expandBufAdd2BE(pReply, return_value->GetI());
+    } else if (tag == JDWP::JT_FLOAT || tag == JDWP::JT_INT) {
+      expandBufAdd4BE(pReply, return_value->GetI());
+    } else if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) {
+      expandBufAdd8BE(pReply, return_value->GetJ());
+    } else {
+      CHECK_EQ(tag, JDWP::JT_VOID);
+    }
+  } else {
+    mirror::Object* value = return_value->GetL();
+    expandBufAdd1(pReply, TagFromObject(value));
+    expandBufAddObjectId(pReply, gRegistry->Add(value));
+  }
+}
+
 JDWP::JdwpError Dbg::GetThreadName(JDWP::ObjectId thread_id, std::string& name) {
   ScopedObjectAccessUnchecked soa(Thread::Current());
   MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
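For reference while reading the Dbg::OutputJValue helper added in the hunk above, this small sketch (an illustration, not ART code) captures the per-tag widths the function writes for primitive values; the tag characters follow the JDWP tag constants:

#include <cstddef>

// Width in bytes of the big-endian primitive payload OutputJValue emits per tag.
// Non-primitive tags are handled separately (an object id is registered instead).
size_t PrimitivePayloadWidth(char tag) {
  switch (tag) {
    case 'Z': case 'B': return 1;  // JT_BOOLEAN, JT_BYTE   -> expandBufAdd1
    case 'C': case 'S': return 2;  // JT_CHAR, JT_SHORT     -> expandBufAdd2BE
    case 'F': case 'I': return 4;  // JT_FLOAT, JT_INT      -> expandBufAdd4BE
    case 'D': case 'J': return 8;  // JT_DOUBLE, JT_LONG    -> expandBufAdd8BE
    case 'V': return 0;            // JT_VOID carries no payload
    default:  return 0;            // reference tags: not a primitive payload
  }
}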
@@ -2079,7 +2072,7 @@
           CHECK_EQ(width_, sizeof(JDWP::ObjectId));
           mirror::Object* o = reinterpret_cast<mirror::Object*>(GetVReg(m, reg, kReferenceVReg));
           VLOG(jdwp) << "get array local " << reg << " = " << o;
-          if (!Runtime::Current()->GetHeap()->IsHeapAddress(o)) {
+          if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
             LOG(FATAL) << "Register " << reg << " expected to hold array: " << o;
           }
           JDWP::SetObjectId(buf_+1, gRegistry->Add(o));
@@ -2095,7 +2088,7 @@
           CHECK_EQ(width_, sizeof(JDWP::ObjectId));
           mirror::Object* o = reinterpret_cast<mirror::Object*>(GetVReg(m, reg, kReferenceVReg));
           VLOG(jdwp) << "get object local " << reg << " = " << o;
-          if (!Runtime::Current()->GetHeap()->IsHeapAddress(o)) {
+          if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
             LOG(FATAL) << "Register " << reg << " expected to hold object: " << o;
           }
           tag_ = TagFromObject(o);
@@ -2238,8 +2231,8 @@
   visitor.WalkStack();
 }
 
-void Dbg::PostLocationEvent(const mirror::ArtMethod* m, int dex_pc,
-                            mirror::Object* this_object, int event_flags) {
+void Dbg::PostLocationEvent(const mirror::ArtMethod* m, int dex_pc, mirror::Object* this_object,
+                            int event_flags, const JValue* return_value) {
   mirror::Class* c = m->GetDeclaringClass();
 
   JDWP::JdwpLocation location;
@@ -2254,7 +2247,7 @@
   if (gRegistry->Contains(this_object)) {
     this_id = gRegistry->Add(this_object);
   }
-  gJdwpState->PostLocationEvent(&location, this_id, event_flags);
+  gJdwpState->PostLocationEvent(&location, this_id, event_flags, return_value);
 }
 
 void Dbg::PostException(Thread* thread, const ThrowLocation& throw_location,
@@ -2304,63 +2297,62 @@
     event_flags |= kBreakpoint;
   }
 
-  {
-    // If the debugger is single-stepping one of our threads, check to
-    // see if we're that thread and we've reached a step point.
-    MutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
-    if (gSingleStepControl.is_active && gSingleStepControl.thread == thread) {
-      CHECK(!m->IsNative());
-      if (gSingleStepControl.step_depth == JDWP::SD_INTO) {
-        // Step into method calls.  We break when the line number
-        // or method pointer changes.  If we're in SS_MIN mode, we
-        // always stop.
-        if (gSingleStepControl.method != m) {
-          event_flags |= kSingleStep;
-          VLOG(jdwp) << "SS new method";
-        } else if (gSingleStepControl.step_size == JDWP::SS_MIN) {
+  // If the debugger is single-stepping one of our threads, check to
+  // see if we're that thread and we've reached a step point.
+  const SingleStepControl* single_step_control = thread->GetSingleStepControl();
+  DCHECK(single_step_control != nullptr);
+  if (single_step_control->is_active) {
+    CHECK(!m->IsNative());
+    if (single_step_control->step_depth == JDWP::SD_INTO) {
+      // Step into method calls.  We break when the line number
+      // or method pointer changes.  If we're in SS_MIN mode, we
+      // always stop.
+      if (single_step_control->method != m) {
+        event_flags |= kSingleStep;
+        VLOG(jdwp) << "SS new method";
+      } else if (single_step_control->step_size == JDWP::SS_MIN) {
+        event_flags |= kSingleStep;
+        VLOG(jdwp) << "SS new instruction";
+      } else if (single_step_control->dex_pcs.find(dex_pc) == single_step_control->dex_pcs.end()) {
+        event_flags |= kSingleStep;
+        VLOG(jdwp) << "SS new line";
+      }
+    } else if (single_step_control->step_depth == JDWP::SD_OVER) {
+      // Step over method calls.  We break when the line number is
+      // different and the frame depth is <= the original frame
+      // depth.  (We can't just compare on the method, because we
+      // might get unrolled past it by an exception, and it's tricky
+      // to identify recursion.)
+
+      int stack_depth = GetStackDepth(thread);
+
+      if (stack_depth < single_step_control->stack_depth) {
+        // Popped up one or more frames, always trigger.
+        event_flags |= kSingleStep;
+        VLOG(jdwp) << "SS method pop";
+      } else if (stack_depth == single_step_control->stack_depth) {
+        // Same depth, see if we moved.
+        if (single_step_control->step_size == JDWP::SS_MIN) {
           event_flags |= kSingleStep;
           VLOG(jdwp) << "SS new instruction";
-        } else if (gSingleStepControl.dex_pcs.find(dex_pc) == gSingleStepControl.dex_pcs.end()) {
+        } else if (single_step_control->dex_pcs.find(dex_pc) == single_step_control->dex_pcs.end()) {
           event_flags |= kSingleStep;
           VLOG(jdwp) << "SS new line";
         }
-      } else if (gSingleStepControl.step_depth == JDWP::SD_OVER) {
-        // Step over method calls.  We break when the line number is
-        // different and the frame depth is <= the original frame
-        // depth.  (We can't just compare on the method, because we
-        // might get unrolled past it by an exception, and it's tricky
-        // to identify recursion.)
+      }
+    } else {
+      CHECK_EQ(single_step_control->step_depth, JDWP::SD_OUT);
+      // Return from the current method.  We break when the frame
+      // depth pops up.
 
-        int stack_depth = GetStackDepth(thread);
+      // This differs from the "method exit" break in that it stops
+      // with the PC at the next instruction in the returned-to
+      // function, rather than the end of the returning function.
 
-        if (stack_depth < gSingleStepControl.stack_depth) {
-          // popped up one or more frames, always trigger
-          event_flags |= kSingleStep;
-          VLOG(jdwp) << "SS method pop";
-        } else if (stack_depth == gSingleStepControl.stack_depth) {
-          // same depth, see if we moved
-          if (gSingleStepControl.step_size == JDWP::SS_MIN) {
-            event_flags |= kSingleStep;
-            VLOG(jdwp) << "SS new instruction";
-          } else if (gSingleStepControl.dex_pcs.find(dex_pc) == gSingleStepControl.dex_pcs.end()) {
-            event_flags |= kSingleStep;
-            VLOG(jdwp) << "SS new line";
-          }
-        }
-      } else {
-        CHECK_EQ(gSingleStepControl.step_depth, JDWP::SD_OUT);
-        // Return from the current method.  We break when the frame
-        // depth pops up.
-
-        // This differs from the "method exit" break in that it stops
-        // with the PC at the next instruction in the returned-to
-        // function, rather than the end of the returning function.
-
-        int stack_depth = GetStackDepth(thread);
-        if (stack_depth < gSingleStepControl.stack_depth) {
-          event_flags |= kSingleStep;
-          VLOG(jdwp) << "SS method pop";
-        }
+      int stack_depth = GetStackDepth(thread);
+      if (stack_depth < single_step_control->stack_depth) {
+        event_flags |= kSingleStep;
+        VLOG(jdwp) << "SS method pop";
       }
     }
   }
@@ -2368,7 +2360,7 @@
   // If there's something interesting going on, see if it matches one
   // of the debugger filters.
   if (event_flags != 0) {
-    Dbg::PostLocationEvent(m, dex_pc, this_object, event_flags);
+    Dbg::PostLocationEvent(m, dex_pc, this_object, event_flags, nullptr);
   }
 }
 
@@ -2456,50 +2448,50 @@
     return sts.GetError();
   }
 
-  MutexLock mu2(self, *Locks::breakpoint_lock_);
-  // TODO: there's no theoretical reason why we couldn't support single-stepping
-  // of multiple threads at once, but we never did so historically.
-  if (gSingleStepControl.thread != NULL && sts.GetThread() != gSingleStepControl.thread) {
-    LOG(WARNING) << "single-step already active for " << *gSingleStepControl.thread
-                 << "; switching to " << *sts.GetThread();
-  }
-
   //
   // Work out what Method* we're in, the current line number, and how deep the stack currently
   // is for step-out.
   //
 
   struct SingleStepStackVisitor : public StackVisitor {
-    explicit SingleStepStackVisitor(Thread* thread)
-        EXCLUSIVE_LOCKS_REQUIRED(Locks::breakpoint_lock_)
+    explicit SingleStepStackVisitor(Thread* thread, SingleStepControl* single_step_control,
+                                    int32_t* line_number)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-        : StackVisitor(thread, NULL) {
-      gSingleStepControl.method = NULL;
-      gSingleStepControl.stack_depth = 0;
+        : StackVisitor(thread, NULL), single_step_control_(single_step_control),
+          line_number_(line_number) {
+      DCHECK_EQ(single_step_control_, thread->GetSingleStepControl());
+      single_step_control_->method = NULL;
+      single_step_control_->stack_depth = 0;
     }
 
     // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
     // annotalysis.
     bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS {
-      Locks::breakpoint_lock_->AssertHeld(Thread::Current());
-      const mirror::ArtMethod* m = GetMethod();
+      mirror::ArtMethod* m = GetMethod();
       if (!m->IsRuntimeMethod()) {
-        ++gSingleStepControl.stack_depth;
-        if (gSingleStepControl.method == NULL) {
+        ++single_step_control_->stack_depth;
+        if (single_step_control_->method == NULL) {
           const mirror::DexCache* dex_cache = m->GetDeclaringClass()->GetDexCache();
-          gSingleStepControl.method = m;
-          gSingleStepControl.line_number = -1;
+          single_step_control_->method = m;
+          *line_number_ = -1;
           if (dex_cache != NULL) {
             const DexFile& dex_file = *dex_cache->GetDexFile();
-            gSingleStepControl.line_number = dex_file.GetLineNumFromPC(m, GetDexPc());
+            *line_number_ = dex_file.GetLineNumFromPC(m, GetDexPc());
           }
         }
       }
       return true;
     }
+
+    SingleStepControl* const single_step_control_;
+    int32_t* const line_number_;
   };
 
-  SingleStepStackVisitor visitor(sts.GetThread());
+  Thread* const thread = sts.GetThread();
+  SingleStepControl* const single_step_control = thread->GetSingleStepControl();
+  DCHECK(single_step_control != nullptr);
+  int32_t line_number = -1;
+  SingleStepStackVisitor visitor(thread, single_step_control, &line_number);
   visitor.WalkStack();
 
   //
@@ -2507,17 +2499,14 @@
   //
 
   struct DebugCallbackContext {
-    DebugCallbackContext() EXCLUSIVE_LOCKS_REQUIRED(Locks::breakpoint_lock_) {
-      last_pc_valid = false;
-      last_pc = 0;
+    explicit DebugCallbackContext(SingleStepControl* single_step_control, int32_t line_number)
+      : single_step_control_(single_step_control), line_number_(line_number),
+        last_pc_valid(false), last_pc(0) {
     }
 
-    // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
-    // annotalysis.
-    static bool Callback(void* raw_context, uint32_t address, uint32_t line_number) NO_THREAD_SAFETY_ANALYSIS {
-      Locks::breakpoint_lock_->AssertHeld(Thread::Current());
+    static bool Callback(void* raw_context, uint32_t address, uint32_t line_number) {
       DebugCallbackContext* context = reinterpret_cast<DebugCallbackContext*>(raw_context);
-      if (static_cast<int32_t>(line_number) == gSingleStepControl.line_number) {
+      if (static_cast<int32_t>(line_number) == context->line_number_) {
         if (!context->last_pc_valid) {
           // Everything from this address until the next line change is ours.
           context->last_pc = address;
@@ -2528,35 +2517,32 @@
       } else if (context->last_pc_valid) {  // and the line number is new
         // Add everything from the last entry up until here to the set
         for (uint32_t dex_pc = context->last_pc; dex_pc < address; ++dex_pc) {
-          gSingleStepControl.dex_pcs.insert(dex_pc);
+          context->single_step_control_->dex_pcs.insert(dex_pc);
         }
         context->last_pc_valid = false;
       }
       return false;  // There may be multiple entries for any given line.
     }
 
-    // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
-    // annotalysis.
-    ~DebugCallbackContext() NO_THREAD_SAFETY_ANALYSIS {
-      Locks::breakpoint_lock_->AssertHeld(Thread::Current());
+    ~DebugCallbackContext() {
       // If the line number was the last in the position table...
       if (last_pc_valid) {
-        size_t end = MethodHelper(gSingleStepControl.method).GetCodeItem()->insns_size_in_code_units_;
+        size_t end = MethodHelper(single_step_control_->method).GetCodeItem()->insns_size_in_code_units_;
         for (uint32_t dex_pc = last_pc; dex_pc < end; ++dex_pc) {
-          gSingleStepControl.dex_pcs.insert(dex_pc);
+          single_step_control_->dex_pcs.insert(dex_pc);
         }
       }
     }
 
+    SingleStepControl* const single_step_control_;
+    const int32_t line_number_;
     bool last_pc_valid;
     uint32_t last_pc;
   };
-  gSingleStepControl.dex_pcs.clear();
-  const mirror::ArtMethod* m = gSingleStepControl.method;
-  if (m->IsNative()) {
-    gSingleStepControl.line_number = -1;
-  } else {
-    DebugCallbackContext context;
+  single_step_control->dex_pcs.clear();
+  const mirror::ArtMethod* m = single_step_control->method;
+  if (!m->IsNative()) {
+    DebugCallbackContext context(single_step_control, line_number);
     MethodHelper mh(m);
     mh.GetDexFile().DecodeDebugInfo(mh.GetCodeItem(), m->IsStatic(), m->GetDexMethodIndex(),
                                     DebugCallbackContext::Callback, NULL, &context);
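The DebugCallbackContext above turns a source line into the set of dex pcs that belong to it. A self-contained sketch of the same sweep (with a hypothetical position_table input; the real entries come from DecodeDebugInfo) could look like this:

#include <cstdint>
#include <map>
#include <set>

// position_table maps a dex pc to the source line starting at that pc (entries
// ordered by pc). Collect every pc covered by target_line, including a trailing
// range that runs to the end of the code item.
std::set<uint32_t> PcsForLine(const std::map<uint32_t, int32_t>& position_table,
                              uint32_t insns_size_in_code_units, int32_t target_line) {
  std::set<uint32_t> dex_pcs;
  bool last_pc_valid = false;
  uint32_t last_pc = 0;
  for (std::map<uint32_t, int32_t>::const_iterator it = position_table.begin();
       it != position_table.end(); ++it) {
    if (it->second == target_line) {
      if (!last_pc_valid) {
        last_pc = it->first;       // Everything from here until the line changes is ours.
        last_pc_valid = true;
      }
    } else if (last_pc_valid) {    // The line number changed: close the open range.
      for (uint32_t pc = last_pc; pc < it->first; ++pc) {
        dex_pcs.insert(pc);
      }
      last_pc_valid = false;
    }
  }
  if (last_pc_valid) {             // Target line was the last entry in the table.
    for (uint32_t pc = last_pc; pc < insns_size_in_code_units; ++pc) {
      dex_pcs.insert(pc);
    }
  }
  return dex_pcs;
}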
@@ -2566,20 +2552,19 @@
   // Everything else...
   //
 
-  gSingleStepControl.thread = sts.GetThread();
-  gSingleStepControl.step_size = step_size;
-  gSingleStepControl.step_depth = step_depth;
-  gSingleStepControl.is_active = true;
+  single_step_control->step_size = step_size;
+  single_step_control->step_depth = step_depth;
+  single_step_control->is_active = true;
 
   if (VLOG_IS_ON(jdwp)) {
-    VLOG(jdwp) << "Single-step thread: " << *gSingleStepControl.thread;
-    VLOG(jdwp) << "Single-step step size: " << gSingleStepControl.step_size;
-    VLOG(jdwp) << "Single-step step depth: " << gSingleStepControl.step_depth;
-    VLOG(jdwp) << "Single-step current method: " << PrettyMethod(gSingleStepControl.method);
-    VLOG(jdwp) << "Single-step current line: " << gSingleStepControl.line_number;
-    VLOG(jdwp) << "Single-step current stack depth: " << gSingleStepControl.stack_depth;
+    VLOG(jdwp) << "Single-step thread: " << *thread;
+    VLOG(jdwp) << "Single-step step size: " << single_step_control->step_size;
+    VLOG(jdwp) << "Single-step step depth: " << single_step_control->step_depth;
+    VLOG(jdwp) << "Single-step current method: " << PrettyMethod(single_step_control->method);
+    VLOG(jdwp) << "Single-step current line: " << line_number;
+    VLOG(jdwp) << "Single-step current stack depth: " << single_step_control->stack_depth;
     VLOG(jdwp) << "Single-step dex_pc values:";
-    for (std::set<uint32_t>::iterator it = gSingleStepControl.dex_pcs.begin() ; it != gSingleStepControl.dex_pcs.end(); ++it) {
+    for (std::set<uint32_t>::iterator it = single_step_control->dex_pcs.begin(); it != single_step_control->dex_pcs.end(); ++it) {
       VLOG(jdwp) << StringPrintf(" %#x", *it);
     }
   }
@@ -2587,12 +2572,17 @@
   return JDWP::ERR_NONE;
 }
 
-void Dbg::UnconfigureStep(JDWP::ObjectId /*thread_id*/) {
-  MutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
-
-  gSingleStepControl.is_active = false;
-  gSingleStepControl.thread = NULL;
-  gSingleStepControl.dex_pcs.clear();
+void Dbg::UnconfigureStep(JDWP::ObjectId thread_id) {
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
+  Thread* thread;
+  JDWP::JdwpError error = DecodeThread(soa, thread_id, thread);
+  if (error == JDWP::ERR_NONE) {
+    SingleStepControl* single_step_control = thread->GetSingleStepControl();
+    DCHECK(single_step_control != nullptr);
+    single_step_control->is_active = false;
+    single_step_control->dex_pcs.clear();
+  }
 }
 
 static char JdwpTagToShortyChar(JDWP::JdwpTag tag) {
@@ -2732,14 +2722,14 @@
       }
     }
 
-    req->receiver_ = receiver;
-    req->thread_ = thread;
-    req->class_ = c;
-    req->method_ = m;
-    req->arg_count_ = arg_count;
-    req->arg_values_ = arg_values;
-    req->options_ = options;
-    req->invoke_needed_ = true;
+    req->receiver = receiver;
+    req->thread = thread;
+    req->klass = c;
+    req->method = m;
+    req->arg_count = arg_count;
+    req->arg_values = arg_values;
+    req->options = options;
+    req->invoke_needed = true;
   }
 
   // The fact that we've released the thread list lock is a bit risky --- if the thread goes
@@ -2757,7 +2747,7 @@
 
     VLOG(jdwp) << "    Transferring control to event thread";
     {
-      MutexLock mu(self, req->lock_);
+      MutexLock mu(self, req->lock);
 
       if ((options & JDWP::INVOKE_SINGLE_THREADED) == 0) {
         VLOG(jdwp) << "      Resuming all threads";
@@ -2768,8 +2758,8 @@
       }
 
       // Wait for the request to finish executing.
-      while (req->invoke_needed_) {
-        req->cond_.Wait(self);
+      while (req->invoke_needed) {
+        req->cond.Wait(self);
       }
     }
     VLOG(jdwp) << "    Control has returned from event thread";
@@ -2826,24 +2816,24 @@
   }
 
   // Translate the method through the vtable, unless the debugger wants to suppress it.
-  mirror::ArtMethod* m = pReq->method_;
-  if ((pReq->options_ & JDWP::INVOKE_NONVIRTUAL) == 0 && pReq->receiver_ != NULL) {
-    mirror::ArtMethod* actual_method = pReq->class_->FindVirtualMethodForVirtualOrInterface(pReq->method_);
+  mirror::ArtMethod* m = pReq->method;
+  if ((pReq->options & JDWP::INVOKE_NONVIRTUAL) == 0 && pReq->receiver != NULL) {
+    mirror::ArtMethod* actual_method = pReq->klass->FindVirtualMethodForVirtualOrInterface(pReq->method);
     if (actual_method != m) {
       VLOG(jdwp) << "ExecuteMethod translated " << PrettyMethod(m) << " to " << PrettyMethod(actual_method);
       m = actual_method;
     }
   }
   VLOG(jdwp) << "ExecuteMethod " << PrettyMethod(m)
-             << " receiver=" << pReq->receiver_
-             << " arg_count=" << pReq->arg_count_;
+             << " receiver=" << pReq->receiver
+             << " arg_count=" << pReq->arg_count;
   CHECK(m != NULL);
 
   CHECK_EQ(sizeof(jvalue), sizeof(uint64_t));
 
   MethodHelper mh(m);
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
-  arg_array.BuildArgArray(soa, pReq->receiver_, reinterpret_cast<jvalue*>(pReq->arg_values_));
+  arg_array.BuildArgArray(soa, pReq->receiver, reinterpret_cast<jvalue*>(pReq->arg_values));
   InvokeWithArgArray(soa, m, &arg_array, &pReq->result_value, mh.GetShorty()[0]);
 
   mirror::Throwable* exception = soa.Self()->GetException(NULL);
@@ -3372,7 +3362,7 @@
       return HPSG_STATE(SOLIDITY_HARD, KIND_OBJECT);
     }
 
-    if (!Runtime::Current()->GetHeap()->IsHeapAddress(c)) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(c)) {
       LOG(ERROR) << "Invalid class for managed heap object: " << o << " " << c;
       return HPSG_STATE(SOLIDITY_HARD, KIND_UNKNOWN);
     }
@@ -3430,6 +3420,14 @@
   JDWP::Set4BE(&heap_id[0], 1);  // Heap id (bogus; we only have one heap).
   Dbg::DdmSendChunk(native ? CHUNK_TYPE("NHST") : CHUNK_TYPE("HPST"), sizeof(heap_id), heap_id);
 
+  Thread* self = Thread::Current();
+
+  // To allow the Walk/InspectAll() below to exclusively-lock the
+  // mutator lock, temporarily release the shared access to the
+  // mutator lock here by transitioning to the suspended state.
+  Locks::mutator_lock_->AssertSharedHeld(self);
+  self->TransitionFromRunnableToSuspended(kSuspended);
+
   // Send a series of heap segment chunks.
   HeapChunkContext context((what == HPSG_WHAT_MERGED_OBJECTS), native);
   if (native) {
@@ -3437,18 +3435,21 @@
   } else {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     const std::vector<gc::space::ContinuousSpace*>& spaces = heap->GetContinuousSpaces();
-    Thread* self = Thread::Current();
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     typedef std::vector<gc::space::ContinuousSpace*>::const_iterator It;
     for (It cur = spaces.begin(), end = spaces.end(); cur != end; ++cur) {
-      if ((*cur)->IsDlMallocSpace()) {
-        (*cur)->AsDlMallocSpace()->Walk(HeapChunkContext::HeapChunkCallback, &context);
+      if ((*cur)->IsMallocSpace()) {
+        (*cur)->AsMallocSpace()->Walk(HeapChunkContext::HeapChunkCallback, &context);
       }
     }
     // Walk the large objects, these are not in the AllocSpace.
     heap->GetLargeObjectsSpace()->Walk(HeapChunkContext::HeapChunkCallback, &context);
   }
 
+  // Reacquire shared access to the mutator lock.
+  self->TransitionFromSuspendedToRunnable();
+  Locks::mutator_lock_->AssertSharedHeld(self);
+
   // Finally, send a heap end chunk.
   Dbg::DdmSendChunk(native ? CHUNK_TYPE("NHEN") : CHUNK_TYPE("HPEN"), sizeof(heap_id), heap_id);
 }
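The suspend/resume bracket added above is a recurring pattern when a shared mutator-lock holder must let other code take that lock exclusively. A hypothetical RAII wrapper over the same Thread and Locks calls (assuming runtime/thread.h and base/mutex.h; the patch itself performs the transitions inline rather than through such a helper) would look like:

#include "base/mutex.h"   // Locks::mutator_lock_ (assumed include)
#include "thread.h"       // Thread, kSuspended (assumed include)

// Hypothetical helper: drop shared mutator-lock access on construction so the
// scoped region may exclusively lock it (e.g. via Walk/InspectAll), reacquire on exit.
class ScopedSharedMutatorLockRelease {
 public:
  explicit ScopedSharedMutatorLockRelease(Thread* self) : self_(self) {
    Locks::mutator_lock_->AssertSharedHeld(self_);
    self_->TransitionFromRunnableToSuspended(kSuspended);
  }
  ~ScopedSharedMutatorLockRelease() {
    self_->TransitionFromSuspendedToRunnable();
    Locks::mutator_lock_->AssertSharedHeld(self_);
  }
 private:
  Thread* const self_;
};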
@@ -3489,9 +3490,9 @@
       recent_allocation_records_ = new AllocRecord[gAllocRecordMax];
       CHECK(recent_allocation_records_ != NULL);
     }
-    Runtime::Current()->InstrumentQuickAllocEntryPoints();
+    Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints();
   } else {
-    Runtime::Current()->UninstrumentQuickAllocEntryPoints();
+    Runtime::Current()->GetInstrumentation()->UninstrumentQuickAllocEntryPoints();
     delete[] recent_allocation_records_;
     recent_allocation_records_ = NULL;
   }
@@ -3752,7 +3753,6 @@
 
     count = gAllocRecordCount;
     idx = HeadIndex();
-    ClassHelper kh;
     while (count--) {
       // For each entry:
       // (4b) total allocation size
@@ -3761,7 +3761,7 @@
       // (1b) stack depth
       AllocRecord* record = &recent_allocation_records_[idx];
       size_t stack_depth = record->GetDepth();
-      kh.ChangeClass(record->type);
+      ClassHelper kh(record->type);
       size_t allocated_object_class_name_index = class_names.IndexOf(kh.GetDescriptor());
       JDWP::Append4BE(bytes, record->byte_count);
       JDWP::Append2BE(bytes, record->thin_lock_id);
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 8574a33..acbb2c6 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -23,6 +23,7 @@
 
 #include <pthread.h>
 
+#include <set>
 #include <string>
 
 #include "jdwp/jdwp.h"
@@ -47,28 +48,28 @@
  */
 struct DebugInvokeReq {
   DebugInvokeReq()
-      : ready(false), invoke_needed_(false),
-        receiver_(NULL), thread_(NULL), class_(NULL), method_(NULL),
-        arg_count_(0), arg_values_(NULL), options_(0), error(JDWP::ERR_NONE),
+      : ready(false), invoke_needed(false),
+        receiver(NULL), thread(NULL), klass(NULL), method(NULL),
+        arg_count(0), arg_values(NULL), options(0), error(JDWP::ERR_NONE),
         result_tag(JDWP::JT_VOID), exception(0),
-        lock_("a DebugInvokeReq lock", kBreakpointInvokeLock),
-        cond_("a DebugInvokeReq condition variable", lock_) {
+        lock("a DebugInvokeReq lock", kBreakpointInvokeLock),
+        cond("a DebugInvokeReq condition variable", lock) {
   }
 
   /* boolean; only set when we're in the tail end of an event handler */
   bool ready;
 
   /* boolean; set if the JDWP thread wants this thread to do work */
-  bool invoke_needed_;
+  bool invoke_needed;
 
   /* request */
-  mirror::Object* receiver_;      /* not used for ClassType.InvokeMethod */
-  mirror::Object* thread_;
-  mirror::Class* class_;
-  mirror::ArtMethod* method_;
-  uint32_t arg_count_;
-  uint64_t* arg_values_;   /* will be NULL if arg_count_ == 0 */
-  uint32_t options_;
+  mirror::Object* receiver;      /* not used for ClassType.InvokeMethod */
+  mirror::Object* thread;
+  mirror::Class* klass;
+  mirror::ArtMethod* method;
+  uint32_t arg_count;
+  uint64_t* arg_values;   /* will be NULL if arg_count == 0 */
+  uint32_t options;
 
   /* result */
   JDWP::JdwpError error;
@@ -77,8 +78,41 @@
   JDWP::ObjectId exception;
 
   /* condition variable to wait on while the method executes */
-  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  ConditionVariable cond_ GUARDED_BY(lock_);
+  Mutex lock DEFAULT_MUTEX_ACQUIRED_AFTER;
+  ConditionVariable cond GUARDED_BY(lock);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(DebugInvokeReq);
+};
+
+// Thread-local data structure that holds fields for controlling single-stepping.
+struct SingleStepControl {
+  SingleStepControl()
+      : is_active(false), step_size(JDWP::SS_MIN), step_depth(JDWP::SD_INTO),
+        method(nullptr), stack_depth(0) {
+  }
+
+  // Are we single-stepping right now?
+  bool is_active;
+
+  // See JdwpStepSize and JdwpStepDepth for details.
+  JDWP::JdwpStepSize step_size;
+  JDWP::JdwpStepDepth step_depth;
+
+  // The location this single-step was initiated from.
+  // A single-step is initiated in a suspended thread. We save here the current method and the
+  // set of DEX pcs associated with the source line number where the suspension occurred.
+  // This is used to support SD_INTO and SD_OVER single-step depths so we can detect when a
+  // single-step causes execution of an instruction in a different method or at a different line.
+  mirror::ArtMethod* method;
+  std::set<uint32_t> dex_pcs;
+
+  // The stack depth when this single-step was initiated. This is used to support SD_OVER and SD_OUT
+  // single-step depths.
+  int stack_depth;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(SingleStepControl);
 };
 
 class Dbg {
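As a compact illustration of how the per-thread SingleStepControl fields above drive the SD_OUT decision in Dbg::UpdateDebugger, here is a simplified standalone sketch (with local stand-ins for the JDWP enums; only the depth logic is shown):

#include <cassert>

// Local stand-ins for JDWP::JdwpStepDepth.
enum StepDepth { kStepInto, kStepOver, kStepOut };

struct StepState {
  bool is_active;
  StepDepth step_depth;
  int stack_depth;   // Frame depth recorded when the step was configured.
};

// SD_OUT reports once the thread has popped below the depth recorded at
// configuration time, i.e. after the stepped-out-of frame has returned.
bool ShouldReportStepOut(const StepState& s, int current_stack_depth) {
  return s.is_active && s.step_depth == kStepOut && current_stack_depth < s.stack_depth;
}

int main() {
  StepState s = { true, kStepOut, 4 };
  assert(!ShouldReportStepOut(s, 4));  // Same frame depth: keep going.
  assert(!ShouldReportStepOut(s, 5));  // Deeper (a call was made): keep going.
  assert(ShouldReportStepOut(s, 3));   // Frame popped: report the single-step.
  return 0;
}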
@@ -230,6 +264,9 @@
   static void OutputVariableTable(JDWP::RefTypeId ref_type_id, JDWP::MethodId id, bool with_generic,
                                   JDWP::ExpandBuf* pReply)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void OutputMethodReturnValue(JDWP::MethodId method_id, const JValue* return_value,
+                                      JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static JDWP::JdwpError GetBytecodes(JDWP::RefTypeId class_id, JDWP::MethodId method_id,
                                       std::vector<uint8_t>& bytecodes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -254,6 +291,8 @@
 
   static std::string StringToUtf8(JDWP::ObjectId string_id)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void OutputJValue(JDWP::JdwpTag tag, const JValue* return_value, JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
    * Thread, ThreadGroup, Frame
@@ -327,7 +366,8 @@
     kMethodExit     = 0x08,
   };
   static void PostLocationEvent(const mirror::ArtMethod* method, int pcOffset,
-                                mirror::Object* thisPtr, int eventFlags)
+                                mirror::Object* thisPtr, int eventFlags,
+                                const JValue* return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void PostException(Thread* thread, const ThrowLocation& throw_location,
                             mirror::ArtMethod* catch_method,
@@ -353,9 +393,9 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static JDWP::JdwpError ConfigureStep(JDWP::ObjectId thread_id, JDWP::JdwpStepSize size,
                                        JDWP::JdwpStepDepth depth)
-      LOCKS_EXCLUDED(Locks::breakpoint_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static void UnconfigureStep(JDWP::ObjectId thread_id) LOCKS_EXCLUDED(Locks::breakpoint_lock_);
+  static void UnconfigureStep(JDWP::ObjectId thread_id)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static JDWP::JdwpError InvokeMethod(JDWP::ObjectId thread_id, JDWP::ObjectId object_id,
                                       JDWP::RefTypeId class_id, JDWP::MethodId method_id,
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 7e09a48..517f96c 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -36,6 +36,8 @@
 #include "mirror/string.h"
 #include "os.h"
 #include "safe_map.h"
+#include "ScopedFd.h"
+#include "sirt_ref.h"
 #include "thread.h"
 #include "UniquePtr.h"
 #include "utf-inl.h"
@@ -64,34 +66,34 @@
 
 static int OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg) {
   CHECK(magic != NULL);
-  int fd = open(filename, O_RDONLY, 0);
-  if (fd == -1) {
+  ScopedFd fd(open(filename, O_RDONLY, 0));
+  if (fd.get() == -1) {
     *error_msg = StringPrintf("Unable to open '%s' : %s", filename, strerror(errno));
     return -1;
   }
-  int n = TEMP_FAILURE_RETRY(read(fd, magic, sizeof(*magic)));
+  int n = TEMP_FAILURE_RETRY(read(fd.get(), magic, sizeof(*magic)));
   if (n != sizeof(*magic)) {
     *error_msg = StringPrintf("Failed to find magic in '%s'", filename);
     return -1;
   }
-  if (lseek(fd, 0, SEEK_SET) != 0) {
+  if (lseek(fd.get(), 0, SEEK_SET) != 0) {
     *error_msg = StringPrintf("Failed to seek to beginning of file '%s' : %s", filename,
                               strerror(errno));
     return -1;
   }
-  return fd;
+  return fd.release();
 }
 
 bool DexFile::GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg) {
   CHECK(checksum != NULL);
   uint32_t magic;
-  int fd = OpenAndReadMagic(filename, &magic, error_msg);
-  if (fd == -1) {
+  ScopedFd fd(OpenAndReadMagic(filename, &magic, error_msg));
+  if (fd.get() == -1) {
     DCHECK(!error_msg->empty());
     return false;
   }
   if (IsZipMagic(magic)) {
-    UniquePtr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, filename, error_msg));
+    UniquePtr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd.release(), filename, error_msg));
     if (zip_archive.get() == NULL) {
       *error_msg = StringPrintf("Failed to open zip archive '%s'", filename);
       return false;
@@ -105,7 +107,7 @@
     return true;
   }
   if (IsDexMagic(magic)) {
-    UniquePtr<const DexFile> dex_file(DexFile::OpenFile(fd, filename, false, error_msg));
+    UniquePtr<const DexFile> dex_file(DexFile::OpenFile(fd.release(), filename, false, error_msg));
     if (dex_file.get() == NULL) {
       return false;
     }
@@ -120,16 +122,16 @@
                              const char* location,
                              std::string* error_msg) {
   uint32_t magic;
-  int fd = OpenAndReadMagic(filename, &magic, error_msg);
-  if (fd == -1) {
+  ScopedFd fd(OpenAndReadMagic(filename, &magic, error_msg));
+  if (fd.get() == -1) {
     DCHECK(!error_msg->empty());
     return NULL;
   }
   if (IsZipMagic(magic)) {
-    return DexFile::OpenZip(fd, location, error_msg);
+    return DexFile::OpenZip(fd.release(), location, error_msg);
   }
   if (IsDexMagic(magic)) {
-    return DexFile::OpenFile(fd, location, true, error_msg);
+    return DexFile::OpenFile(fd.release(), location, true, error_msg);
   }
   *error_msg = StringPrintf("Expected valid zip or dex file: '%s'", filename);
   return nullptr;
@@ -168,26 +170,26 @@
 const DexFile* DexFile::OpenFile(int fd, const char* location, bool verify,
                                  std::string* error_msg) {
   CHECK(location != nullptr);
-  struct stat sbuf;
-  memset(&sbuf, 0, sizeof(sbuf));
-  if (fstat(fd, &sbuf) == -1) {
-    *error_msg = StringPrintf("DexFile: fstat \'%s\' failed: %s", location, strerror(errno));
-    close(fd);
-    return nullptr;
+  UniquePtr<MemMap> map;
+  {
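+    // The fd is only needed while the file is stat'ed and mmapped; the ScopedFd below closes it
+    // when this scope is left, on both the success and error paths.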
+    ScopedFd delayed_close(fd);
+    struct stat sbuf;
+    memset(&sbuf, 0, sizeof(sbuf));
+    if (fstat(fd, &sbuf) == -1) {
+      *error_msg = StringPrintf("DexFile: fstat \'%s\' failed: %s", location, strerror(errno));
+      return nullptr;
+    }
+    if (S_ISDIR(sbuf.st_mode)) {
+      *error_msg = StringPrintf("Attempt to mmap directory '%s'", location);
+      return nullptr;
+    }
+    size_t length = sbuf.st_size;
+    map.reset(MemMap::MapFile(length, PROT_READ, MAP_PRIVATE, fd, 0, location, error_msg));
+    if (map.get() == nullptr) {
+      DCHECK(!error_msg->empty());
+      return nullptr;
+    }
   }
-  if (S_ISDIR(sbuf.st_mode)) {
-    *error_msg = StringPrintf("Attempt to mmap directory '%s'", location);
-    return nullptr;
-  }
-  size_t length = sbuf.st_size;
-  UniquePtr<MemMap> map(MemMap::MapFile(length, PROT_READ, MAP_PRIVATE, fd, 0, location,
-                                        error_msg));
-  if (map.get() == nullptr) {
-    DCHECK(!error_msg->empty());
-    close(fd);
-    return nullptr;
-  }
-  close(fd);
 
   if (map->Size() < sizeof(DexFile::Header)) {
     *error_msg = StringPrintf(
@@ -220,7 +222,7 @@
     DCHECK(!error_msg->empty());
     return nullptr;
   }
-  return DexFile::Open(*zip_archive.get(), location, error_msg);
+  return DexFile::Open(*zip_archive, location, error_msg);
 }
 
 const DexFile* DexFile::OpenMemory(const std::string& location,
@@ -508,7 +510,8 @@
 }
 
 const DexFile::ProtoId* DexFile::FindProtoId(uint16_t return_type_idx,
-                                         const std::vector<uint16_t>& signature_type_idxs) const {
+                                             const uint16_t* signature_type_idxs,
+                                             uint32_t signature_length) const {
   int32_t lo = 0;
   int32_t hi = NumProtoIds() - 1;
   while (hi >= lo) {
@@ -518,7 +521,7 @@
     if (compare == 0) {
       DexFileParameterIterator it(*this, proto);
       size_t i = 0;
-      while (it.HasNext() && i < signature_type_idxs.size() && compare == 0) {
+      while (it.HasNext() && i < signature_length && compare == 0) {
         compare = signature_type_idxs[i] - it.GetTypeIdx();
         it.Next();
         i++;
@@ -526,7 +529,7 @@
       if (compare == 0) {
         if (it.HasNext()) {
           compare = -1;
-        } else if (i < signature_type_idxs.size()) {
+        } else if (i < signature_length) {
           compare = 1;
         }
       }
@@ -552,22 +555,19 @@
   size_t end = signature.size();
   bool process_return = false;
   while (offset < end) {
+    size_t start_offset = offset;
     char c = signature[offset];
     offset++;
     if (c == ')') {
       process_return = true;
       continue;
     }
-    // TODO: avoid building a string.
-    std::string descriptor;
-    descriptor += c;
     while (c == '[') {  // process array prefix
       if (offset >= end) {  // expect some descriptor following [
         return false;
       }
       c = signature[offset];
       offset++;
-      descriptor += c;
     }
     if (c == 'L') {  // process type descriptors
       do {
@@ -576,9 +576,10 @@
         }
         c = signature[offset];
         offset++;
-        descriptor += c;
       } while (c != ';');
     }
+    // TODO: avoid creating a std::string just to get a 0-terminated char array
+    std::string descriptor(signature.data() + start_offset, offset - start_offset);
     const DexFile::StringId* string_id = FindStringId(descriptor.c_str());
     if (string_id == NULL) {
       return false;
@@ -719,9 +720,9 @@
   for (;;)  {
     uint8_t opcode = *stream++;
     uint16_t reg;
-    uint16_t name_idx;
-    uint16_t descriptor_idx;
-    uint16_t signature_idx = 0;
+    uint32_t name_idx;
+    uint32_t descriptor_idx;
+    uint32_t signature_idx = 0;
 
     switch (opcode) {
       case DBG_END_SEQUENCE:
@@ -875,6 +876,32 @@
   return result;
 }
 
+bool Signature::operator==(const StringPiece& rhs) const {
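+  // Match rhs against this proto without building a temporary string: consume the leading '(',
+  // then each parameter type descriptor in order, then ')', and compare what remains against the
+  // return type descriptor.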
+  if (dex_file_ == nullptr) {
+    return false;
+  }
+  StringPiece tail(rhs);
+  if (!tail.starts_with("(")) {
+    return false;  // Invalid signature
+  }
+  tail.remove_prefix(1);  // "(";
+  const DexFile::TypeList* params = dex_file_->GetProtoParameters(*proto_id_);
+  if (params != nullptr) {
+    for (uint32_t i = 0; i < params->Size(); ++i) {
+      StringPiece param(dex_file_->StringByTypeIdx(params->GetTypeItem(i).type_idx_));
+      if (!tail.starts_with(param)) {
+        return false;
+      }
+      tail.remove_prefix(param.length());
+    }
+  }
+  if (!tail.starts_with(")")) {
+    return false;
+  }
+  tail.remove_prefix(1);  // ")";
+  return tail == dex_file_->StringByTypeIdx(proto_id_->return_type_idx_);
+}
+
 std::ostream& operator<<(std::ostream& os, const Signature& sig) {
   return os << sig.ToString();
 }
@@ -962,12 +989,14 @@
 }
 
 EncodedStaticFieldValueIterator::EncodedStaticFieldValueIterator(const DexFile& dex_file,
-                                                                 mirror::DexCache* dex_cache,
-                                                                 mirror::ClassLoader* class_loader,
+                                                                 SirtRef<mirror::DexCache>* dex_cache,
+                                                                 SirtRef<mirror::ClassLoader>* class_loader,
                                                                  ClassLinker* linker,
                                                                  const DexFile::ClassDef& class_def)
     : dex_file_(dex_file), dex_cache_(dex_cache), class_loader_(class_loader), linker_(linker),
       array_size_(), pos_(-1), type_(kByte) {
+  DCHECK(dex_cache != nullptr);
+  DCHECK(class_loader != nullptr);
   ptr_ = dex_file.GetEncodedStaticFieldValuesArray(class_def);
   if (ptr_ == NULL) {
     array_size_ = 0;
@@ -1050,12 +1079,15 @@
     case kDouble:  field->SetDouble(field->GetDeclaringClass(), jval_.d); break;
     case kNull:    field->SetObject(field->GetDeclaringClass(), NULL); break;
     case kString: {
-      mirror::String* resolved = linker_->ResolveString(dex_file_, jval_.i, dex_cache_);
+      CHECK(!kMovingFields);
+      mirror::String* resolved = linker_->ResolveString(dex_file_, jval_.i, *dex_cache_);
       field->SetObject(field->GetDeclaringClass(), resolved);
       break;
     }
     case kType: {
-      mirror::Class* resolved = linker_->ResolveType(dex_file_, jval_.i, dex_cache_, class_loader_);
+      CHECK(!kMovingFields);
+      mirror::Class* resolved = linker_->ResolveType(dex_file_, jval_.i, *dex_cache_,
+                                                     *class_loader_);
       field->SetObject(field->GetDeclaringClass(), resolved);
       break;
     }
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index a9c24e6..69593cd 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -43,6 +43,8 @@
 }  // namespace mirror
 class ClassLinker;
 class Signature;
+template <typename T>
+class SirtRef;
 class StringPiece;
 class ZipArchive;
 
@@ -659,7 +661,11 @@
 
   // Looks up a proto id for a given return type and signature type list
   const ProtoId* FindProtoId(uint16_t return_type_idx,
-                             const std::vector<uint16_t>& signature_type_idxs_) const;
+                             const uint16_t* signature_type_idxs, uint32_t signature_length) const;
+  const ProtoId* FindProtoId(uint16_t return_type_idx,
+                             const std::vector<uint16_t>& signature_type_idxs) const {
+    return FindProtoId(return_type_idx, &signature_type_idxs[0], signature_type_idxs.size());
+  }
 
   // Given a signature place the type ids into the given vector, returns true on success
   bool CreateTypeList(const StringPiece& signature, uint16_t* return_type_idx,
@@ -958,10 +964,7 @@
     return !(*this == rhs);
   }
 
-  bool operator==(const StringPiece& rhs) const {
-    // TODO: Avoid temporary string allocation.
-    return ToString() == rhs;
-  }
+  bool operator==(const StringPiece& rhs) const;
 
  private:
   Signature(const DexFile* dex, const DexFile::ProtoId& proto) : dex_file_(dex), proto_id_(&proto) {
@@ -1152,8 +1155,8 @@
 
 class EncodedStaticFieldValueIterator {
  public:
-  EncodedStaticFieldValueIterator(const DexFile& dex_file, mirror::DexCache* dex_cache,
-                                  mirror::ClassLoader* class_loader,
+  EncodedStaticFieldValueIterator(const DexFile& dex_file, SirtRef<mirror::DexCache>* dex_cache,
+                                  SirtRef<mirror::ClassLoader>* class_loader,
                                   ClassLinker* linker, const DexFile::ClassDef& class_def)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -1187,8 +1190,8 @@
   static const byte kEncodedValueArgShift = 5;
 
   const DexFile& dex_file_;
-  mirror::DexCache* dex_cache_;  // Dex cache to resolve literal objects.
-  mirror::ClassLoader* class_loader_;  // ClassLoader to resolve types.
+  SirtRef<mirror::DexCache>* const dex_cache_;  // Dex cache to resolve literal objects.
+  SirtRef<mirror::ClassLoader>* const class_loader_;  // ClassLoader to resolve types.
   ClassLinker* linker_;  // Linker to resolve literal objects.
   size_t array_size_;  // Size of array.
   size_t pos_;  // Current position.
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 24ab1ce..2806f94 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -33,20 +33,20 @@
 
 namespace art {
 
-static inline bool CheckFilledNewArrayAlloc(uint32_t type_idx, mirror::ArtMethod* referrer,
-                                            int32_t component_count, Thread* self,
-                                            bool access_check, mirror::Class** klass_ptr)
+static inline mirror::Class* CheckFilledNewArrayAlloc(uint32_t type_idx, mirror::ArtMethod* referrer,
+                                                      int32_t component_count, Thread* self,
+                                                      bool access_check)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (UNLIKELY(component_count < 0)) {
     ThrowNegativeArraySizeException(component_count);
-    return false;  // Failure
+    return nullptr;  // Failure
   }
   mirror::Class* klass = referrer->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
   if (UNLIKELY(klass == NULL)) {  // Not in dex cache so try to resolve
     klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, referrer);
     if (klass == NULL) {  // Error
       DCHECK(self->IsExceptionPending());
-      return false;  // Failure
+      return nullptr;  // Failure
     }
   }
   if (UNLIKELY(klass->IsPrimitive() && !klass->IsPrimitiveInt())) {
@@ -60,40 +60,43 @@
                                "Found type %s; filled-new-array not implemented for anything but \'int\'",
                                PrettyDescriptor(klass).c_str());
     }
-    return false;  // Failure
+    return nullptr;  // Failure
   }
   if (access_check) {
     mirror::Class* referrer_klass = referrer->GetDeclaringClass();
     if (UNLIKELY(!referrer_klass->CanAccess(klass))) {
       ThrowIllegalAccessErrorClass(referrer_klass, klass);
-      return false;  // Failure
+      return nullptr;  // Failure
     }
   }
   DCHECK(klass->IsArrayClass()) << PrettyClass(klass);
-  *klass_ptr = klass;
-  return true;
+  return klass;
 }
 
 // Helper function to allocate array for FILLED_NEW_ARRAY.
 mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* referrer,
                                           int32_t component_count, Thread* self,
-                                          bool access_check) {
-  mirror::Class* klass;
-  if (UNLIKELY(!CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self, access_check, &klass))) {
-    return NULL;
+                                          bool access_check,
+                                          gc::AllocatorType allocator_type) {
+  mirror::Class* klass = CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self,
+                                                  access_check);
+  if (UNLIKELY(klass == nullptr)) {
+    return nullptr;
   }
-  return mirror::Array::AllocUninstrumented(self, klass, component_count);
+  return mirror::Array::Alloc<false>(self, klass, component_count, allocator_type);
 }
 
 // Helper function to allocate array for FILLED_NEW_ARRAY.
 mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* referrer,
                                                       int32_t component_count, Thread* self,
-                                                      bool access_check) {
-  mirror::Class* klass;
-  if (UNLIKELY(!CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self, access_check, &klass))) {
-    return NULL;
+                                                      bool access_check,
+                                                      gc::AllocatorType allocator_type) {
+  mirror::Class* klass = CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self,
+                                                  access_check);
+  if (UNLIKELY(klass == nullptr)) {
+    return nullptr;
   }
-  return mirror::Array::AllocInstrumented(self, klass, component_count);
+  return mirror::Array::Alloc<true>(self, klass, component_count, allocator_type);
 }
 
 void ThrowStackOverflowError(Thread* self) {
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 7ce50c5..747dd56 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -27,9 +27,11 @@
 #include "mirror/art_method.h"
 #include "mirror/array.h"
 #include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
 #include "mirror/throwable.h"
+#include "locks.h"
 #include "object_utils.h"
-
+#include "sirt_ref.h"
 #include "thread.h"
 
 namespace art {
@@ -40,130 +42,122 @@
   class Object;
 }  // namespace mirror
 
-static inline bool CheckObjectAlloc(uint32_t type_idx, mirror::ArtMethod* method,
-                                    Thread* self,
-                                    bool access_check,
-                                    mirror::Class** klass_ptr)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC can handle template specialization.
+template <const bool kAccessCheck>
+ALWAYS_INLINE static inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
+                                                            mirror::ArtMethod* method,
+                                                            Thread* self)
+    NO_THREAD_SAFETY_ANALYSIS {
   mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
-  Runtime* runtime = Runtime::Current();
   if (UNLIKELY(klass == NULL)) {
-    klass = runtime->GetClassLinker()->ResolveType(type_idx, method);
+    klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
     if (klass == NULL) {
       DCHECK(self->IsExceptionPending());
-      return false;  // Failure
+      return nullptr;  // Failure
     }
   }
-  if (access_check) {
+  if (kAccessCheck) {
     if (UNLIKELY(!klass->IsInstantiable())) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
       self->ThrowNewException(throw_location, "Ljava/lang/InstantiationError;",
                               PrettyDescriptor(klass).c_str());
-      return false;  // Failure
+      return nullptr;  // Failure
     }
     mirror::Class* referrer = method->GetDeclaringClass();
     if (UNLIKELY(!referrer->CanAccess(klass))) {
       ThrowIllegalAccessErrorClass(referrer, klass);
-      return false;  // Failure
+      return nullptr;  // Failure
     }
   }
-  if (!klass->IsInitialized() &&
-      !runtime->GetClassLinker()->EnsureInitialized(klass, true, true)) {
-    DCHECK(self->IsExceptionPending());
-    return false;  // Failure
+  if (UNLIKELY(!klass->IsInitialized())) {
+    SirtRef<mirror::Class> sirt_klass(self, klass);
+    // The class initializer might cause a GC.
+    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(klass, true, true)) {
+      DCHECK(self->IsExceptionPending());
+      return nullptr;  // Failure
+    }
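+    // Return the reference through the Sirt: a moving GC triggered by the initializer may have
+    // relocated the class, and the Sirt slot holds the updated root.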
+    return sirt_klass.get();
   }
-  *klass_ptr = klass;
-  return true;
+  return klass;
 }
 
 // Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it
 // cannot be resolved, throw an error. If it can, use it to create an instance.
 // When verification/compiler hasn't been able to verify access, optionally perform an access
 // check.
-static inline mirror::Object* AllocObjectFromCode(uint32_t type_idx, mirror::ArtMethod* method,
-                                                  Thread* self,
-                                                  bool access_check)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::Class* klass;
-  if (UNLIKELY(!CheckObjectAlloc(type_idx, method, self, access_check, &klass))) {
-    return NULL;
+// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
+template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE static inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
+                                                                mirror::ArtMethod* method,
+                                                                Thread* self,
+                                                                gc::AllocatorType allocator_type)
+    NO_THREAD_SAFETY_ANALYSIS {
+  mirror::Class* klass = CheckObjectAlloc<kAccessCheck>(type_idx, method, self);
+  if (UNLIKELY(klass == nullptr)) {
+    return nullptr;
   }
-  return klass->AllocObjectUninstrumented(self);
+  return klass->Alloc<kInstrumented>(self, allocator_type);
 }
 
-static inline mirror::Object* AllocObjectFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
-                                                              Thread* self,
-                                                              bool access_check)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::Class* klass;
-  if (UNLIKELY(!CheckObjectAlloc(type_idx, method, self, access_check, &klass))) {
-    return NULL;
-  }
-  return klass->AllocObjectInstrumented(self);
-}
-
-static inline bool CheckArrayAlloc(uint32_t type_idx, mirror::ArtMethod* method,
-                                   int32_t component_count,
-                                   bool access_check, mirror::Class** klass_ptr)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC can handle template specialization.
+template <bool kAccessCheck>
+ALWAYS_INLINE static inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
+                                                           mirror::ArtMethod* method,
+                                                           int32_t component_count)
+    NO_THREAD_SAFETY_ANALYSIS {
   if (UNLIKELY(component_count < 0)) {
     ThrowNegativeArraySizeException(component_count);
-    return false;  // Failure
+    return nullptr;  // Failure
   }
   mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
-  if (UNLIKELY(klass == NULL)) {  // Not in dex cache so try to resolve
+  if (UNLIKELY(klass == nullptr)) {  // Not in dex cache so try to resolve
     klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
     if (klass == NULL) {  // Error
       DCHECK(Thread::Current()->IsExceptionPending());
-      return false;  // Failure
+      return nullptr;  // Failure
     }
     CHECK(klass->IsArrayClass()) << PrettyClass(klass);
   }
-  if (access_check) {
+  if (kAccessCheck) {
     mirror::Class* referrer = method->GetDeclaringClass();
     if (UNLIKELY(!referrer->CanAccess(klass))) {
       ThrowIllegalAccessErrorClass(referrer, klass);
-      return false;  // Failure
+      return nullptr;  // Failure
     }
   }
-  *klass_ptr = klass;
-  return true;
+  return klass;
 }
 
 // Given the context of a calling Method, use its DexCache to resolve a type to an array Class. If
 // it cannot be resolved, throw an error. If it can, use it to create an array.
 // When verification/compiler hasn't been able to verify access, optionally perform an access
 // check.
-static inline mirror::Array* AllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* method,
-                                                int32_t component_count,
-                                                Thread* self, bool access_check)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::Class* klass;
-  if (UNLIKELY(!CheckArrayAlloc(type_idx, method, component_count, access_check, &klass))) {
-    return NULL;
+// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC can handle template specialization.
+template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE static inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
+                                                              mirror::ArtMethod* method,
+                                                              int32_t component_count,
+                                                              Thread* self,
+                                                              gc::AllocatorType allocator_type)
+    NO_THREAD_SAFETY_ANALYSIS {
+  mirror::Class* klass = CheckArrayAlloc<kAccessCheck>(type_idx, method, component_count);
+  if (UNLIKELY(klass == nullptr)) {
+    return nullptr;
   }
-  return mirror::Array::AllocUninstrumented(self, klass, component_count);
-}
-
-static inline mirror::Array* AllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
-                                                            int32_t component_count,
-                                                            Thread* self, bool access_check)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::Class* klass;
-  if (UNLIKELY(!CheckArrayAlloc(type_idx, method, component_count, access_check, &klass))) {
-    return NULL;
-  }
-  return mirror::Array::AllocInstrumented(self, klass, component_count);
+  return mirror::Array::Alloc<kInstrumented>(self, klass, component_count, allocator_type);
 }
 
 extern mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* method,
-                                                 int32_t component_count,
-                                                 Thread* self, bool access_check)
+                                                 int32_t component_count, Thread* self,
+                                                 bool access_check,
+                                                 gc::AllocatorType allocator_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-extern mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
-                                                             int32_t component_count,
-                                                             Thread* self, bool access_check)
+extern mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx,
+                                                             mirror::ArtMethod* method,
+                                                             int32_t component_count, Thread* self,
+                                                             bool access_check,
+                                                             gc::AllocatorType allocator_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Type of find field operation for fast and slow case.
diff --git a/runtime/entrypoints/portable/portable_alloc_entrypoints.cc b/runtime/entrypoints/portable/portable_alloc_entrypoints.cc
index 91b7353..0d57516 100644
--- a/runtime/entrypoints/portable/portable_alloc_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_alloc_entrypoints.cc
@@ -24,14 +24,14 @@
                                                                mirror::ArtMethod* referrer,
                                                                Thread* thread)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return AllocObjectFromCode(type_idx, referrer, thread, false);
+  return AllocObjectFromCode<false, true>(type_idx, referrer, thread, gc::kAllocatorTypeFreeList);
 }
 
 extern "C" mirror::Object* art_portable_alloc_object_from_code_with_access_check(uint32_t type_idx,
                                                                                  mirror::ArtMethod* referrer,
                                                                                  Thread* thread)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return AllocObjectFromCode(type_idx, referrer, thread, true);
+  return AllocObjectFromCode<true, true>(type_idx, referrer, thread, gc::kAllocatorTypeFreeList);
 }
 
 extern "C" mirror::Object* art_portable_alloc_array_from_code(uint32_t type_idx,
@@ -39,7 +39,8 @@
                                                               uint32_t length,
                                                               Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return AllocArrayFromCode(type_idx, referrer, length, self, false);
+  return AllocArrayFromCode<false, true>(type_idx, referrer, length, self,
+                                         gc::kAllocatorTypeFreeList);
 }
 
 extern "C" mirror::Object* art_portable_alloc_array_from_code_with_access_check(uint32_t type_idx,
@@ -47,7 +48,8 @@
                                                                                 uint32_t length,
                                                                                 Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return AllocArrayFromCode(type_idx, referrer, length, self, true);
+  return AllocArrayFromCode<true, true>(type_idx, referrer, length, self,
+                                        gc::kAllocatorTypeFreeList);
 }
 
 extern "C" mirror::Object* art_portable_check_and_alloc_array_from_code(uint32_t type_idx,
@@ -55,7 +57,8 @@
                                                                         uint32_t length,
                                                                         Thread* thread)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return CheckAndAllocArrayFromCode(type_idx, referrer, length, thread, false);
+  return CheckAndAllocArrayFromCodeInstrumented(type_idx, referrer, length, thread, false,
+                                                gc::kAllocatorTypeFreeList);
 }
 
 extern "C" mirror::Object* art_portable_check_and_alloc_array_from_code_with_access_check(uint32_t type_idx,
@@ -63,7 +66,8 @@
                                                                                           uint32_t length,
                                                                                           Thread* thread)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return CheckAndAllocArrayFromCode(type_idx, referrer, length, thread, true);
+  return CheckAndAllocArrayFromCodeInstrumented(type_idx, referrer, length, thread, true,
+                                                gc::kAllocatorTypeFreeList);
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 6f7b1ab..9155088 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -23,110 +23,63 @@
 
 namespace art {
 
-extern "C" mirror::Object* artAllocObjectFromCode(uint32_t type_idx, mirror::ArtMethod* method,
-                                                  Thread* self, mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocObjectFromCode(type_idx, method, self, false);
+#define GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, suffix2, instrumented_bool, allocator_type) \
+extern "C" mirror::Object* artAllocObjectFromCode ##suffix##suffix2( \
+    uint32_t type_idx, mirror::ArtMethod* method, Thread* self, mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  return AllocObjectFromCode<false, instrumented_bool>(type_idx, method, self, allocator_type); \
+} \
+extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheck##suffix##suffix2( \
+    uint32_t type_idx, mirror::ArtMethod* method, Thread* self, mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  return AllocObjectFromCode<true, instrumented_bool>(type_idx, method, self, allocator_type); \
+} \
+extern "C" mirror::Array* artAllocArrayFromCode##suffix##suffix2( \
+    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
+    mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  return AllocArrayFromCode<false, instrumented_bool>(type_idx, method, component_count, self, \
+                                                      allocator_type); \
+} \
+extern "C" mirror::Array* artAllocArrayFromCodeWithAccessCheck##suffix##suffix2( \
+    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
+    mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  return AllocArrayFromCode<true, instrumented_bool>(type_idx, method, component_count, self, \
+                                                     allocator_type); \
+} \
+extern "C" mirror::Array* artCheckAndAllocArrayFromCode##suffix##suffix2( \
+    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
+    mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  if (!instrumented_bool) { \
+    return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, false, allocator_type); \
+  } else { \
+    return CheckAndAllocArrayFromCodeInstrumented(type_idx, method, component_count, self, false, allocator_type); \
+  } \
+} \
+extern "C" mirror::Array* artCheckAndAllocArrayFromCodeWithAccessCheck##suffix##suffix2( \
+    uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
+    mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  if (!instrumented_bool) { \
+    return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, true, allocator_type); \
+  } else { \
+    return CheckAndAllocArrayFromCodeInstrumented(type_idx, method, component_count, self, true, allocator_type); \
+  } \
 }
 
-extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheck(uint32_t type_idx,
-                                                                 mirror::ArtMethod* method,
-                                                                 Thread* self,
-                                                                 mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocObjectFromCode(type_idx, method, self, true);
-}
+#define GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(suffix, allocator_type) \
+    GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, Instrumented, true, allocator_type) \
+    GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, , false, allocator_type)
 
-extern "C" mirror::Array* artAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* method,
-                                                int32_t component_count, Thread* self,
-                                                mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocArrayFromCode(type_idx, method, component_count, self, false);
-}
-
-extern "C" mirror::Array* artAllocArrayFromCodeWithAccessCheck(uint32_t type_idx,
-                                                               mirror::ArtMethod* method,
-                                                               int32_t component_count,
-                                                               Thread* self,
-                                                               mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocArrayFromCode(type_idx, method, component_count, self, true);
-}
-
-extern "C" mirror::Array* artCheckAndAllocArrayFromCode(uint32_t type_idx,
-                                                        mirror::ArtMethod* method,
-                                                        int32_t component_count, Thread* self,
-                                                        mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, false);
-}
-
-extern "C" mirror::Array* artCheckAndAllocArrayFromCodeWithAccessCheck(uint32_t type_idx,
-                                                                       mirror::ArtMethod* method,
-                                                                       int32_t component_count,
-                                                                       Thread* self,
-                                                                       mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, true);
-}
-
-extern "C" mirror::Object* artAllocObjectFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
-                                                              Thread* self, mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocObjectFromCodeInstrumented(type_idx, method, self, false);
-}
-
-extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheckInstrumented(uint32_t type_idx,
-                                                                             mirror::ArtMethod* method,
-                                                                             Thread* self,
-                                                                             mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocObjectFromCodeInstrumented(type_idx, method, self, true);
-}
-
-extern "C" mirror::Array* artAllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
-                                                            int32_t component_count, Thread* self,
-                                                              mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocArrayFromCodeInstrumented(type_idx, method, component_count, self, false);
-}
-
-extern "C" mirror::Array* artAllocArrayFromCodeWithAccessCheckInstrumented(uint32_t type_idx,
-                                                                           mirror::ArtMethod* method,
-                                                                           int32_t component_count,
-                                                                           Thread* self,
-                                                                           mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return AllocArrayFromCodeInstrumented(type_idx, method, component_count, self, true);
-}
-
-extern "C" mirror::Array* artCheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx,
-                                                                    mirror::ArtMethod* method,
-                                                                    int32_t component_count, Thread* self,
-                                                                    mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return CheckAndAllocArrayFromCodeInstrumented(type_idx, method, component_count, self, false);
-}
-
-extern "C" mirror::Array* artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented(uint32_t type_idx,
-                                                                                   mirror::ArtMethod* method,
-                                                                                   int32_t component_count,
-                                                                                   Thread* self,
-                                                                                   mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  return CheckAndAllocArrayFromCodeInstrumented(type_idx, method, component_count, self, true);
-}
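+// Instantiate both the uninstrumented and instrumented entrypoint families for each allocator.
+// For example, the BumpPointer line below should expand to artAllocObjectFromCodeBumpPointer,
+// artAllocObjectFromCodeBumpPointerInstrumented, and the corresponding array variants.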
+GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(, gc::kAllocatorTypeFreeList)
+GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(BumpPointer, gc::kAllocatorTypeBumpPointer)
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_lock_entrypoints.cc b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
index 2102ab1..540abb3 100644
--- a/runtime/entrypoints/quick/quick_lock_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
@@ -29,9 +29,15 @@
                               "Null reference used for synchronization (monitor-enter)");
     return -1;  // Failure.
   } else {
-    obj->MonitorEnter(self);  // May block
-    DCHECK(self->HoldsLock(obj));
-    DCHECK(!self->IsExceptionPending());
+    if (kIsDebugBuild) {
+      // GC may move the obj, so a Sirt is needed for the following CHECKs.
+      SirtRef<mirror::Object> sirt_obj(self, obj);
+      obj->MonitorEnter(self);  // May block
+      CHECK(self->HoldsLock(sirt_obj.get()));
+      CHECK(!self->IsExceptionPending());
+    } else {
+      obj->MonitorEnter(self);  // May block
+    }
     return 0;  // Success.
     // Only possible exception is NPE and is handled before entry
   }
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 01d3549..8ba08ee 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -416,10 +416,10 @@
 
 // Read object references held in arguments from quick frames and place in a JNI local references,
 // so they don't get garbage collected.
-class RememberFoGcArgumentVisitor : public QuickArgumentVisitor {
+class RememberForGcArgumentVisitor : public QuickArgumentVisitor {
  public:
-  RememberFoGcArgumentVisitor(mirror::ArtMethod** sp, bool is_static, const char* shorty,
-                              uint32_t shorty_len, ScopedObjectAccessUnchecked* soa) :
+  RememberForGcArgumentVisitor(mirror::ArtMethod** sp, bool is_static, const char* shorty,
+                               uint32_t shorty_len, ScopedObjectAccessUnchecked* soa) :
     QuickArgumentVisitor(sp, is_static, shorty, shorty_len), soa_(soa) {}
 
   virtual void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -441,7 +441,7 @@
  private:
   ScopedObjectAccessUnchecked* soa_;
   std::vector<std::pair<jobject, mirror::Object**> > references_;
-  DISALLOW_COPY_AND_ASSIGN(RememberFoGcArgumentVisitor);
+  DISALLOW_COPY_AND_ASSIGN(RememberForGcArgumentVisitor);
 };
 
 // Lazily resolve a method for quick. Called by stub code.
@@ -531,7 +531,7 @@
   uint32_t shorty_len;
   const char* shorty =
       dex_file->GetMethodShorty(dex_file->GetMethodId(dex_method_idx), &shorty_len);
-  RememberFoGcArgumentVisitor visitor(sp, invoke_type == kStatic, shorty, shorty_len, &soa);
+  RememberForGcArgumentVisitor visitor(sp, invoke_type == kStatic, shorty, shorty_len, &soa);
   visitor.VisitArguments();
   thread->EndAssertNoThreadSuspension(old_cause);
   // Resolve method filling in dex cache.
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index a5f9997..e9a6e4f 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -39,7 +39,7 @@
     ScopedObjectAccess soa(Thread::Current());
     SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
                                       soa.Decode<mirror::ClassLoader*>(LoadDex("ExceptionHandle")));
-    my_klass_ = class_linker_->FindClass("LExceptionHandle;", class_loader.get());
+    my_klass_ = class_linker_->FindClass("LExceptionHandle;", class_loader);
     ASSERT_TRUE(my_klass_ != NULL);
     class_linker_->EnsureInitialized(my_klass_, true, true);
 
diff --git a/runtime/gc/accounting/mod_union_table-inl.h b/runtime/gc/accounting/mod_union_table-inl.h
index fb425df..19c6768 100644
--- a/runtime/gc/accounting/mod_union_table-inl.h
+++ b/runtime/gc/accounting/mod_union_table-inl.h
@@ -37,7 +37,7 @@
     typedef std::vector<space::ContinuousSpace*>::const_iterator It;
     for (It it = spaces.begin(); it != spaces.end(); ++it) {
       if ((*it)->Contains(ref)) {
-        return (*it)->IsDlMallocSpace();
+        return (*it)->IsMallocSpace();
       }
     }
     // Assume it points to a large object.
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 7cbe94d..faa198a 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -82,7 +82,7 @@
     if (ref != nullptr) {
       Object* new_ref = visitor_(ref, arg_);
       if (new_ref != ref) {
-        obj->SetFieldObject(offset, ref, false, true);
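+        // Write back the reference returned by the visitor, since the referent may have moved.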
+        obj->SetFieldObject(offset, new_ref, true);
       }
     }
   }
@@ -154,7 +154,7 @@
     // We don't have an early exit since we use the visitor pattern, an early
     // exit should significantly speed this up.
     AddToReferenceArrayVisitor visitor(mod_union_table_, references_);
-    collector::MarkSweep::VisitObjectReferences(obj, visitor);
+    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
   }
  private:
   ModUnionTableReferenceCache* const mod_union_table_;
@@ -206,7 +206,7 @@
     Locks::heap_bitmap_lock_->AssertSharedHeld(Thread::Current());
     DCHECK(obj != NULL);
     CheckReferenceVisitor visitor(mod_union_table_, references_);
-    collector::MarkSweep::VisitObjectReferences(obj, visitor);
+    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
   }
 
  private:
@@ -334,7 +334,7 @@
   for (const byte* card_addr : cleared_cards_) {
     auto start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card_addr));
     auto end = start + CardTable::kCardSize;
-    os << reinterpret_cast<void*>(start) << "-" << reinterpret_cast<void*>(end) << ",";
+    os << reinterpret_cast<void*>(start) << "-" << reinterpret_cast<void*>(end) << "\n";
   }
   os << "]";
 }
diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h
new file mode 100644
index 0000000..f395314
--- /dev/null
+++ b/runtime/gc/allocator/rosalloc-inl.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_INL_H_
+#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_INL_H_
+
+#include "rosalloc.h"
+
+namespace art {
+namespace gc {
+namespace allocator {
+
+inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated) {
+  if (UNLIKELY(size > kLargeSizeThreshold)) {
+    return AllocLargeObject(self, size, bytes_allocated);
+  }
+  void* m = AllocFromRun(self, size, bytes_allocated);
+  // Check if the returned memory is really all zero.
+  if (kCheckZeroMemory && m != NULL) {
+    byte* bytes = reinterpret_cast<byte*>(m);
+    for (size_t i = 0; i < size; ++i) {
+      DCHECK_EQ(bytes[i], 0);
+    }
+  }
+  return m;
+}
+
+}  // namespace allocator
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_INL_H_
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
new file mode 100644
index 0000000..3030fa7
--- /dev/null
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -0,0 +1,1615 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/mutex-inl.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "rosalloc.h"
+
+#include <map>
+#include <list>
+#include <vector>
+
+namespace art {
+namespace gc {
+namespace allocator {
+
+extern "C" void* art_heap_rosalloc_morecore(RosAlloc* rosalloc, intptr_t increment);
+
+size_t RosAlloc::bracketSizes[kNumOfSizeBrackets];
+size_t RosAlloc::numOfPages[kNumOfSizeBrackets];
+size_t RosAlloc::numOfSlots[kNumOfSizeBrackets];
+size_t RosAlloc::headerSizes[kNumOfSizeBrackets];
+size_t RosAlloc::bulkFreeBitMapOffsets[kNumOfSizeBrackets];
+size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
+bool RosAlloc::initialized_ = false;
+
+RosAlloc::RosAlloc(void* base, size_t capacity)
+    : base_(reinterpret_cast<byte*>(base)), footprint_(capacity),
+      capacity_(capacity),
+      lock_("rosalloc global lock", kRosAllocGlobalLock),
+      bulk_free_lock_("rosalloc bulk free lock", kRosAllocBulkFreeLock) {
+  DCHECK(RoundUp(capacity, kPageSize) == capacity);
+  if (!initialized_) {
+    Initialize();
+  }
+  VLOG(heap) << "RosAlloc base="
+             << std::hex << (intptr_t)base_ << ", end="
+             << std::hex << (intptr_t)(base_ + capacity_)
+             << ", capacity=" << std::dec << capacity_;
+  memset(current_runs_, 0, sizeof(current_runs_));
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    size_bracket_locks_[i] = new Mutex("an rosalloc size bracket lock",
+                                       kRosAllocBracketLock);
+  }
+  size_t num_of_pages = capacity_ / kPageSize;
+  page_map_.resize(num_of_pages);
+  free_page_run_size_map_.resize(num_of_pages);
+
+  FreePageRun* free_pages = reinterpret_cast<FreePageRun*>(base_);
+  if (kIsDebugBuild) {
+    free_pages->magic_num_ = kMagicNumFree;
+  }
+  free_pages->SetByteSize(this, capacity_);
+  DCHECK_EQ(capacity_ % kPageSize, static_cast<size_t>(0));
+  free_pages->ReleasePages(this);
+  free_page_runs_.insert(free_pages);
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::RosAlloc() : Inserted run 0x" << std::hex
+              << reinterpret_cast<intptr_t>(free_pages)
+              << " into free_page_runs_";
+  }
+}
+
+void* RosAlloc::AllocPages(Thread* self, size_t num_pages, byte page_map_type) {
+  lock_.AssertHeld(self);
+  DCHECK(page_map_type == kPageMapRun || page_map_type == kPageMapLargeObject);
+  FreePageRun* res = NULL;
+  size_t req_byte_size = num_pages * kPageSize;
+  // Find the lowest address free page run that's large enough.
+  for (auto it = free_page_runs_.begin(); it != free_page_runs_.end(); ) {
+    FreePageRun* fpr = *it;
+    DCHECK(fpr->IsFree());
+    size_t fpr_byte_size = fpr->ByteSize(this);
+    DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
+    if (req_byte_size <= fpr_byte_size) {
+      // Found one.
+      free_page_runs_.erase(it++);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x"
+                  << std::hex << reinterpret_cast<intptr_t>(fpr)
+                  << " from free_page_runs_";
+      }
+      if (req_byte_size < fpr_byte_size) {
+        // Split.
+        FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
+        if (kIsDebugBuild) {
+          remainder->magic_num_ = kMagicNumFree;
+        }
+        remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
+        DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        // Don't need to call madvise on remainder here.
+        free_page_runs_.insert(remainder);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(remainder)
+                    << " into free_page_runs_";
+        }
+        fpr->SetByteSize(this, req_byte_size);
+        DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+      }
+      res = fpr;
+      break;
+    } else {
+      ++it;
+    }
+  }
+
+  // Failed to allocate pages. Grow the footprint, if possible.
+  if (UNLIKELY(res == NULL && capacity_ > footprint_)) {
+    FreePageRun* last_free_page_run = NULL;
+    size_t last_free_page_run_size;
+    auto it = free_page_runs_.rbegin();
+    if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
+      // There is a free page run at the end.
+      DCHECK(last_free_page_run->IsFree());
+      DCHECK(page_map_[ToPageMapIndex(last_free_page_run)] == kPageMapEmpty);
+      last_free_page_run_size = last_free_page_run->ByteSize(this);
+    } else {
+      // There is no free page run at the end.
+      last_free_page_run_size = 0;
+    }
+    DCHECK_LT(last_free_page_run_size, req_byte_size);
+    if (capacity_ - footprint_ + last_free_page_run_size >= req_byte_size) {
+      // If we grow the heap, we can allocate it.
+      size_t increment = std::min(std::max(2 * MB, req_byte_size - last_free_page_run_size),
+                                  capacity_ - footprint_);
+      DCHECK_EQ(increment % kPageSize, static_cast<size_t>(0));
+      size_t new_footprint = footprint_ + increment;
+      size_t new_num_of_pages = new_footprint / kPageSize;
+      DCHECK_LT(page_map_.size(), new_num_of_pages);
+      DCHECK_LT(free_page_run_size_map_.size(), new_num_of_pages);
+      page_map_.resize(new_num_of_pages);
+      free_page_run_size_map_.resize(new_num_of_pages);
+      art_heap_rosalloc_morecore(this, increment);
+      if (last_free_page_run_size > 0) {
+        // There was a free page run at the end. Expand its size.
+        DCHECK_EQ(last_free_page_run_size, last_free_page_run->ByteSize(this));
+        last_free_page_run->SetByteSize(this, last_free_page_run_size + increment);
+        DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        DCHECK(last_free_page_run->End(this) == base_ + new_footprint);
+      } else {
+        // Otherwise, insert a new free page run at the end.
+        FreePageRun* new_free_page_run = reinterpret_cast<FreePageRun*>(base_ + footprint_);
+        if (kIsDebugBuild) {
+          new_free_page_run->magic_num_ = kMagicNumFree;
+        }
+        new_free_page_run->SetByteSize(this, increment);
+        DCHECK_EQ(new_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        free_page_runs_.insert(new_free_page_run);
+        DCHECK(*free_page_runs_.rbegin() == new_free_page_run);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AlloPages() : Grew the heap by inserting run 0x"
+                    << std::hex << reinterpret_cast<intptr_t>(new_free_page_run)
+                    << " into free_page_runs_";
+        }
+      }
+      DCHECK_LE(footprint_ + increment, capacity_);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::AllocPages() : increased the footprint from "
+                  << footprint_ << " to " << new_footprint;
+      }
+      footprint_ = new_footprint;
+
+      // And retry the last free page run.
+      it = free_page_runs_.rbegin();
+      DCHECK(it != free_page_runs_.rend());
+      FreePageRun* fpr = *it;
+      if (kIsDebugBuild && last_free_page_run_size > 0) {
+        DCHECK(last_free_page_run != NULL);
+        DCHECK_EQ(last_free_page_run, fpr);
+      }
+      size_t fpr_byte_size = fpr->ByteSize(this);
+      DCHECK_EQ(fpr_byte_size % kPageSize, static_cast<size_t>(0));
+      DCHECK_LE(req_byte_size, fpr_byte_size);
+      free_page_runs_.erase(fpr);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::AllocPages() : Erased run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
+                  << " from free_page_runs_";
+      }
+      if (req_byte_size < fpr_byte_size) {
+        // Split if there's a remainder.
+        FreePageRun* remainder = reinterpret_cast<FreePageRun*>(reinterpret_cast<byte*>(fpr) + req_byte_size);
+        if (kIsDebugBuild) {
+          remainder->magic_num_ = kMagicNumFree;
+        }
+        remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
+        DCHECK_EQ(remainder->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        free_page_runs_.insert(remainder);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(remainder)
+                    << " into free_page_runs_";
+        }
+        fpr->SetByteSize(this, req_byte_size);
+        DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+      }
+      res = fpr;
+    }
+  }
+  if (LIKELY(res != NULL)) {
+    // Update the page map.
+    size_t page_map_idx = ToPageMapIndex(res);
+    for (size_t i = 0; i < num_pages; i++) {
+      DCHECK(page_map_[page_map_idx + i] == kPageMapEmpty);
+    }
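+    // For example, a 3-page run allocation starting at page_map_idx is encoded
+    // as [kPageMapRun, kPageMapRunPart, kPageMapRunPart], and a 3-page large
+    // object as [kPageMapLargeObject, kPageMapLargeObjectPart,
+    // kPageMapLargeObjectPart]; FreePages() later walks the "part" entries to
+    // recover the allocation's page count.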
+    switch (page_map_type) {
+    case kPageMapRun:
+      page_map_[page_map_idx] = kPageMapRun;
+      for (size_t i = 1; i < num_pages; i++) {
+        page_map_[page_map_idx + i] = kPageMapRunPart;
+      }
+      break;
+    case kPageMapLargeObject:
+      page_map_[page_map_idx] = kPageMapLargeObject;
+      for (size_t i = 1; i < num_pages; i++) {
+        page_map_[page_map_idx + i] = kPageMapLargeObjectPart;
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unreachable - page map type: " << page_map_type;
+      break;
+    }
+    if (kIsDebugBuild) {
+      // Clear the first page, which is not madvised away in a debug
+      // build because it holds the magic number.
+      memset(res, 0, kPageSize);
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::AllocPages() : 0x" << std::hex << reinterpret_cast<intptr_t>(res)
+                << "-0x" << (reinterpret_cast<intptr_t>(res) + num_pages * kPageSize)
+                << "(" << std::dec << (num_pages * kPageSize) << ")";
+    }
+    return res;
+  }
+
+  // Fail.
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::AllocPages() : NULL";
+  }
+  return nullptr;
+}
+
+void RosAlloc::FreePages(Thread* self, void* ptr) {
+  lock_.AssertHeld(self);
+  size_t pm_idx = ToPageMapIndex(ptr);
+  DCHECK(pm_idx < page_map_.size());
+  byte pm_type = page_map_[pm_idx];
+  DCHECK(pm_type == kPageMapRun || pm_type == kPageMapLargeObject);
+  byte pm_part_type;
+  switch (pm_type) {
+  case kPageMapRun:
+    pm_part_type = kPageMapRunPart;
+    break;
+  case kPageMapLargeObject:
+    pm_part_type = kPageMapLargeObjectPart;
+    break;
+  default:
+    pm_part_type = kPageMapEmpty;
+    LOG(FATAL) << "Unreachable - RosAlloc::FreePages() : " << "pm_idx=" << pm_idx << ", pm_type="
+               << static_cast<int>(pm_type) << ", ptr=" << std::hex
+               << reinterpret_cast<intptr_t>(ptr);
+    return;
+  }
+  // Update the page map and count the number of pages.
+  size_t num_pages = 1;
+  page_map_[pm_idx] = kPageMapEmpty;
+  size_t idx = pm_idx + 1;
+  size_t end = page_map_.size();
+  while (idx < end && page_map_[idx] == pm_part_type) {
+    page_map_[idx] = kPageMapEmpty;
+    num_pages++;
+    idx++;
+  }
+
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::FreePages() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
+              << "-0x" << (reinterpret_cast<intptr_t>(ptr) + num_pages * kPageSize)
+              << "(" << std::dec << (num_pages * kPageSize) << ")";
+  }
+
+  // Turn it into a free run.
+  FreePageRun* fpr = reinterpret_cast<FreePageRun*>(ptr);
+  if (kIsDebugBuild) {
+    fpr->magic_num_ = kMagicNumFree;
+  }
+  fpr->SetByteSize(this, num_pages * kPageSize);
+  DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+
+  DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
+  if (!free_page_runs_.empty()) {
+    // Try to coalesce in the higher address direction.
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce a free page run 0x"
+                << std::hex << reinterpret_cast<uintptr_t>(fpr) << " [" << std::dec << pm_idx << "] -0x"
+                << std::hex << reinterpret_cast<uintptr_t>(fpr->End(this)) << " [" << std::dec
+                << (fpr->End(this) == End() ? page_map_.size() : ToPageMapIndex(fpr->End(this))) << "]";
+    }
+    auto higher_it = free_page_runs_.upper_bound(fpr);
+    if (higher_it != free_page_runs_.end()) {
+      for (auto it = higher_it; it != free_page_runs_.end(); ) {
+        FreePageRun* h = *it;
+        DCHECK_EQ(h->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a higher free page run 0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(h) << " [" << std::dec << ToPageMapIndex(h) << "] -0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(h->End(this)) << " [" << std::dec
+                    << (h->End(this) == End() ? page_map_.size() : ToPageMapIndex(h->End(this))) << "]";
+        }
+        if (fpr->End(this) == h->Begin()) {
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Success";
+          }
+          free_page_runs_.erase(it++);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(h)
+                      << " from free_page_runs_";
+          }
+          fpr->SetByteSize(this, fpr->ByteSize(this) + h->ByteSize(this));
+          DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        } else {
+          // Not adjacent. Stop.
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Fail";
+          }
+          break;
+        }
+      }
+    }
+    // Try to coalesce in the lower address direction.
+    auto lower_it = free_page_runs_.upper_bound(fpr);
+    if (lower_it != free_page_runs_.begin()) {
+      --lower_it;
+      for (auto it = lower_it; ; ) {
+        // We want to try to coalesce with the first element but
+        // there's no "<=" operator for the iterator.
+        bool to_exit_loop = it == free_page_runs_.begin();
+
+        FreePageRun* l = *it;
+        DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a lower free page run 0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(l) << " [" << std::dec << ToPageMapIndex(l) << "] -0x"
+                    << std::hex << reinterpret_cast<uintptr_t>(l->End(this)) << " [" << std::dec
+                    << (l->End(this) == End() ? page_map_.size() : ToPageMapIndex(l->End(this))) << "]";
+        }
+        if (l->End(this) == fpr->Begin()) {
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Success";
+          }
+          free_page_runs_.erase(it--);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::FreePages() : (coalesce) Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(l)
+                      << " from free_page_runs_";
+          }
+          l->SetByteSize(this, l->ByteSize(this) + fpr->ByteSize(this));
+          DCHECK_EQ(l->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+          fpr = l;
+        } else {
+          // Not adjacent. Stop.
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "Fail";
+          }
+          break;
+        }
+        if (to_exit_loop) {
+          break;
+        }
+      }
+    }
+  }
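+  // As an illustration of the coalescing above: if page runs [2,3] and [6,7]
+  // are free and pages [4,5] are now being freed, the higher run [6,7] is
+  // first absorbed into the new run, then the lower run [2,3] absorbs the
+  // result, leaving a single six-page FreePageRun starting at page 2.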
+
+  // Insert it.
+  DCHECK_EQ(fpr->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+  DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
+  fpr->ReleasePages(this);
+  free_page_runs_.insert(fpr);
+  DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::FreePages() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(fpr)
+              << " into free_page_runs_";
+  }
+}
+
+void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) {
+  DCHECK(size > kLargeSizeThreshold);
+  size_t num_pages = RoundUp(size, kPageSize) / kPageSize;
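+  // E.g. assuming a 4 KB kPageSize, a 5000-byte request rounds up to 2 pages,
+  // so 8192 bytes are reported as allocated below.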
+  void* r;
+  {
+    MutexLock mu(self, lock_);
+    r = AllocPages(self, num_pages, kPageMapLargeObject);
+  }
+  if (bytes_allocated != NULL) {
+    *bytes_allocated = num_pages * kPageSize;
+  }
+  if (kTraceRosAlloc) {
+    if (r != NULL) {
+      LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r)
+                << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize)
+                << "(" << std::dec << (num_pages * kPageSize) << ")";
+    } else {
+      LOG(INFO) << "RosAlloc::AllocLargeObject() : NULL";
+    }
+  }
+  // Check if the returned memory is really all zero.
+  if (kCheckZeroMemory && r != NULL) {
+    byte* bytes = reinterpret_cast<byte*>(r);
+    for (size_t i = 0; i < size; ++i) {
+      DCHECK_EQ(bytes[i], 0);
+    }
+  }
+  return r;
+}
+
+void RosAlloc::FreeInternal(Thread* self, void* ptr) {
+  DCHECK(base_ <= ptr && ptr < base_ + footprint_);
+  size_t pm_idx = RoundDownToPageMapIndex(ptr);
+  bool free_from_run = false;
+  Run* run = NULL;
+  {
+    MutexLock mu(self, lock_);
+    DCHECK(pm_idx < page_map_.size());
+    byte page_map_entry = page_map_[pm_idx];
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::FreeInternal() : " << std::hex << ptr << ", pm_idx=" << std::dec << pm_idx
+                << ", page_map_entry=" << static_cast<int>(page_map_entry);
+    }
+    switch (page_map_[pm_idx]) {
+      case kPageMapEmpty:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return;
+      case kPageMapLargeObject:
+        FreePages(self, ptr);
+        return;
+      case kPageMapLargeObjectPart:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return;
+      case kPageMapRun:
+      case kPageMapRunPart: {
+        free_from_run = true;
+        size_t pi = pm_idx;
+        DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
+        // Find the beginning of the run.
+        while (page_map_[pi] != kPageMapRun) {
+          pi--;
+          DCHECK(pi < capacity_ / kPageSize);
+        }
+        DCHECK(page_map_[pi] == kPageMapRun);
+        run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
+        DCHECK(run->magic_num_ == kMagicNum);
+        break;
+      }
+      default:
+        LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+        return;
+    }
+  }
+  if (LIKELY(free_from_run)) {
+    DCHECK(run != NULL);
+    FreeFromRun(self, ptr, run);
+  }
+}
+
+void RosAlloc::Free(Thread* self, void* ptr) {
+  ReaderMutexLock rmu(self, bulk_free_lock_);
+  FreeInternal(self, ptr);
+}
+
+RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) {
+  Run* new_run;
+  size_t num_pages = numOfPages[idx];
+  // Get the lowest address non-full run from the binary tree.
+  Run* temp = NULL;
+  std::set<Run*>* bt = &non_full_runs_[idx];
+  std::set<Run*>::iterator found = bt->lower_bound(temp);
+  if (found != bt->end()) {
+    // If there's one, use it as the current run.
+    Run* non_full_run = *found;
+    DCHECK(non_full_run != NULL);
+    new_run = non_full_run;
+    DCHECK_EQ(new_run->is_thread_local_, 0);
+    bt->erase(found);
+    DCHECK_EQ(non_full_run->is_thread_local_, 0);
+  } else {
+    // If there's none, allocate a new run and use it as the
+    // current run.
+    {
+      MutexLock mu(self, lock_);
+      new_run = reinterpret_cast<Run*>(AllocPages(self, num_pages, kPageMapRun));
+    }
+    if (new_run == NULL) {
+      return NULL;
+    }
+    if (kIsDebugBuild) {
+      new_run->magic_num_ = kMagicNum;
+    }
+    new_run->size_bracket_idx_ = idx;
+    new_run->top_slot_idx_ = 0;
+    new_run->ClearBitMaps();
+    new_run->to_be_bulk_freed_ = false;
+  }
+  return new_run;
+}
+
+void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) {
+  DCHECK(size <= kLargeSizeThreshold);
+  size_t bracket_size;
+  size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
+  DCHECK_EQ(idx, SizeToIndex(size));
+  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+  DCHECK_EQ(bracket_size, bracketSizes[idx]);
+  DCHECK(size <= bracket_size);
+  DCHECK(size > 512 || bracket_size - size < 16);
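+  // For example, given the bracket table built in Initialize(), a 24-byte
+  // request maps to the 32-byte bracket (idx 1): the returned slot is 32
+  // bytes even though only 24 were requested.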
+
+  void* slot_addr;
+
+  if (LIKELY(idx <= kMaxThreadLocalSizeBracketIdx)) {
+    // Use a thread-local run.
+    Run* thread_local_run = reinterpret_cast<Run*>(self->rosalloc_runs_[idx]);
+    if (UNLIKELY(thread_local_run == NULL)) {
+      MutexLock mu(self, *size_bracket_locks_[idx]);
+      thread_local_run = RefillRun(self, idx);
+      if (UNLIKELY(thread_local_run == NULL)) {
+        return NULL;
+      }
+      DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
+      thread_local_run->is_thread_local_ = 1;
+      self->rosalloc_runs_[idx] = thread_local_run;
+      DCHECK(!thread_local_run->IsFull());
+    }
+
+    DCHECK(thread_local_run != NULL);
+    DCHECK_NE(thread_local_run->is_thread_local_, 0);
+    slot_addr = thread_local_run->AllocSlot();
+
+    if (UNLIKELY(slot_addr == NULL)) {
+      // The run got full. Try to free slots.
+      DCHECK(thread_local_run->IsFull());
+      MutexLock mu(self, *size_bracket_locks_[idx]);
+      bool is_all_free_after_merge;
+      if (thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&is_all_free_after_merge)) {
+        // Some slot got freed. Keep it.
+        DCHECK(!thread_local_run->IsFull());
+        DCHECK_EQ(is_all_free_after_merge, thread_local_run->IsAllFree());
+        if (is_all_free_after_merge) {
+          // Reinstate the bump index mode if it's all free.
+          DCHECK_EQ(thread_local_run->top_slot_idx_, numOfSlots[idx]);
+          thread_local_run->top_slot_idx_ = 0;
+        }
+      } else {
+        // No slots got freed. Try to refill the thread-local run.
+        DCHECK(thread_local_run->IsFull());
+        self->rosalloc_runs_[idx] = NULL;
+        thread_local_run->is_thread_local_ = 0;
+        if (kIsDebugBuild) {
+          full_runs_[idx].insert(thread_local_run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(thread_local_run)
+                      << " into full_runs_[" << std::dec << idx << "]";
+          }
+        }
+        DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+        DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
+        thread_local_run = RefillRun(self, idx);
+        if (UNLIKELY(thread_local_run == NULL)) {
+          return NULL;
+        }
+        DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+        DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
+        thread_local_run->is_thread_local_ = 1;
+        self->rosalloc_runs_[idx] = thread_local_run;
+        DCHECK(!thread_local_run->IsFull());
+      }
+
+      DCHECK(thread_local_run != NULL);
+      DCHECK(!thread_local_run->IsFull());
+      DCHECK_NE(thread_local_run->is_thread_local_, 0);
+      slot_addr = thread_local_run->AllocSlot();
+      // Must succeed now with a new run.
+      DCHECK(slot_addr != NULL);
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
+                << "(" << std::dec << (bracket_size) << ")";
+    }
+  } else {
+    // Use the (shared) current run.
+    MutexLock mu(self, *size_bracket_locks_[idx]);
+    Run* current_run = current_runs_[idx];
+    if (UNLIKELY(current_run == NULL)) {
+      current_run = RefillRun(self, idx);
+      if (UNLIKELY(current_run == NULL)) {
+        return NULL;
+      }
+      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
+      current_run->is_thread_local_ = 0;
+      current_runs_[idx] = current_run;
+      DCHECK(!current_run->IsFull());
+    }
+    DCHECK(current_run != NULL);
+    slot_addr = current_run->AllocSlot();
+    if (UNLIKELY(slot_addr == NULL)) {
+      // The current run got full. Try to refill it.
+      DCHECK(current_run->IsFull());
+      current_runs_[idx] = NULL;
+      if (kIsDebugBuild) {
+        // Insert it into full_runs and set the current run to NULL.
+        full_runs_[idx].insert(current_run);
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(current_run)
+                    << " into full_runs_[" << std::dec << idx << "]";
+        }
+      }
+      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(current_run) != full_runs_[idx].end());
+      current_run = RefillRun(self, idx);
+      if (UNLIKELY(current_run == NULL)) {
+        return NULL;
+      }
+      DCHECK(current_run != NULL);
+      DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end());
+      current_run->is_thread_local_ = 0;
+      current_runs_[idx] = current_run;
+      DCHECK(!current_run->IsFull());
+      slot_addr = current_run->AllocSlot();
+      // Must succeed now with a new run.
+      DCHECK(slot_addr != NULL);
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size)
+                << "(" << std::dec << (bracket_size) << ")";
+    }
+  }
+  if (LIKELY(bytes_allocated != NULL)) {
+    *bytes_allocated = bracket_size;
+  }
+  memset(slot_addr, 0, size);
+  return slot_addr;
+}
+
+void RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) {
+  DCHECK(run->magic_num_ == kMagicNum);
+  DCHECK(run < ptr && ptr < run->End());
+  size_t idx = run->size_bracket_idx_;
+  MutexLock mu(self, *size_bracket_locks_[idx]);
+  bool run_was_full = false;
+  if (kIsDebugBuild) {
+    run_was_full = run->IsFull();
+  }
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::FreeFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr);
+  }
+  if (LIKELY(run->is_thread_local_ != 0)) {
+    // It's a thread-local run. Just mark the thread-local free bit map and return.
+    DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx);
+    DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
+    DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
+    run->MarkThreadLocalFreeBitMap(ptr);
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::FreeFromRun() : Freed a slot in a thread local run 0x" << std::hex
+                << reinterpret_cast<intptr_t>(run);
+    }
+    // A thread local run will be kept as a thread local even if it's become all free.
+    return;
+  }
+  // Free the slot in the run.
+  run->FreeSlot(ptr);
+  std::set<Run*>* non_full_runs = &non_full_runs_[idx];
+  if (run->IsAllFree()) {
+    // It has just become completely free. Free the pages of this run.
+    std::set<Run*>::iterator pos = non_full_runs->find(run);
+    if (pos != non_full_runs->end()) {
+      non_full_runs->erase(pos);
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
+                  << reinterpret_cast<intptr_t>(run) << " from non_full_runs_";
+      }
+    }
+    if (run == current_runs_[idx]) {
+      current_runs_[idx] = NULL;
+    }
+    DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
+    DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
+    {
+      MutexLock mu(self, lock_);
+      FreePages(self, run);
+    }
+  } else {
+    // It is not completely free. If it was neither the current run
+    // nor already in the non-full run set (i.e., it was full), insert
+    // it into the non-full run set.
+    if (run != current_runs_[idx]) {
+      hash_set<Run*, hash_run, eq_run>* full_runs =
+          kIsDebugBuild ? &full_runs_[idx] : NULL;
+      std::set<Run*>::iterator pos = non_full_runs->find(run);
+      if (pos == non_full_runs->end()) {
+        DCHECK(run_was_full);
+        DCHECK(full_runs->find(run) != full_runs->end());
+        if (kIsDebugBuild) {
+          full_runs->erase(run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::FreeFromRun() : Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(run) << " from full_runs_";
+          }
+        }
+        non_full_runs->insert(run);
+        DCHECK(!run->IsFull());
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::FreeFromRun() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(run)
+                    << " into non_full_runs_[" << std::dec << idx << "]";
+        }
+      }
+    }
+  }
+}
+
+void RosAlloc::Run::Dump() {
+  size_t idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  std::string bit_map_str;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t vec = alloc_bit_map_[v];
+    if (v != num_vec - 1) {
+      bit_map_str.append(StringPrintf("%x-", vec));
+    } else {
+      bit_map_str.append(StringPrintf("%x", vec));
+    }
+  }
+  LOG(INFO) << "Run : " << std::hex << reinterpret_cast<intptr_t>(this)
+            << std::dec << ", idx=" << idx << ", bit_map=" << bit_map_str;
+}
+
+void* RosAlloc::Run::AllocSlot() {
+  size_t idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  DCHECK_LE(top_slot_idx_, num_slots);
+  if (LIKELY(top_slot_idx_ < num_slots)) {
+    // If it's in bump index mode, grab the top slot and increment the top index.
+    size_t slot_idx = top_slot_idx_;
+    byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+    }
+    top_slot_idx_++;
+    size_t vec_idx = slot_idx / 32;
+    size_t vec_off = slot_idx % 32;
+    uint32_t* vec = &alloc_bit_map_[vec_idx];
+    DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+    *vec |= 1 << vec_off;
+    DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+    return slot_addr;
+  }
+  // Not in bump index mode. Search the alloc bit map for an empty slot.
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slot_idx = 0;
+  bool found_slot = false;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t *vecp = &alloc_bit_map_[v];
+    uint32_t ffz1 = __builtin_ffs(~*vecp);
+    uint32_t ffz;
+    // TODO: Use LIKELY or UNLIKELY here?
+    if (LIKELY(ffz1 > 0 && (ffz = ffz1 - 1) + v * 32 < num_slots)) {
+      // Found an empty slot. Set the bit.
+      DCHECK_EQ((*vecp & (1 << ffz)), static_cast<uint32_t>(0));
+      *vecp |= (1 << ffz);
+      DCHECK_NE((*vecp & (1 << ffz)), static_cast<uint32_t>(0));
+      slot_idx = ffz + v * 32;
+      found_slot = true;
+      break;
+    }
+  }
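+  // Note on the scan above: __builtin_ffs(~*vecp) returns the 1-based index of
+  // the lowest clear bit (0 if all 32 bits are set), so ffz = ffz1 - 1 is the
+  // first free slot within the vector. E.g. *vecp == 0x0000000f yields
+  // ffz1 == 5, i.e. slot 4 of that vector is the first free one.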
+  if (LIKELY(found_slot)) {
+    byte* slot_addr = reinterpret_cast<byte*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx];
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr)
+                << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+    }
+    return slot_addr;
+  }
+  return NULL;
+}
+
+inline void RosAlloc::Run::FreeSlot(void* ptr) {
+  DCHECK_EQ(is_thread_local_, 0);
+  byte idx = size_bracket_idx_;
+  size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+      - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
+  DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+  size_t slot_idx = offset_from_slot_base / bracketSizes[idx];
+  DCHECK(slot_idx < numOfSlots[idx]);
+  size_t vec_idx = slot_idx / 32;
+  if (kIsDebugBuild) {
+    size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
+    DCHECK(vec_idx < num_vec);
+  }
+  size_t vec_off = slot_idx % 32;
+  uint32_t* vec = &alloc_bit_map_[vec_idx];
+  DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  *vec &= ~(1 << vec_off);
+  DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::Run::FreeSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
+              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+  }
+}
+
+inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) {
+  DCHECK_NE(is_thread_local_, 0);
+  // Free slots in the alloc bit map based on the thread local free bit map.
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  bool changed = false;
+  uint32_t* vecp = &alloc_bit_map_[0];
+  uint32_t* tl_free_vecp = &thread_local_free_bit_map()[0];
+  bool is_all_free_after = true;
+  for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) {
+    uint32_t tl_free_vec = *tl_free_vecp;
+    uint32_t vec_before = *vecp;
+    uint32_t vec_after;
+    if (tl_free_vec != 0) {
+      vec_after = vec_before & ~tl_free_vec;
+      *vecp = vec_after;
+      changed = true;
+      *tl_free_vecp = 0;  // clear the thread local free bit map.
+    } else {
+      vec_after = vec_before;
+    }
+    if (vec_after != 0) {
+      is_all_free_after = false;
+    }
+    DCHECK_EQ(*tl_free_vecp, static_cast<uint32_t>(0));
+  }
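+  // A small worked example of the merge above: if an alloc vector is 0b1011
+  // and its thread-local free vector is 0b0010, the alloc vector becomes
+  // 0b1001, the free vector is cleared, 'changed' becomes true, and
+  // is_all_free_after stays false since allocated slots remain.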
+  *is_all_free_after_out = is_all_free_after;
+  // Return true if at least one bit was set in the thread-local
+  // free bit map and hence at least one bit in the alloc bit map changed.
+  return changed;
+}
+
+inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() {
+  DCHECK_EQ(is_thread_local_, 0);
+  // Free slots in the alloc bit map based on the bulk free bit map.
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  uint32_t* vecp = &alloc_bit_map_[0];
+  uint32_t* free_vecp = &bulk_free_bit_map()[0];
+  for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) {
+    uint32_t free_vec = *free_vecp;
+    if (free_vec != 0) {
+      *vecp &= ~free_vec;
+      *free_vecp = 0;  // clear the bulk free bit map.
+    }
+    DCHECK_EQ(*free_vecp, static_cast<uint32_t>(0));
+  }
+}
+
+inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() {
+  DCHECK_NE(is_thread_local_, 0);
+  // Union the thread local bit map with the bulk free bit map.
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  uint32_t* to_vecp = &thread_local_free_bit_map()[0];
+  uint32_t* from_vecp = &bulk_free_bit_map()[0];
+  for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) {
+    uint32_t from_vec = *from_vecp;
+    if (from_vec != 0) {
+      *to_vecp |= from_vec;
+      *from_vecp = 0;  // clear the from free bit map.
+    }
+    DCHECK_EQ(*from_vecp, static_cast<uint32_t>(0));
+  }
+}
+
+inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) {
+  DCHECK_NE(is_thread_local_, 0);
+  MarkFreeBitMapShared(ptr, thread_local_free_bit_map(), "MarkThreadLocalFreeBitMap");
+}
+
+inline void RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
+  MarkFreeBitMapShared(ptr, bulk_free_bit_map(), "MarkFreeBitMap");
+}
+
+inline void RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
+                                              const char* caller_name) {
+  byte idx = size_bracket_idx_;
+  size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+      - (reinterpret_cast<byte*>(this) + headerSizes[idx]);
+  DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+  size_t slot_idx = offset_from_slot_base / bracketSizes[idx];
+  DCHECK(slot_idx < numOfSlots[idx]);
+  size_t vec_idx = slot_idx / 32;
+  if (kIsDebugBuild) {
+    size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32;
+    DCHECK(vec_idx < num_vec);
+  }
+  size_t vec_off = slot_idx % 32;
+  uint32_t* vec = &free_bit_map_base[vec_idx];
+  DCHECK_EQ((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  *vec |= 1 << vec_off;
+  DCHECK_NE((*vec & (1 << vec_off)), static_cast<uint32_t>(0));
+  if (kTraceRosAlloc) {
+    LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : 0x" << std::hex
+              << reinterpret_cast<intptr_t>(ptr)
+              << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx;
+  }
+}
+
+inline bool RosAlloc::Run::IsAllFree() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t vec = alloc_bit_map_[v];
+    if (vec != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline bool RosAlloc::Run::IsFull() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slots = 0;
+  for (size_t v = 0; v < num_vec; v++, slots += 32) {
+    DCHECK(num_slots >= slots);
+    uint32_t vec = alloc_bit_map_[v];
+    uint32_t mask = (num_slots - slots >= 32) ? static_cast<uint32_t>(-1)
+        : (1 << (num_slots - slots)) - 1;
+    DCHECK(num_slots - slots >= 32 ? mask == static_cast<uint32_t>(-1) : true);
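+    // E.g. with num_slots == 40, the second vector (slots == 32) is compared
+    // against mask == (1 << 8) - 1 == 0xff, so only its 8 valid slot bits need
+    // to be set for the run to count as full.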
+    if (vec != mask) {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline void RosAlloc::Run::ClearBitMaps() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
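+  // The factor of 3 covers the alloc bit map plus the bulk free and
+  // thread-local free bit maps that follow it contiguously (see the run
+  // layout in rosalloc.h).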
+  memset(alloc_bit_map_, 0, sizeof(uint32_t) * num_vec * 3);
+}
+
+void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
+                                    void* arg) {
+  size_t idx = size_bracket_idx_;
+  byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx];
+  size_t num_slots = numOfSlots[idx];
+  size_t bracket_size = IndexToBracketSize(idx);
+  DCHECK_EQ(slot_base + num_slots * bracket_size, reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize);
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slots = 0;
+  for (size_t v = 0; v < num_vec; v++, slots += 32) {
+    DCHECK(num_slots >= slots);
+    uint32_t vec = alloc_bit_map_[v];
+    size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
+    for (size_t i = 0; i < end; ++i) {
+      bool is_allocated = ((vec >> i) & 0x1) != 0;
+      byte* slot_addr = slot_base + (slots + i) * bracket_size;
+      if (is_allocated) {
+        handler(slot_addr, slot_addr + bracket_size, bracket_size, arg);
+      } else {
+        handler(slot_addr, slot_addr + bracket_size, 0, arg);
+      }
+    }
+  }
+}
+
+void RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
+  if (false) {
+    // Used only to test Free() as GC uses only BulkFree().
+    for (size_t i = 0; i < num_ptrs; ++i) {
+      FreeInternal(self, ptrs[i]);
+    }
+    return;
+  }
+
+  WriterMutexLock wmu(self, bulk_free_lock_);
+
+  // First mark slots to free in the bulk free bit map without locking the
+  // size bracket locks. On host, hash_set is faster than vector + flag.
+#ifdef HAVE_ANDROID_OS
+  std::vector<Run*> runs;
+#else
+  hash_set<Run*, hash_run, eq_run> runs;
+#endif
+  {
+    for (size_t i = 0; i < num_ptrs; i++) {
+      void* ptr = ptrs[i];
+      ptrs[i] = NULL;
+      DCHECK(base_ <= ptr && ptr < base_ + footprint_);
+      size_t pm_idx = RoundDownToPageMapIndex(ptr);
+      bool free_from_run = false;
+      Run* run = NULL;
+      {
+        MutexLock mu(self, lock_);
+        DCHECK(pm_idx < page_map_.size());
+        byte page_map_entry = page_map_[pm_idx];
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::BulkFree() : " << std::hex << ptr << ", pm_idx="
+                    << std::dec << pm_idx
+                    << ", page_map_entry=" << static_cast<int>(page_map_entry);
+        }
+        if (LIKELY(page_map_entry == kPageMapRun)) {
+          free_from_run = true;
+          run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
+          DCHECK(run->magic_num_ == kMagicNum);
+        } else if (LIKELY(page_map_entry == kPageMapRunPart)) {
+          free_from_run = true;
+          size_t pi = pm_idx;
+          DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
+          // Find the beginning of the run.
+          while (page_map_[pi] != kPageMapRun) {
+            pi--;
+            DCHECK(pi < capacity_ / kPageSize);
+          }
+          DCHECK(page_map_[pi] == kPageMapRun);
+          run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
+          DCHECK(run->magic_num_ == kMagicNum);
+        } else if (page_map_entry == kPageMapLargeObject) {
+          FreePages(self, ptr);
+        } else {
+          LOG(FATAL) << "Unreachable - page map type: " << page_map_entry;
+        }
+      }
+      if (LIKELY(free_from_run)) {
+        DCHECK(run != NULL);
+        // Set the bit in the bulk free bit map.
+        run->MarkBulkFreeBitMap(ptr);
+#ifdef HAVE_ANDROID_OS
+        if (!run->to_be_bulk_freed_) {
+          run->to_be_bulk_freed_ = true;
+          runs.push_back(run);
+        }
+#else
+        runs.insert(run);
+#endif
+      }
+    }
+  }
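+  // At this point every pointer has been recorded in its run's bulk free bit
+  // map (or freed directly if it was a large object), and each affected run
+  // appears exactly once in 'runs'.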
+
+  // Now, iterate over the affected runs and update the alloc bit map
+  // based on the bulk free bit map (for non-thread-local runs) and
+  // union the bulk free bit map into the thread-local free bit map
+  // (for thread-local runs.)
+#ifdef HAVE_ANDROID_OS
+  typedef std::vector<Run*>::iterator It;
+#else
+  typedef hash_set<Run*, hash_run, eq_run>::iterator It;
+#endif
+  for (It it = runs.begin(); it != runs.end(); ++it) {
+    Run* run = *it;
+#ifdef HAVE_ANDROID_OS
+    DCHECK(run->to_be_bulk_freed_);
+    run->to_be_bulk_freed_ = false;
+#endif
+    size_t idx = run->size_bracket_idx_;
+    MutexLock mu(self, *size_bracket_locks_[idx]);
+    if (run->is_thread_local_ != 0) {
+      DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx);
+      DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end());
+      run->UnionBulkFreeBitMapToThreadLocalFreeBitMap();
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a thread local run 0x"
+                  << std::hex << reinterpret_cast<intptr_t>(run);
+      }
+      DCHECK_NE(run->is_thread_local_, 0);
+      // A thread local run will be kept as a thread local even if
+      // it's become all free.
+    } else {
+      bool run_was_full = run->IsFull();
+      run->MergeBulkFreeBitMapIntoAllocBitMap();
+      if (kTraceRosAlloc) {
+        LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a run 0x" << std::hex
+                  << reinterpret_cast<intptr_t>(run);
+      }
+      // Check if the run should be moved to non_full_runs_ or
+      // free_page_runs_.
+      std::set<Run*>* non_full_runs = &non_full_runs_[idx];
+      hash_set<Run*, hash_run, eq_run>* full_runs =
+          kIsDebugBuild ? &full_runs_[idx] : NULL;
+      if (run->IsAllFree()) {
+        // It has just become completely free. Free the pages of the
+        // run.
+        bool run_was_current = run == current_runs_[idx];
+        if (run_was_current) {
+          DCHECK(full_runs->find(run) == full_runs->end());
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+          // If it was a current run, reuse it.
+        } else if (run_was_full) {
+          // If it was full, remove it from the full run set (debug
+          // only.)
+          if (kIsDebugBuild) {
+            hash_set<Run*, hash_run, eq_run>::iterator pos = full_runs->find(run);
+            DCHECK(pos != full_runs->end());
+            full_runs->erase(pos);
+            if (kTraceRosAlloc) {
+              LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
+                        << reinterpret_cast<intptr_t>(run)
+                        << " from full_runs_";
+            }
+            DCHECK(full_runs->find(run) == full_runs->end());
+          }
+        } else {
+          // If it was in the non-full run set, remove it from the set.
+          DCHECK(full_runs->find(run) == full_runs->end());
+          DCHECK(non_full_runs->find(run) != non_full_runs->end());
+          non_full_runs->erase(run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(run)
+                      << " from non_full_runs_";
+          }
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+        }
+        if (!run_was_current) {
+          MutexLock mu(self, lock_);
+          FreePages(self, run);
+        }
+      } else {
+        // It is not completely free. If it was neither the current run
+        // nor already in the non-full run set (i.e., it was full),
+        // insert it into the non-full run set.
+        if (run == current_runs_[idx]) {
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+          DCHECK(full_runs->find(run) == full_runs->end());
+          // If it was a current run, keep it.
+        } else if (run_was_full) {
+          // If it was full, remove it from the full run set (debug
+          // only) and insert into the non-full run set.
+          DCHECK(full_runs->find(run) != full_runs->end());
+          DCHECK(non_full_runs->find(run) == non_full_runs->end());
+          if (kIsDebugBuild) {
+            full_runs->erase(run);
+            if (kTraceRosAlloc) {
+              LOG(INFO) << "RosAlloc::BulkFree() : Erased run 0x" << std::hex
+                        << reinterpret_cast<intptr_t>(run)
+                        << " from full_runs_";
+            }
+          }
+          non_full_runs->insert(run);
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::BulkFree() : Inserted run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(run)
+                      << " into non_full_runs_[" << std::dec << idx;
+          }
+        } else {
+          // It was not full, so leave it in the non-full run set.
+          DCHECK(full_runs->find(run) == full_runs->end());
+          DCHECK(non_full_runs->find(run) != non_full_runs->end());
+        }
+      }
+    }
+  }
+}
+
+void RosAlloc::DumpPageMap(Thread* self) {
+  MutexLock mu(self, lock_);
+  size_t end = page_map_.size();
+  FreePageRun* curr_fpr = NULL;
+  size_t curr_fpr_size = 0;
+  size_t remaining_curr_fpr_size = 0;
+  size_t num_running_empty_pages = 0;
+  for (size_t i = 0; i < end; ++i) {
+    byte pm = page_map_[i];
+    switch (pm) {
+      case kPageMapEmpty: {
+        FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+        if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
+          // Encountered a fresh free page run.
+          DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+          DCHECK(fpr->IsFree());
+          DCHECK(curr_fpr == NULL);
+          DCHECK_EQ(curr_fpr_size, static_cast<size_t>(0));
+          curr_fpr = fpr;
+          curr_fpr_size = fpr->ByteSize(this);
+          DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
+          remaining_curr_fpr_size = curr_fpr_size - kPageSize;
+          LOG(INFO) << "[" << i << "]=Empty (FPR start)"
+                    << " fpr_size=" << curr_fpr_size
+                    << " remaining_fpr_size=" << remaining_curr_fpr_size;
+          if (remaining_curr_fpr_size == 0) {
+            // Reset at the end of the current free page run.
+            curr_fpr = NULL;
+            curr_fpr_size = 0;
+          }
+          LOG(INFO) << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr);
+          DCHECK_EQ(num_running_empty_pages, static_cast<size_t>(0));
+        } else {
+          // Still part of the current free page run.
+          DCHECK_NE(num_running_empty_pages, static_cast<size_t>(0));
+          DCHECK(curr_fpr != NULL && curr_fpr_size > 0 && remaining_curr_fpr_size > 0);
+          DCHECK_EQ(remaining_curr_fpr_size % kPageSize, static_cast<size_t>(0));
+          DCHECK_GE(remaining_curr_fpr_size, static_cast<size_t>(kPageSize));
+          remaining_curr_fpr_size -= kPageSize;
+          LOG(INFO) << "[" << i << "]=Empty (FPR part)"
+                    << " remaining_fpr_size=" << remaining_curr_fpr_size;
+          if (remaining_curr_fpr_size == 0) {
+            // Reset at the end of the current free page run.
+            curr_fpr = NULL;
+            curr_fpr_size = 0;
+          }
+        }
+        num_running_empty_pages++;
+        break;
+      }
+      case kPageMapLargeObject: {
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        LOG(INFO) << "[" << i << "]=Large (start)";
+        break;
+      }
+      case kPageMapLargeObjectPart:
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        LOG(INFO) << "[" << i << "]=Large (part)";
+        break;
+      case kPageMapRun: {
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
+        size_t idx = run->size_bracket_idx_;
+        LOG(INFO) << "[" << i << "]=Run (start)"
+                  << " idx=" << idx
+                  << " numOfPages=" << numOfPages[idx]
+                  << " thread_local=" << static_cast<int>(run->is_thread_local_)
+                  << " is_all_free=" << (run->IsAllFree() ? 1 : 0);
+        break;
+      }
+      case kPageMapRunPart:
+        DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
+        num_running_empty_pages = 0;
+        LOG(INFO) << "[" << i << "]=Run (part)";
+        break;
+      default:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+    }
+  }
+}
+
+size_t RosAlloc::UsableSize(void* ptr) {
+  DCHECK(base_ <= ptr && ptr < base_ + footprint_);
+  size_t pm_idx = RoundDownToPageMapIndex(ptr);
+  MutexLock mu(Thread::Current(), lock_);
+  switch (page_map_[pm_idx]) {
+  case kPageMapEmpty:
+    LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
+               << reinterpret_cast<intptr_t>(ptr);
+    break;
+  case kPageMapLargeObject: {
+    size_t num_pages = 1;
+    size_t idx = pm_idx + 1;
+    size_t end = page_map_.size();
+    while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
+      num_pages++;
+      idx++;
+    }
+    return num_pages * kPageSize;
+  }
+  case kPageMapLargeObjectPart:
+    LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
+               << reinterpret_cast<intptr_t>(ptr);
+    break;
+  case kPageMapRun:
+  case kPageMapRunPart: {
+    // Find the beginning of the run.
+    while (page_map_[pm_idx] != kPageMapRun) {
+      pm_idx--;
+      DCHECK(pm_idx < capacity_ / kPageSize);
+    }
+    DCHECK(page_map_[pm_idx] == kPageMapRun);
+    Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
+    DCHECK(run->magic_num_ == kMagicNum);
+    size_t idx = run->size_bracket_idx_;
+    size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+        - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
+    DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+    return IndexToBracketSize(idx);
+  }
+  default:
+    LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+    break;
+  }
+  return 0;
+}
+
+bool RosAlloc::Trim() {
+  MutexLock mu(Thread::Current(), lock_);
+  FreePageRun* last_free_page_run;
+  DCHECK_EQ(footprint_ % kPageSize, static_cast<size_t>(0));
+  auto it = free_page_runs_.rbegin();
+  if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
+    // Remove the last free page run, if any.
+    DCHECK(last_free_page_run->IsFree());
+    DCHECK(page_map_[ToPageMapIndex(last_free_page_run)] == kPageMapEmpty);
+    DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
+    DCHECK_EQ(last_free_page_run->End(this), base_ + footprint_);
+    free_page_runs_.erase(last_free_page_run);
+    size_t decrement = last_free_page_run->ByteSize(this);
+    size_t new_footprint = footprint_ - decrement;
+    DCHECK_EQ(new_footprint % kPageSize, static_cast<size_t>(0));
+    size_t new_num_of_pages = new_footprint / kPageSize;
+    DCHECK_GE(page_map_.size(), new_num_of_pages);
+    page_map_.resize(new_num_of_pages);
+    DCHECK_EQ(page_map_.size(), new_num_of_pages);
+    free_page_run_size_map_.resize(new_num_of_pages);
+    DCHECK_EQ(free_page_run_size_map_.size(), new_num_of_pages);
+    art_heap_rosalloc_morecore(this, -(static_cast<intptr_t>(decrement)));
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "RosAlloc::Trim() : decreased the footprint from "
+                << footprint_ << " to " << new_footprint;
+    }
+    DCHECK_LT(new_footprint, footprint_);
+    DCHECK_LT(new_footprint, capacity_);
+    footprint_ = new_footprint;
+    return true;
+  }
+  return false;
+}
+
+void RosAlloc::InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
+                          void* arg) {
+  // Note: no need to use this to release pages as we already do so in FreePages().
+  if (handler == NULL) {
+    return;
+  }
+  MutexLock mu(Thread::Current(), lock_);
+  size_t pm_end = page_map_.size();
+  size_t i = 0;
+  while (i < pm_end) {
+    byte pm = page_map_[i];
+    switch (pm) {
+      case kPageMapEmpty: {
+        // The start of a free page run.
+        FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+        DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
+        size_t fpr_size = fpr->ByteSize(this);
+        DCHECK(IsAligned<kPageSize>(fpr_size));
+        void* start = fpr;
+        void* end = reinterpret_cast<byte*>(start) + fpr_size;
+        handler(start, end, 0, arg);
+        size_t num_pages = fpr_size / kPageSize;
+        if (kIsDebugBuild) {
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            DCHECK_EQ(page_map_[j], kPageMapEmpty);
+          }
+        }
+        i += fpr_size / kPageSize;
+        DCHECK_LE(i, pm_end);
+        break;
+      }
+      case kPageMapLargeObject: {
+        // The start of a large object.
+        size_t num_pages = 1;
+        size_t idx = i + 1;
+        while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
+          num_pages++;
+          idx++;
+        }
+        void* start = base_ + i * kPageSize;
+        void* end = base_ + (i + num_pages) * kPageSize;
+        size_t used_bytes = num_pages * kPageSize;
+        handler(start, end, used_bytes, arg);
+        if (kIsDebugBuild) {
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            DCHECK_EQ(page_map_[j], kPageMapLargeObjectPart);
+          }
+        }
+        i += num_pages;
+        DCHECK_LE(i, pm_end);
+        break;
+      }
+      case kPageMapLargeObjectPart:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+      case kPageMapRun: {
+        // The start of a run.
+        Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
+        DCHECK(run->magic_num_ == kMagicNum);
+        run->InspectAllSlots(handler, arg);
+        size_t num_pages = numOfPages[run->size_bracket_idx_];
+        if (kIsDebugBuild) {
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            DCHECK_EQ(page_map_[j], kPageMapRunPart);
+          }
+        }
+        i += num_pages;
+        DCHECK_LE(i, pm_end);
+        break;
+      }
+      case kPageMapRunPart:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+      default:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+    }
+  }
+}
+
+size_t RosAlloc::Footprint() {
+  MutexLock mu(Thread::Current(), lock_);
+  return footprint_;
+}
+
+size_t RosAlloc::FootprintLimit() {
+  MutexLock mu(Thread::Current(), lock_);
+  return capacity_;
+}
+
+void RosAlloc::SetFootprintLimit(size_t new_capacity) {
+  MutexLock mu(Thread::Current(), lock_);
+  DCHECK_EQ(RoundUp(new_capacity, kPageSize), new_capacity);
+  // Only growing is supported here; shrinking is done separately via Trim().
+  if (capacity_ < new_capacity) {
+    capacity_ = new_capacity;
+    VLOG(heap) << "new capacity=" << capacity_;
+  }
+}
+
+void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
+  Thread* self = Thread::Current();
+  for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) {
+    MutexLock mu(self, *size_bracket_locks_[idx]);
+    Run* thread_local_run = reinterpret_cast<Run*>(thread->rosalloc_runs_[idx]);
+    if (thread_local_run != NULL) {
+      DCHECK_EQ(thread_local_run->magic_num_, kMagicNum);
+      DCHECK_NE(thread_local_run->is_thread_local_, 0);
+      thread->rosalloc_runs_[idx] = NULL;
+      // Note the thread local run may not be full here.
+      bool dont_care;
+      thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care);
+      thread_local_run->is_thread_local_ = 0;
+      thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap();
+      DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
+      DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
+      if (thread_local_run->IsFull()) {
+        if (kIsDebugBuild) {
+          full_runs_[idx].insert(thread_local_run);
+          DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end());
+          if (kTraceRosAlloc) {
+            LOG(INFO) << "RosAlloc::RevokeThreadLocalRuns() : Inserted run 0x" << std::hex
+                      << reinterpret_cast<intptr_t>(thread_local_run)
+                      << " into full_runs_[" << std::dec << idx << "]";
+          }
+        }
+      } else if (thread_local_run->IsAllFree()) {
+        MutexLock mu(self, lock_);
+        FreePages(self, thread_local_run);
+      } else {
+        non_full_runs_[idx].insert(thread_local_run);
+        DCHECK(non_full_runs_[idx].find(thread_local_run) != non_full_runs_[idx].end());
+        if (kTraceRosAlloc) {
+          LOG(INFO) << "RosAlloc::RevokeThreadLocalRuns() : Inserted run 0x" << std::hex
+                    << reinterpret_cast<intptr_t>(thread_local_run)
+                    << " into non_full_runs_[" << std::dec << idx << "]";
+        }
+      }
+    }
+  }
+}
+
+void RosAlloc::RevokeAllThreadLocalRuns() {
+  // This is called when mutator threads won't allocate, such as at
+  // Zygote creation time or during a GC pause.
+  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+  std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
+  for (auto it = thread_list.begin(); it != thread_list.end(); ++it) {
+    Thread* t = *it;
+    RevokeThreadLocalRuns(t);
+  }
+}
+
+void RosAlloc::Initialize() {
+  // Check the consistency of the number of size brackets.
+  DCHECK_EQ(Thread::kRosAllocNumOfSizeBrackets, kNumOfSizeBrackets);
+  // bracketSizes.
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    if (i < kNumOfSizeBrackets - 2) {
+      bracketSizes[i] = 16 * (i + 1);
+    } else if (i == kNumOfSizeBrackets - 2) {
+      bracketSizes[i] = 1 * KB;
+    } else {
+      DCHECK(i == kNumOfSizeBrackets - 1);
+      bracketSizes[i] = 2 * KB;
+    }
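+    // So the regular brackets grow in 16-byte steps (16, 32, 48, ...), with a
+    // 1 KB and a 2 KB bracket at the end.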
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "bracketSizes[" << i << "]=" << bracketSizes[i];
+    }
+  }
+  // numOfPages.
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    if (i < 4) {
+      numOfPages[i] = 1;
+    } else if (i < 8) {
+      numOfPages[i] = 2;
+    } else if (i < 16) {
+      numOfPages[i] = 4;
+    } else if (i < 32) {
+      numOfPages[i] = 8;
+    } else if (i == 32) {
+      DCHECK(i == kNumOfSizeBrackets - 2);
+      numOfPages[i] = 16;
+    } else {
+      DCHECK(i == kNumOfSizeBrackets - 1);
+      numOfPages[i] = 32;
+    }
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "numOfPages[" << i << "]=" << numOfPages[i];
+    }
+  }
+  // Compute numOfSlots, headerSizes, and the free bit map offsets.
+  for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+    size_t bracket_size = bracketSizes[i];
+    size_t run_size = kPageSize * numOfPages[i];
+    size_t max_num_of_slots = run_size / bracket_size;
+    // Compute the actual number of slots by taking the header and
+    // alignment into account.
+    size_t fixed_header_size = RoundUp(Run::fixed_header_size(), sizeof(uint32_t));
+    DCHECK_EQ(fixed_header_size, static_cast<size_t>(8));
+    size_t header_size = 0;
+    size_t bulk_free_bit_map_offset = 0;
+    size_t thread_local_free_bit_map_offset = 0;
+    size_t num_of_slots = 0;
+    // Search for the maximum number of slots that allows enough space
+    // for the header (including the bit maps.)
+    for (int s = max_num_of_slots; s >= 0; s--) {
+      size_t tmp_slots_size = bracket_size * s;
+      size_t tmp_bit_map_size = RoundUp(s, sizeof(uint32_t) * kBitsPerByte) / kBitsPerByte;
+      size_t tmp_bulk_free_bit_map_size = tmp_bit_map_size;
+      size_t tmp_bulk_free_bit_map_off = fixed_header_size + tmp_bit_map_size;
+      size_t tmp_thread_local_free_bit_map_size = tmp_bit_map_size;
+      size_t tmp_thread_local_free_bit_map_off = tmp_bulk_free_bit_map_off + tmp_bulk_free_bit_map_size;
+      size_t tmp_unaligned_header_size = tmp_thread_local_free_bit_map_off + tmp_thread_local_free_bit_map_size;
+      // Align up the unaligned header size. bracket_size may not be a power of two.
+      size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ?
+          tmp_unaligned_header_size :
+          tmp_unaligned_header_size + (bracket_size - tmp_unaligned_header_size % bracket_size);
+      DCHECK_EQ(tmp_header_size % bracket_size, static_cast<size_t>(0));
+      DCHECK_EQ(tmp_header_size % 8, static_cast<size_t>(0));
+      if (tmp_slots_size + tmp_header_size <= run_size) {
+        // Found the right number of slots, that is, there was enough
+        // space for the header (including the bit maps.)
+        num_of_slots = s;
+        header_size = tmp_header_size;
+        bulk_free_bit_map_offset = tmp_bulk_free_bit_map_off;
+        thread_local_free_bit_map_offset = tmp_thread_local_free_bit_map_off;
+        break;
+      }
+    }
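+    // A worked example, assuming a 4 KB kPageSize: for the 16-byte bracket
+    // (one page), the search settles on s = 249, since each of the three bit
+    // maps then takes RoundUp(249, 32) / 8 = 32 bytes, the unaligned header is
+    // 8 + 3 * 32 = 104 bytes, aligned up to 112, and 249 * 16 + 112 = 4096
+    // fits exactly.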
+    DCHECK(num_of_slots > 0 && header_size > 0 && bulk_free_bit_map_offset > 0);
+    // Add the padding for the alignment remainder.
+    header_size += run_size % bracket_size;
+    DCHECK(header_size + num_of_slots * bracket_size == run_size);
+    numOfSlots[i] = num_of_slots;
+    headerSizes[i] = header_size;
+    bulkFreeBitMapOffsets[i] = bulk_free_bit_map_offset;
+    threadLocalFreeBitMapOffsets[i] = thread_local_free_bit_map_offset;
+    if (kTraceRosAlloc) {
+      LOG(INFO) << "numOfSlots[" << i << "]=" << numOfSlots[i]
+                << ", headerSizes[" << i << "]=" << headerSizes[i]
+                << ", bulkFreeBitMapOffsets[" << i << "]=" << bulkFreeBitMapOffsets[i]
+                << ", threadLocalFreeBitMapOffsets[" << i << "]=" << threadLocalFreeBitMapOffsets[i];;
+    }
+  }
+}
+
+void RosAlloc::BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
+  if (used_bytes == 0) {
+    return;
+  }
+  size_t* bytes_allocated = reinterpret_cast<size_t*>(arg);
+  *bytes_allocated += used_bytes;
+}
+
+void RosAlloc::ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg) {
+  if (used_bytes == 0) {
+    return;
+  }
+  size_t* objects_allocated = reinterpret_cast<size_t*>(arg);
+  ++(*objects_allocated);
+}
+
+}  // namespace allocator
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
new file mode 100644
index 0000000..d5b6de1
--- /dev/null
+++ b/runtime/gc/allocator/rosalloc.h
@@ -0,0 +1,489 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
+#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
+
+#include <set>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string>
+#include <sys/mman.h>
+#include <vector>
+
+#include "base/mutex.h"
+#include "base/logging.h"
+#include "globals.h"
+#include "utils.h"
+
+// Boilerplate to use hash_map/hash_set on both host and device.
+#ifdef HAVE_ANDROID_OS
+#include <hash_map>
+#include <hash_set>
+using std::hash_map;
+using std::hash_set;
+#else  // HAVE_ANDROID_OS
+#ifdef __DEPRECATED
+#define ROSALLOC_OLD__DEPRECATED __DEPRECATED
+#undef __DEPRECATED
+#endif
+#include <ext/hash_map>
+#include <ext/hash_set>
+#ifdef ROSALLOC_OLD__DEPRECATED
+#define __DEPRECATED ROSALLOC_OLD__DEPRECATED
+#undef ROSALLOC_OLD__DEPRECATED
+#endif
+using __gnu_cxx::hash_map;
+using __gnu_cxx::hash_set;
+#endif  // HAVE_ANDROID_OS
+
+namespace art {
+namespace gc {
+namespace allocator {
+
+// A Runs-of-slots memory allocator.
+class RosAlloc {
+ private:
+  // Represents a run of free pages.
+  class FreePageRun {
+   public:
+    byte magic_num_;  // The magic number used for debugging only.
+
+    bool IsFree() const {
+      if (kIsDebugBuild) {
+        return magic_num_ == kMagicNumFree;
+      }
+      return true;
+    }
+    size_t ByteSize(RosAlloc* rosalloc) const EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      const byte* fpr_base = reinterpret_cast<const byte*>(this);
+      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
+      size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
+      DCHECK_GE(byte_size, static_cast<size_t>(0));
+      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+      return byte_size;
+    }
+    void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
+        EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+      byte* fpr_base = reinterpret_cast<byte*>(this);
+      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
+      rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
+    }
+    void* Begin() {
+      return reinterpret_cast<void*>(this);
+    }
+    void* End(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      byte* fpr_base = reinterpret_cast<byte*>(this);
+      byte* end = fpr_base + ByteSize(rosalloc);
+      return end;
+    }
+    void ReleasePages(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+      size_t byte_size = ByteSize(rosalloc);
+      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+      if (kIsDebugBuild) {
+        // Exclude the first page that stores the magic number.
+        DCHECK_GE(byte_size, static_cast<size_t>(kPageSize));
+        byte_size -= kPageSize;
+        if (byte_size > 0) {
+          madvise(reinterpret_cast<byte*>(this) + kPageSize, byte_size, MADV_DONTNEED);
+        }
+      } else {
+        madvise(this, byte_size, MADV_DONTNEED);
+      }
+    }
+  };
+
+  // Represents a run of memory slots of the same size.
+  //
+  // A run's memory layout:
+  //
+  // +-------------------+
+  // | magic_num         |
+  // +-------------------+
+  // | size_bracket_idx  |
+  // +-------------------+
+  // | is_thread_local   |
+  // +-------------------+
+  // | to_be_bulk_freed  |
+  // +-------------------+
+  // | top_slot_idx      |
+  // +-------------------+
+  // |                   |
+  // | alloc bit map     |
+  // |                   |
+  // +-------------------+
+  // |                   |
+  // | bulk free bit map |
+  // |                   |
+  // +-------------------+
+  // |                   |
+  // | thread-local free |
+  // | bit map           |
+  // |                   |
+  // +-------------------+
+  // | padding due to    |
+  // | alignment         |
+  // +-------------------+
+  // | slot 0            |
+  // +-------------------+
+  // | slot 1            |
+  // +-------------------+
+  // | slot 2            |
+  // +-------------------+
+  // ...
+  // +-------------------+
+  // | last slot         |
+  // +-------------------+
+  //
+  class Run {
+   public:
+    byte magic_num_;             // The magic number used for debugging.
+    byte size_bracket_idx_;      // The index of the size bracket of this run.
+    byte is_thread_local_;       // True if this run is used as a thread-local run.
+    byte to_be_bulk_freed_;      // Used within BulkFree() to flag a run that's involved with a bulk free.
+    uint32_t top_slot_idx_;      // The top slot index when this run is in bump index mode.
+    uint32_t alloc_bit_map_[0];  // The bit map that tracks whether each slot is in use.
+
+    // bulk_free_bit_map_[] : The bit map that is used for GC to
+    // temporarily mark the slots to free without using a lock. After
+    // all the slots to be freed in a run are marked, all those slots
+    // get freed in bulk with one lock acquisition per run, as opposed
+    // to one per slot, to minimize lock contention. This is used
+    // within BulkFree().
+
+    // thread_local_free_bit_map_[] : The bit map that is used for GC
+    // to temporarily mark the slots to free in a thread-local run
+    // without using a lock (without synchronizing the thread that
+    // owns the thread-local run.) When the thread-local run becomes
+    // full, the thread will check this bit map and update the
+    // allocation bit map of the run (that is, the slots get freed.)
+
+    // Returns the byte size of the header except for the bit maps.
+    static size_t fixed_header_size() {
+      Run temp;
+      size_t size = reinterpret_cast<byte*>(&temp.alloc_bit_map_) - reinterpret_cast<byte*>(&temp);
+      DCHECK_EQ(size, static_cast<size_t>(8));
+      return size;
+    }
+    // Returns the base address of the bulk free bit map.
+    uint32_t* bulk_free_bit_map() {
+      return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + bulkFreeBitMapOffsets[size_bracket_idx_]);
+    }
+    // Returns the base address of the thread local free bit map.
+    uint32_t* thread_local_free_bit_map() {
+      return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + threadLocalFreeBitMapOffsets[size_bracket_idx_]);
+    }
+    void* End() {
+      return reinterpret_cast<byte*>(this) + kPageSize * numOfPages[size_bracket_idx_];
+    }
+    // Frees slots in the allocation bit map with regard to the
+    // thread-local free bit map. Used when a thread-local run becomes
+    // full.
+    bool MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out);
+    // Frees slots in the allocation bit map with regard to the bulk
+    // free bit map. Used in a bulk free.
+    void MergeBulkFreeBitMapIntoAllocBitMap();
+    // Unions the slots to be freed in the bulk free bit map into the
+    // thread-local free bit map. In a bulk free, as a two-step
+    // process, GC will first record all the slots to free in a run in
+    // the bulk free bit map, where it can write without a lock, and
+    // later acquire a lock once per run to union the bits of the bulk
+    // free bit map into the thread-local free bit map.
+    void UnionBulkFreeBitMapToThreadLocalFreeBitMap();
+    // Allocates a slot in a run.
+    void* AllocSlot();
+    // Frees a slot in a run. This is used in a non-bulk free.
+    void FreeSlot(void* ptr);
+    // Marks the slots to free in the bulk free bit map.
+    void MarkBulkFreeBitMap(void* ptr);
+    // Marks the slots to free in the thread-local free bit map.
+    void MarkThreadLocalFreeBitMap(void* ptr);
+    // Returns true if none of the slots in the run are in use.
+    bool IsAllFree();
+    // Returns true if all the slots in the run are in use.
+    bool IsFull();
+    // Clear all the bit maps.
+    void ClearBitMaps();
+    // Iterates over all the slots and applies the given function.
+    void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
+    // Dump the run metadata for debugging.
+    void Dump();
+
+   private:
+    // The common part of MarkBulkFreeBitMap() and MarkThreadLocalFreeBitMap().
+    void MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name);
+  };
+
+  // The magic number for a run.
+  static const byte kMagicNum = 42;
+  // The magic number for free pages.
+  static const byte kMagicNumFree = 43;
+  // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_.
+  static const size_t kNumOfSizeBrackets = 34;
+  // The number of smaller size brackets that are 16 bytes apart.
+  static const size_t kNumOfQuantumSizeBrackets = 32;
+  // The sizes (the slot sizes, in bytes) of the size brackets.
+  static size_t bracketSizes[kNumOfSizeBrackets];
+  // The numbers of pages that are used for runs for each size bracket.
+  static size_t numOfPages[kNumOfSizeBrackets];
+  // The numbers of slots of the runs for each size bracket.
+  static size_t numOfSlots[kNumOfSizeBrackets];
+  // The header sizes in bytes of the runs for each size bracket.
+  static size_t headerSizes[kNumOfSizeBrackets];
+  // The byte offsets of the bulk free bit maps of the runs for each size bracket.
+  static size_t bulkFreeBitMapOffsets[kNumOfSizeBrackets];
+  // The byte offsets of the thread-local free bit maps of the runs for each size bracket.
+  static size_t threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
+
+  // Initialize the run specs (the above arrays).
+  static void Initialize();
+  static bool initialized_;
+
+  // Returns the bracket size in bytes for the given index.
+  static size_t IndexToBracketSize(size_t idx) {
+    DCHECK(idx < kNumOfSizeBrackets);
+    return bracketSizes[idx];
+  }
+  // Returns the index of the size bracket from the bracket size.
+  static size_t BracketSizeToIndex(size_t size) {
+    DCHECK(16 <= size && ((size < 1 * KB && size % 16 == 0) || size == 1 * KB || size == 2 * KB));
+    size_t idx;
+    if (UNLIKELY(size == 1 * KB)) {
+      idx = kNumOfSizeBrackets - 2;
+    } else if (UNLIKELY(size == 2 * KB)) {
+      idx = kNumOfSizeBrackets - 1;
+    } else {
+      DCHECK(size < 1 * KB);
+      DCHECK_EQ(size % 16, static_cast<size_t>(0));
+      idx = size / 16 - 1;
+    }
+    DCHECK(bracketSizes[idx] == size);
+    return idx;
+  }
+  // Rounds the size up to the nearest bracket size.
+  static size_t RoundToBracketSize(size_t size) {
+    DCHECK(size <= kLargeSizeThreshold);
+    if (LIKELY(size <= 512)) {
+      return RoundUp(size, 16);
+    } else if (512 < size && size <= 1 * KB) {
+      return 1 * KB;
+    } else {
+      DCHECK(1 * KB < size && size <= 2 * KB);
+      return 2 * KB;
+    }
+  }
+  // Returns the size bracket index from the byte size with rounding.
+  static size_t SizeToIndex(size_t size) {
+    DCHECK(size <= kLargeSizeThreshold);
+    if (LIKELY(size <= 512)) {
+      return RoundUp(size, 16) / 16 - 1;
+    } else if (512 < size && size <= 1 * KB) {
+      return kNumOfSizeBrackets - 2;
+    } else {
+      DCHECK(1 * KB < size && size <= 2 * KB);
+      return kNumOfSizeBrackets - 1;
+    }
+  }
+  // A combination of SizeToIndex() and RoundToBracketSize().
+  static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
+    DCHECK(size <= kLargeSizeThreshold);
+    if (LIKELY(size <= 512)) {
+      size_t bracket_size = RoundUp(size, 16);
+      *bracket_size_out = bracket_size;
+      size_t idx = bracket_size / 16 - 1;
+      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+      return idx;
+    } else if (512 < size && size <= 1 * KB) {
+      size_t bracket_size = 1024;
+      *bracket_size_out = bracket_size;
+      size_t idx = kNumOfSizeBrackets - 2;
+      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+      return idx;
+    } else {
+      DCHECK(1 * KB < size && size <= 2 * KB);
+      size_t bracket_size = 2048;
+      *bracket_size_out = bracket_size;
+      size_t idx = kNumOfSizeBrackets - 1;
+      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+      return idx;
+    }
+  }
+  // Returns the page map index from an address. Requires that the
+  // address is page size aligned.
+  size_t ToPageMapIndex(const void* addr) const {
+    DCHECK(base_ <= addr && addr < base_ + capacity_);
+    size_t byte_offset = reinterpret_cast<const byte*>(addr) - base_;
+    DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
+    return byte_offset / kPageSize;
+  }
+  // Returns the page map index from an address, rounding down.
+  size_t RoundDownToPageMapIndex(void* addr) {
+    DCHECK(base_ <= addr && addr < reinterpret_cast<byte*>(base_) + capacity_);
+    return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
+  }
+
+  // A memory allocation request larger than this size is treated as a large object and allocated
+  // at page granularity.
+  static const size_t kLargeSizeThreshold = 2048;
+
+  // We use thread-local runs for the size brackets whose indexes
+  // are less than or equal to this index. We use shared (current)
+  // runs for the rest.
+  static const size_t kMaxThreadLocalSizeBracketIdx = 10;
+
+  // If true, check that the returned memory is actually zero.
+  static constexpr bool kCheckZeroMemory = kIsDebugBuild;
+
+  // If true, log verbose details of operations.
+  static constexpr bool kTraceRosAlloc = false;
+
+  struct hash_run {
+    size_t operator()(const RosAlloc::Run* r) const {
+      return reinterpret_cast<size_t>(r);
+    }
+  };
+
+  struct eq_run {
+    bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
+      return r1 == r2;
+    }
+  };
+
+  // The base address of the memory region that's managed by this allocator.
+  byte* base_;
+
+  // The footprint in bytes of the currently allocated portion of the
+  // memory region.
+  size_t footprint_;
+
+  // The maximum footprint. The address, base_ + capacity_, indicates
+  // the end of the memory region that's managed by this allocator.
+  size_t capacity_;
+
+  // The run sets that hold the runs whose slots are not all
+  // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
+  std::set<Run*> non_full_runs_[kNumOfSizeBrackets];
+  // The run sets that hold the runs whose slots are all full. This is
+  // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
+  hash_set<Run*, hash_run, eq_run> full_runs_[kNumOfSizeBrackets];
+  // The set of free pages.
+  std::set<FreePageRun*> free_page_runs_ GUARDED_BY(lock_);
+  // The free page run whose end address is the end of the memory
+  // region that's managed by this allocator, if any.
+  FreePageRun* last_free_page_run_;
+  // The current runs where the allocations are first attempted for
+  // the size brackets that do not use thread-local
+  // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
+  Run* current_runs_[kNumOfSizeBrackets];
+  // The mutexes, one per size bracket.
+  Mutex* size_bracket_locks_[kNumOfSizeBrackets];
+  // The types of page map entries.
+  enum {
+    kPageMapEmpty           = 0,  // Not allocated.
+    kPageMapRun             = 1,  // The beginning of a run.
+    kPageMapRunPart         = 2,  // The non-beginning part of a run.
+    kPageMapLargeObject     = 3,  // The beginning of a large object.
+    kPageMapLargeObjectPart = 4,  // The non-beginning part of a large object.
+  };
+  // The table that indicates what pages are currently used for.
+  std::vector<byte> page_map_ GUARDED_BY(lock_);
+  // The table that indicates the sizes of free page runs. The sizes are
+  // stored here, rather than in the free page headers, so that the
+  // backing pages can be released.
+  std::vector<size_t> free_page_run_size_map_ GUARDED_BY(lock_);
+  // The global lock. Used to guard the page map, the free page set,
+  // and the footprint.
+  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  // The reader-writer lock to allow one bulk free at a time while
+  // allowing multiple individual frees at the same time.
+  ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+  // The base address of the memory region that's managed by this allocator.
+  byte* Begin() { return base_; }
+  // The end address of the memory region that's managed by this allocator.
+  byte* End() { return base_ + capacity_; }
+
+  // Page-granularity alloc/free
+  void* AllocPages(Thread* self, size_t num_pages, byte page_map_type)
+      EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  void FreePages(Thread* self, void* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // Allocate/free a run slot.
+  void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(lock_);
+  void FreeFromRun(Thread* self, void* ptr, Run* run)
+      LOCKS_EXCLUDED(lock_);
+
+  // Used to acquire a new/reused run for a size bracket. Used when a
+  // thread-local or current run gets full.
+  Run* RefillRun(Thread* self, size_t idx) LOCKS_EXCLUDED(lock_);
+
+  // The internal implementation of the non-bulk Free().
+  void FreeInternal(Thread* self, void* ptr) LOCKS_EXCLUDED(lock_);
+
+  // Allocates large objects.
+  void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
+
+ public:
+  RosAlloc(void* base, size_t capacity);
+  void* Alloc(Thread* self, size_t size, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(lock_);
+  void Free(Thread* self, void* ptr)
+      LOCKS_EXCLUDED(bulk_free_lock_);
+  void BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
+      LOCKS_EXCLUDED(bulk_free_lock_);
+  // Returns the size of the allocated slot for a given allocated memory chunk.
+  size_t UsableSize(void* ptr);
+  // Returns the size of the allocated slot for a given size.
+  size_t UsableSize(size_t bytes) {
+    if (UNLIKELY(bytes > kLargeSizeThreshold)) {
+      return RoundUp(bytes, kPageSize);
+    } else {
+      return RoundToBracketSize(bytes);
+    }
+  }
+  // Try to reduce the current footprint by releasing the free page
+  // run at the end of the memory region, if any.
+  bool Trim();
+  // Iterates over all the memory slots and applies the given function.
+  void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
+                  void* arg)
+      LOCKS_EXCLUDED(lock_);
+  // Returns the current footprint.
+  size_t Footprint() LOCKS_EXCLUDED(lock_);
+  // Returns the current capacity, maximum footprint.
+  size_t FootprintLimit() LOCKS_EXCLUDED(lock_);
+  // Update the current capacity.
+  void SetFootprintLimit(size_t bytes) LOCKS_EXCLUDED(lock_);
+  // Releases the thread-local runs assigned to the given thread back to the common set of runs.
+  void RevokeThreadLocalRuns(Thread* thread);
+  // Releases the thread-local runs assigned to all the threads back to the common set of runs.
+  void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
+  // Dumps the page map for debugging.
+  void DumpPageMap(Thread* self);
+
+  // Callbacks for InspectAll that will count the number of bytes
+  // allocated and objects allocated, respectively.
+  static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
+  static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
+};
+
+}  // namespace allocator
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
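
The bracket scheme declared in this header (16-byte steps up to 512 bytes, then 1 KB and 2 KB brackets, 34 brackets total) is easiest to see with a small worked example. The sketch below mirrors RoundToBracketSize() and SizeToIndex() from the header, under the simplifying assumption that callers never pass sizes above kLargeSizeThreshold; it is an illustration, not the runtime code.

    // Standalone sketch of the size-to-bracket mapping declared above.
    #include <cassert>
    #include <cstddef>
    #include <cstdio>

    static const size_t kNumOfSizeBrackets = 34;
    static const size_t kLargeSizeThreshold = 2048;

    // Multiples of 16 up to 512 bytes, then 1 KB, then 2 KB.
    static size_t RoundToBracketSize(size_t size) {
      assert(size <= kLargeSizeThreshold);
      if (size <= 512) return (size + 15) & ~static_cast<size_t>(15);
      if (size <= 1024) return 1024;
      return 2048;
    }

    // Indexes 0..31 for the 16-byte brackets, 32 for 1 KB, 33 for 2 KB.
    static size_t SizeToIndex(size_t size) {
      assert(size <= kLargeSizeThreshold);
      if (size <= 512) return RoundToBracketSize(size) / 16 - 1;
      if (size <= 1024) return kNumOfSizeBrackets - 2;
      return kNumOfSizeBrackets - 1;
    }

    int main() {
      // A 70-byte request lands in the 80-byte bracket (index 4); a 600-byte
      // request lands in the 1 KB bracket (index 32).
      printf("70  -> index %zu, %zu bytes\n", SizeToIndex(70), RoundToBracketSize(70));
      printf("600 -> index %zu, %zu bytes\n", SizeToIndex(600), RoundToBracketSize(600));
      return 0;
    }
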
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 6691cad..cf301fe 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -21,6 +21,7 @@
 
 #include "garbage_collector.h"
 
+#include "base/histogram-inl.h"
 #include "base/logging.h"
 #include "base/mutex-inl.h"
 #include "gc/accounting/heap_bitmap.h"
@@ -36,9 +37,11 @@
 GarbageCollector::GarbageCollector(Heap* heap, const std::string& name)
     : heap_(heap),
       name_(name),
+      clear_soft_references_(false),
       verbose_(VLOG_IS_ON(heap)),
       duration_ns_(0),
       timings_(name_.c_str(), true, verbose_),
+      pause_histogram_((name_ + " paused").c_str(), kPauseBucketSize, kPauseBucketCount),
       cumulative_timings_(name) {
   ResetCumulativeStatistics();
 }
@@ -54,17 +57,24 @@
 
 void GarbageCollector::ResetCumulativeStatistics() {
   cumulative_timings_.Reset();
+  pause_histogram_.Reset();
   total_time_ns_ = 0;
-  total_paused_time_ns_ = 0;
   total_freed_objects_ = 0;
   total_freed_bytes_ = 0;
 }
 
-void GarbageCollector::Run() {
+void GarbageCollector::Run(bool clear_soft_references) {
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
   uint64_t start_time = NanoTime();
   pause_times_.clear();
   duration_ns_ = 0;
+  clear_soft_references_ = clear_soft_references;
+
+  // Reset stats.
+  freed_bytes_ = 0;
+  freed_large_object_bytes_ = 0;
+  freed_objects_ = 0;
+  freed_large_objects_ = 0;
 
   InitializePhase();
 
@@ -75,10 +85,10 @@
     thread_list->SuspendAll();
     MarkingPhase();
     ReclaimPhase();
+    GetHeap()->RevokeAllThreadLocalBuffers();
     thread_list->ResumeAll();
     ATRACE_END();
-    uint64_t pause_end = NanoTime();
-    pause_times_.push_back(pause_end - pause_start);
+    RegisterPause(NanoTime() - pause_start);
   } else {
     Thread* self = Thread::Current();
     {
@@ -93,23 +103,28 @@
       ATRACE_END();
       ATRACE_BEGIN("All mutator threads suspended");
       done = HandleDirtyObjectsPhase();
+      if (done) {
+        GetHeap()->RevokeAllThreadLocalBuffers();
+      }
       ATRACE_END();
       uint64_t pause_end = NanoTime();
       ATRACE_BEGIN("Resuming mutator threads");
       thread_list->ResumeAll();
       ATRACE_END();
-      pause_times_.push_back(pause_end - pause_start);
+      RegisterPause(pause_end - pause_start);
     }
     {
       ReaderMutexLock mu(self, *Locks::mutator_lock_);
       ReclaimPhase();
     }
   }
-
+  FinishPhase();
   uint64_t end_time = NanoTime();
   duration_ns_ = end_time - start_time;
-
-  FinishPhase();
+  total_time_ns_ += GetDurationNs();
+  for (uint64_t pause_time : pause_times_) {
+    pause_histogram_.AddValue(pause_time / 1000);
+  }
 }
 
 void GarbageCollector::SwapBitmaps() {
@@ -127,7 +142,7 @@
       if (live_bitmap != mark_bitmap) {
         heap_->GetLiveBitmap()->ReplaceBitmap(live_bitmap, mark_bitmap);
         heap_->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
-        space->AsDlMallocSpace()->SwapBitmaps();
+        space->AsMallocSpace()->SwapBitmaps();
       }
     }
   }
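
The Run() change above records each pause in nanoseconds during the collection and, once the collection finishes, folds the pauses into a microsecond histogram (pause_time / 1000) using kPauseBucketSize and kPauseBucketCount from the header change that follows. The sketch below shows that accumulation pattern with a simple fixed-width histogram standing in for ART's Histogram<uint64_t>, whose exact bucketing is not part of this diff.

    // Standalone sketch of the pause-accounting pattern in GarbageCollector::Run():
    // per-GC pauses are collected in nanoseconds, then added to a coarse
    // microsecond histogram after the run. The histogram class here is a stand-in.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    class PauseHistogram {
     public:
      PauseHistogram(uint64_t bucket_width_us, size_t bucket_count)
          : bucket_width_us_(bucket_width_us), buckets_(bucket_count, 0) {}
      void AddValue(uint64_t value_us) {
        size_t idx = value_us / bucket_width_us_;
        if (idx >= buckets_.size()) idx = buckets_.size() - 1;  // Clamp overflow.
        ++buckets_[idx];
      }
      void Dump() const {
        for (size_t i = 0; i < buckets_.size(); ++i) {
          if (buckets_[i] != 0) {
            printf("[%llu, %llu) us: %llu pause(s)\n",
                   static_cast<unsigned long long>(i * bucket_width_us_),
                   static_cast<unsigned long long>((i + 1) * bucket_width_us_),
                   static_cast<unsigned long long>(buckets_[i]));
          }
        }
      }
     private:
      uint64_t bucket_width_us_;
      std::vector<uint64_t> buckets_;
    };

    int main() {
      PauseHistogram pause_histogram(500, 32);  // kPauseBucketSize, kPauseBucketCount.
      std::vector<uint64_t> pause_times_ns = {120000, 800000, 2400000};  // Hypothetical pauses.
      for (uint64_t pause_ns : pause_times_ns) {
        pause_histogram.AddValue(pause_ns / 1000);  // Same ns -> us conversion as Run().
      }
      pause_histogram.Dump();
      return 0;
    }
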
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 0f566c9..1779339 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -17,10 +17,10 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_GARBAGE_COLLECTOR_H_
 #define ART_RUNTIME_GC_COLLECTOR_GARBAGE_COLLECTOR_H_
 
+#include "base/histogram.h"
+#include "base/timing_logger.h"
 #include "gc_type.h"
 #include "locks.h"
-#include "base/timing_logger.h"
-
 #include <stdint.h>
 #include <vector>
 
@@ -46,7 +46,7 @@
   virtual GcType GetGcType() const = 0;
 
   // Run the garbage collector.
-  void Run();
+  void Run(bool clear_soft_references);
 
   Heap* GetHeap() const {
     return heap_;
@@ -64,7 +64,7 @@
 
   void RegisterPause(uint64_t nano_length);
 
-  base::TimingLogger& GetTimings() {
+  TimingLogger& GetTimings() {
     return timings_;
   }
 
@@ -78,6 +78,38 @@
   // this is the allocation space, for full GC then we swap the zygote bitmaps too.
   void SwapBitmaps() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  size_t GetFreedBytes() const {
+    return freed_bytes_;
+  }
+
+  size_t GetFreedLargeObjectBytes() const {
+    return freed_large_object_bytes_;
+  }
+
+  size_t GetFreedObjects() const {
+    return freed_objects_;
+  }
+
+  size_t GetFreedLargeObjects() const {
+    return freed_large_objects_;
+  }
+
+  uint64_t GetTotalPausedTimeNs() const {
+    return pause_histogram_.Sum();
+  }
+
+  uint64_t GetTotalFreedBytes() const {
+    return total_freed_bytes_;
+  }
+
+  uint64_t GetTotalFreedObjects() const {
+    return total_freed_objects_;
+  }
+
+  const Histogram<uint64_t>& GetPauseHistogram() const {
+    return pause_histogram_;
+  }
+
  protected:
   // The initial phase. Done without mutators paused.
   virtual void InitializePhase() = 0;
@@ -94,21 +126,32 @@
   // Called after the GC is finished. Done without mutators paused.
   virtual void FinishPhase() = 0;
 
+  static constexpr size_t kPauseBucketSize = 500;
+  static constexpr size_t kPauseBucketCount = 32;
+
   Heap* const heap_;
 
   std::string name_;
 
+  bool clear_soft_references_;
+
   const bool verbose_;
 
   uint64_t duration_ns_;
-  base::TimingLogger timings_;
+  TimingLogger timings_;
 
   // Cumulative statistics.
+  Histogram<uint64_t> pause_histogram_;
   uint64_t total_time_ns_;
-  uint64_t total_paused_time_ns_;
   uint64_t total_freed_objects_;
   uint64_t total_freed_bytes_;
 
+  // Single GC statistics.
+  AtomicInteger freed_bytes_;
+  AtomicInteger freed_large_object_bytes_;
+  AtomicInteger freed_objects_;
+  AtomicInteger freed_large_objects_;
+
   CumulativeLogger cumulative_timings_;
 
   std::vector<uint64_t> pause_times_;
diff --git a/runtime/gc/collector/mark_sweep-inl.h b/runtime/gc/collector/mark_sweep-inl.h
index 270c9ef..7a51553 100644
--- a/runtime/gc/collector/mark_sweep-inl.h
+++ b/runtime/gc/collector/mark_sweep-inl.h
@@ -44,8 +44,7 @@
     if (klass->IsObjectArrayClass()) {
       VisitObjectArrayReferences(obj->AsObjectArray<mirror::Object>(), visitor);
     }
-  } else if (UNLIKELY(klass == java_lang_Class_)) {
-    DCHECK_EQ(klass->GetClass(), java_lang_Class_);
+  } else if (UNLIKELY(klass == mirror::Class::GetJavaLangClass())) {
     if (kCountScannedTypes) {
       ++class_count_;
     }
@@ -56,7 +55,7 @@
     }
     VisitOtherReferences(klass, obj, visitor);
     if (UNLIKELY(klass->IsReferenceClass())) {
-      DelayReferenceReferent(klass, const_cast<mirror::Object*>(obj));
+      DelayReferenceReferent(klass, obj);
     }
   }
 }
@@ -68,11 +67,10 @@
                           Locks::mutator_lock_) {
   DCHECK(obj != NULL);
   DCHECK(obj->GetClass() != NULL);
-
   mirror::Class* klass = obj->GetClass();
   DCHECK(klass != NULL);
   if (visit_class) {
-    visitor(obj, klass, MemberOffset(0), false);
+    visitor(obj, klass, mirror::Object::ClassOffset(), false);
   }
   if (klass == mirror::Class::GetJavaLangClass()) {
     DCHECK_EQ(klass->GetClass(), mirror::Class::GetJavaLangClass());
@@ -90,8 +88,7 @@
 }
 
 template <typename Visitor>
-inline void MarkSweep::VisitInstanceFieldsReferences(mirror::Class* klass,
-                                                     mirror::Object* obj,
+inline void MarkSweep::VisitInstanceFieldsReferences(mirror::Class* klass, mirror::Object* obj,
                                                      const Visitor& visitor)
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
   DCHECK(obj != NULL);
@@ -119,11 +116,6 @@
                                              bool is_static, const Visitor& visitor) {
   if (LIKELY(ref_offsets != CLASS_WALK_SUPER)) {
     // Found a reference offset bitmap.  Mark the specified offsets.
-#ifndef MOVING_COLLECTOR
-    // Clear the class bit since we mark the class as part of marking the classlinker roots.
-    DCHECK_EQ(mirror::Object::ClassOffset().Uint32Value(), 0U);
-    ref_offsets &= (1U << (sizeof(ref_offsets) * 8 - 1)) - 1;
-#endif
     while (ref_offsets != 0) {
       size_t right_shift = CLZ(ref_offsets);
       MemberOffset field_offset = CLASS_OFFSET_FROM_CLZ(right_shift);
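
The loop above walks the 32-bit reference-offset bitmap from the most significant bit using CLZ, visiting one reference field per iteration. A minimal sketch of that bit-walking pattern follows; __builtin_clz stands in for ART's CLZ helper and the offset computation is a placeholder, since CLASS_OFFSET_FROM_CLZ itself is not shown in this diff.

    // Standalone sketch of the CLZ-driven bitmap walk in VisitFieldsReferences():
    // repeatedly find the highest set bit, "visit" the corresponding field, and
    // clear the bit until the bitmap is empty.
    #include <cstdint>
    #include <cstdio>

    static void VisitReferenceOffsets(uint32_t ref_offsets) {
      while (ref_offsets != 0) {
        int right_shift = __builtin_clz(ref_offsets);    // Leading zeros of the highest set bit.
        uint32_t bit = 31 - right_shift;                 // Its bit position.
        uint32_t field_offset = bit * sizeof(uint32_t);  // Placeholder offset mapping.
        printf("visit reference field at offset %u\n", field_offset);
        ref_offsets &= ~(0x80000000u >> right_shift);    // Clear the bit just visited.
      }
    }

    int main() {
      // Hypothetical bitmap with reference fields recorded at bits 0, 3 and 31.
      VisitReferenceOffsets((1u << 0) | (1u << 3) | (1u << 31));
      return 0;
    }
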
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 2c69c77..0697a65 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -93,6 +93,8 @@
   }
 
   // Add the space to the immune region.
+  // TODO: Use space limits instead of current end_ since the end_ can be changed by dlmalloc
+  // callbacks.
   if (immune_begin_ == NULL) {
     DCHECK(immune_end_ == NULL);
     SetImmuneRange(reinterpret_cast<Object*>(space->Begin()),
@@ -108,14 +110,14 @@
     }
     // If previous space was immune, then extend the immune region. Relies on continuous spaces
     // being sorted by Heap::AddContinuousSpace.
-    if (prev_space != NULL && IsImmuneSpace(prev_space)) {
+    if (prev_space != nullptr && IsImmuneSpace(prev_space)) {
       immune_begin_ = std::min(reinterpret_cast<Object*>(space->Begin()), immune_begin_);
       immune_end_ = std::max(reinterpret_cast<Object*>(space->End()), immune_end_);
     }
   }
 }
 
-bool MarkSweep::IsImmuneSpace(const space::ContinuousSpace* space) {
+bool MarkSweep::IsImmuneSpace(const space::ContinuousSpace* space) const {
   return
       immune_begin_ <= reinterpret_cast<Object*>(space->Begin()) &&
       immune_end_ >= reinterpret_cast<Object*>(space->End());
@@ -135,10 +137,9 @@
 
 MarkSweep::MarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix)
     : GarbageCollector(heap,
-                       name_prefix + (name_prefix.empty() ? "" : " ") +
+                       name_prefix +
                        (is_concurrent ? "concurrent mark sweep": "mark sweep")),
       current_mark_bitmap_(NULL),
-      java_lang_Class_(NULL),
       mark_stack_(NULL),
       immune_begin_(NULL),
       immune_end_(NULL),
@@ -147,16 +148,16 @@
       finalizer_reference_list_(NULL),
       phantom_reference_list_(NULL),
       cleared_reference_list_(NULL),
+      live_stack_freeze_size_(0),
       gc_barrier_(new Barrier(0)),
       large_object_lock_("mark sweep large object lock", kMarkSweepLargeObjectLock),
       mark_stack_lock_("mark sweep mark stack lock", kMarkSweepMarkStackLock),
-      is_concurrent_(is_concurrent),
-      clear_soft_references_(false) {
+      is_concurrent_(is_concurrent) {
 }
 
 void MarkSweep::InitializePhase() {
   timings_.Reset();
-  base::TimingLogger::ScopedSplit split("InitializePhase", &timings_);
+  TimingLogger::ScopedSplit split("InitializePhase", &timings_);
   mark_stack_ = heap_->mark_stack_.get();
   DCHECK(mark_stack_ != nullptr);
   SetImmuneRange(nullptr, nullptr);
@@ -165,10 +166,6 @@
   finalizer_reference_list_ = nullptr;
   phantom_reference_list_ = nullptr;
   cleared_reference_list_ = nullptr;
-  freed_bytes_ = 0;
-  freed_large_object_bytes_ = 0;
-  freed_objects_ = 0;
-  freed_large_objects_ = 0;
   class_count_ = 0;
   array_count_ = 0;
   other_count_ = 0;
@@ -179,8 +176,6 @@
   work_chunks_created_ = 0;
   work_chunks_deleted_ = 0;
   reference_count_ = 0;
-  java_lang_Class_ = Class::GetJavaLangClass();
-  CHECK(java_lang_Class_ != nullptr);
 
   FindDefaultMarkBitmap();
 
@@ -190,14 +185,14 @@
 }
 
 void MarkSweep::ProcessReferences(Thread* self) {
-  base::TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
+  TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  ProcessReferences(&soft_reference_list_, clear_soft_references_, &weak_reference_list_,
-                    &finalizer_reference_list_, &phantom_reference_list_);
+  GetHeap()->ProcessReferences(timings_, clear_soft_references_, &IsMarkedCallback,
+                               &RecursiveMarkObjectCallback, this);
 }
 
 bool MarkSweep::HandleDirtyObjectsPhase() {
-  base::TimingLogger::ScopedSplit split("HandleDirtyObjectsPhase", &timings_);
+  TimingLogger::ScopedSplit split("HandleDirtyObjectsPhase", &timings_);
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertExclusiveHeld(self);
 
@@ -243,7 +238,7 @@
 }
 
 void MarkSweep::MarkingPhase() {
-  base::TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
+  TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
   Thread* self = Thread::Current();
 
   BindBitmaps();
@@ -277,7 +272,7 @@
     if (IsImmuneSpace(space)) {
       const char* name = space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
           "UpdateAndMarkImageModUnionTable";
-      base::TimingLogger::ScopedSplit split(name, &timings_);
+      TimingLogger::ScopedSplit split(name, &timings_);
       accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
       CHECK(mod_union_table != nullptr);
       mod_union_table->UpdateAndMarkReferences(MarkRootCallback, this);
@@ -294,8 +289,7 @@
   // knowing that new allocations won't be marked as live.
   timings_.StartSplit("MarkStackAsLive");
   accounting::ObjectStack* live_stack = heap_->GetLiveStack();
-  heap_->MarkAllocStack(heap_->alloc_space_->GetLiveBitmap(),
-                        heap_->large_object_space_->GetLiveObjects(), live_stack);
+  heap_->MarkAllocStackAsLive(live_stack);
   live_stack->Reset();
   timings_.EndSplit();
   // Recursively mark all the non-image bits set in the mark bitmap.
@@ -303,7 +297,7 @@
 }
 
 void MarkSweep::ReclaimPhase() {
-  base::TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
+  TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
   Thread* self = Thread::Current();
 
   if (!IsConcurrent()) {
@@ -318,7 +312,7 @@
   if (IsConcurrent()) {
     Runtime::Current()->AllowNewSystemWeaks();
 
-    base::TimingLogger::ScopedSplit split("UnMarkAllocStack", &timings_);
+    TimingLogger::ScopedSplit split("UnMarkAllocStack", &timings_);
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     accounting::ObjectStack* allocation_stack = GetHeap()->allocation_stack_.get();
     // The allocation stack contains things allocated since the start of the GC. These may have been
@@ -369,10 +363,12 @@
 }
 
 void MarkSweep::FindDefaultMarkBitmap() {
-  base::TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", &timings_);
+  TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
-      current_mark_bitmap_ = space->GetMarkBitmap();
+    accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
+    if (bitmap != nullptr &&
+        space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
+      current_mark_bitmap_ = bitmap;
       CHECK(current_mark_bitmap_ != NULL);
       return;
     }
@@ -411,6 +407,13 @@
   }
 }
 
+mirror::Object* MarkSweep::RecursiveMarkObjectCallback(mirror::Object* obj, void* arg) {
+  MarkSweep* mark_sweep = reinterpret_cast<MarkSweep*>(arg);
+  mark_sweep->MarkObject(obj);
+  mark_sweep->ProcessMarkStack(true);
+  return obj;
+}
+
 inline void MarkSweep::UnMarkObjectNonNull(const Object* obj) {
   DCHECK(!IsImmune(obj));
   // Try to take advantage of locality of references within a space, failing this find the space
@@ -610,13 +613,11 @@
 }
 
 void MarkSweep::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
-  CHECK(space->IsDlMallocSpace());
-  space::DlMallocSpace* alloc_space = space->AsDlMallocSpace();
+  CHECK(space->IsMallocSpace());
+  space::MallocSpace* alloc_space = space->AsMallocSpace();
   accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = alloc_space->mark_bitmap_.release();
+  accounting::SpaceBitmap* mark_bitmap = alloc_space->BindLiveToMarkBitmap();
   GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
-  alloc_space->temp_bitmap_.reset(mark_bitmap);
-  alloc_space->mark_bitmap_.reset(live_bitmap);
 }
 
 class ScanObjectVisitor {
@@ -625,7 +626,7 @@
       : mark_sweep_(mark_sweep) {}
 
   // TODO: Fixme when anotatalysis works with visitors.
-  void operator()(const Object* obj) const ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS {
+  void operator()(Object* obj) const ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS {
     if (kCheckLocks) {
       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
@@ -814,6 +815,9 @@
     const size_t mark_stack_delta = std::min(CardScanTask::kMaxSize / 2,
                                              mark_stack_size / mark_stack_tasks + 1);
     for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+      if (space->GetMarkBitmap() == nullptr) {
+        continue;
+      }
       byte* card_begin = space->Begin();
       byte* card_end = space->End();
       // Align up the end address. For example, the image space's end
@@ -856,24 +860,26 @@
     timings_.EndSplit();
   } else {
     for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-      // Image spaces are handled properly since live == marked for them.
-      switch (space->GetGcRetentionPolicy()) {
-        case space::kGcRetentionPolicyNeverCollect:
-          timings_.StartSplit(paused ? "(Paused)ScanGrayImageSpaceObjects" :
-              "ScanGrayImageSpaceObjects");
-          break;
-        case space::kGcRetentionPolicyFullCollect:
-          timings_.StartSplit(paused ? "(Paused)ScanGrayZygoteSpaceObjects" :
-              "ScanGrayZygoteSpaceObjects");
-          break;
-        case space::kGcRetentionPolicyAlwaysCollect:
-          timings_.StartSplit(paused ? "(Paused)ScanGrayAllocSpaceObjects" :
-              "ScanGrayAllocSpaceObjects");
-          break;
-        }
-      ScanObjectVisitor visitor(this);
-      card_table->Scan(space->GetMarkBitmap(), space->Begin(), space->End(), visitor, minimum_age);
-      timings_.EndSplit();
+      if (space->GetMarkBitmap() != nullptr) {
+        // Image spaces are handled properly since live == marked for them.
+        switch (space->GetGcRetentionPolicy()) {
+          case space::kGcRetentionPolicyNeverCollect:
+            timings_.StartSplit(paused ? "(Paused)ScanGrayImageSpaceObjects" :
+                "ScanGrayImageSpaceObjects");
+            break;
+          case space::kGcRetentionPolicyFullCollect:
+            timings_.StartSplit(paused ? "(Paused)ScanGrayZygoteSpaceObjects" :
+                "ScanGrayZygoteSpaceObjects");
+            break;
+          case space::kGcRetentionPolicyAlwaysCollect:
+            timings_.StartSplit(paused ? "(Paused)ScanGrayAllocSpaceObjects" :
+                "ScanGrayAllocSpaceObjects");
+            break;
+          }
+        ScanObjectVisitor visitor(this);
+        card_table->Scan(space->GetMarkBitmap(), space->Begin(), space->End(), visitor, minimum_age);
+        timings_.EndSplit();
+      }
     }
   }
 }
@@ -933,7 +939,7 @@
 // Populates the mark stack based on the set of marked objects and
 // recursively marks until the mark stack is emptied.
 void MarkSweep::RecursiveMark() {
-  base::TimingLogger::ScopedSplit split("RecursiveMark", &timings_);
+  TimingLogger::ScopedSplit split("RecursiveMark", &timings_);
   // RecursiveMark will build the lists of known instances of the Reference classes.
   // See DelayReferenceReferent for details.
   CHECK(soft_reference_list_ == NULL);
@@ -954,9 +960,8 @@
       if ((space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) ||
           (!partial && space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect)) {
         current_mark_bitmap_ = space->GetMarkBitmap();
-        if (current_mark_bitmap_ == NULL) {
-          GetHeap()->DumpSpaces();
-          LOG(FATAL) << "invalid bitmap";
+        if (current_mark_bitmap_ == nullptr) {
+          continue;
         }
         if (parallel) {
           // We will use the mark stack the future.
@@ -994,7 +999,7 @@
   ProcessMarkStack(false);
 }
 
-mirror::Object* MarkSweep::SystemWeakIsMarkedCallback(Object* object, void* arg) {
+mirror::Object* MarkSweep::IsMarkedCallback(Object* object, void* arg) {
   if (reinterpret_cast<MarkSweep*>(arg)->IsMarked(object)) {
     return object;
   }
@@ -1015,7 +1020,7 @@
 void MarkSweep::SweepSystemWeaks() {
   Runtime* runtime = Runtime::Current();
   timings_.StartSplit("SweepSystemWeaks");
-  runtime->SweepSystemWeaks(SystemWeakIsMarkedCallback, this);
+  runtime->SweepSystemWeaks(IsMarkedCallback, this);
   timings_.EndSplit();
 }
 
@@ -1121,7 +1126,7 @@
 }
 
 void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitmaps) {
-  space::DlMallocSpace* space = heap_->GetAllocSpace();
+  space::MallocSpace* space = heap_->GetNonMovingSpace();
   timings_.StartSplit("SweepArray");
   // Newly allocated objects MUST be in the alloc space and those are the only objects which we are
   // going to free.
@@ -1200,15 +1205,18 @@
 
 void MarkSweep::Sweep(bool swap_bitmaps) {
   DCHECK(mark_stack_->IsEmpty());
-  base::TimingLogger::ScopedSplit("Sweep", &timings_);
+  TimingLogger::ScopedSplit("Sweep", &timings_);
 
   const bool partial = (GetGcType() == kGcTypePartial);
   SweepCallbackContext scc;
   scc.mark_sweep = this;
   scc.self = Thread::Current();
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    if (!space->IsMallocSpace()) {
+      continue;
+    }
     // We always sweep always collect spaces.
-    bool sweep_space = (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect);
+    bool sweep_space = space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect;
     if (!partial && !sweep_space) {
       // We sweep full collect spaces when the GC isn't a partial GC (ie its full).
       sweep_space = (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect);
@@ -1216,19 +1224,19 @@
     if (sweep_space) {
       uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
       uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
-      scc.space = space->AsDlMallocSpace();
+      scc.space = space->AsMallocSpace();
       accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
       accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
       if (swap_bitmaps) {
         std::swap(live_bitmap, mark_bitmap);
       }
       if (!space->IsZygoteSpace()) {
-        base::TimingLogger::ScopedSplit split("SweepAllocSpace", &timings_);
+        TimingLogger::ScopedSplit split("SweepAllocSpace", &timings_);
         // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
         accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
                                            &SweepCallback, reinterpret_cast<void*>(&scc));
       } else {
-        base::TimingLogger::ScopedSplit split("SweepZygote", &timings_);
+        TimingLogger::ScopedSplit split("SweepZygote", &timings_);
         // Zygote sweep takes care of dirtying cards and clearing live bits, does not free actual
         // memory.
         accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
@@ -1241,7 +1249,7 @@
 }
 
 void MarkSweep::SweepLargeObjects(bool swap_bitmaps) {
-  base::TimingLogger::ScopedSplit("SweepLargeObjects", &timings_);
+  TimingLogger::ScopedSplit("SweepLargeObjects", &timings_);
   // Sweep large objects
   space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
   accounting::SpaceSetMap* large_live_objects = large_object_space->GetLiveObjects();
@@ -1266,7 +1274,7 @@
 
 void MarkSweep::CheckReference(const Object* obj, const Object* ref, MemberOffset offset, bool is_static) {
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace() && space->Contains(ref)) {
+    if (space->IsMallocSpace() && space->Contains(ref)) {
       DCHECK(IsMarked(obj));
 
       bool is_marked = IsMarked(ref);
@@ -1313,40 +1321,7 @@
   DCHECK(klass != nullptr);
   DCHECK(klass->IsReferenceClass());
   DCHECK(obj != NULL);
-  Object* referent = heap_->GetReferenceReferent(obj);
-  if (referent != NULL && !IsMarked(referent)) {
-    if (kCountJavaLangRefs) {
-      ++reference_count_;
-    }
-    Thread* self = Thread::Current();
-    // TODO: Remove these locks, and use atomic stacks for storing references?
-    // We need to check that the references haven't already been enqueued since we can end up
-    // scanning the same reference multiple times due to dirty cards.
-    if (klass->IsSoftReferenceClass()) {
-      MutexLock mu(self, *heap_->GetSoftRefQueueLock());
-      if (!heap_->IsEnqueued(obj)) {
-        heap_->EnqueuePendingReference(obj, &soft_reference_list_);
-      }
-    } else if (klass->IsWeakReferenceClass()) {
-      MutexLock mu(self, *heap_->GetWeakRefQueueLock());
-      if (!heap_->IsEnqueued(obj)) {
-        heap_->EnqueuePendingReference(obj, &weak_reference_list_);
-      }
-    } else if (klass->IsFinalizerReferenceClass()) {
-      MutexLock mu(self, *heap_->GetFinalizerRefQueueLock());
-      if (!heap_->IsEnqueued(obj)) {
-        heap_->EnqueuePendingReference(obj, &finalizer_reference_list_);
-      }
-    } else if (klass->IsPhantomReferenceClass()) {
-      MutexLock mu(self, *heap_->GetPhantomRefQueueLock());
-      if (!heap_->IsEnqueued(obj)) {
-        heap_->EnqueuePendingReference(obj, &phantom_reference_list_);
-      }
-    } else {
-      LOG(FATAL) << "Invalid reference type " << PrettyClass(klass)
-                 << " " << std::hex << klass->GetAccessFlags();
-    }
-  }
+  heap_->DelayReferenceReferent(klass, obj, IsMarkedCallback, this);
 }
 
 class MarkObjectVisitor {
@@ -1370,9 +1345,9 @@
 
 // Scans an object reference.  Determines the type of the reference
 // and dispatches to a specialized scanning routine.
-void MarkSweep::ScanObject(const Object* obj) {
+void MarkSweep::ScanObject(Object* obj) {
   MarkObjectVisitor visitor(this);
-  ScanObjectVisit(const_cast<Object*>(obj), visitor);
+  ScanObjectVisit(obj, visitor);
 }
 
 void MarkSweep::ProcessMarkStackParallel(size_t thread_count) {
@@ -1406,12 +1381,12 @@
   } else {
     // TODO: Tune this.
     static const size_t kFifoSize = 4;
-    BoundedFifoPowerOfTwo<const Object*, kFifoSize> prefetch_fifo;
+    BoundedFifoPowerOfTwo<Object*, kFifoSize> prefetch_fifo;
     for (;;) {
-      const Object* obj = NULL;
+      Object* obj = NULL;
       if (kUseMarkStackPrefetch) {
         while (!mark_stack_->IsEmpty() && prefetch_fifo.size() < kFifoSize) {
-          const Object* obj = mark_stack_->PopBack();
+          Object* obj = mark_stack_->PopBack();
           DCHECK(obj != NULL);
           __builtin_prefetch(obj);
           prefetch_fifo.push_back(obj);
@@ -1434,43 +1409,6 @@
   timings_.EndSplit();
 }
 
-// Walks the reference list marking any references subject to the
-// reference clearing policy.  References with a black referent are
-// removed from the list.  References with white referents biased
-// toward saving are blackened and also removed from the list.
-void MarkSweep::PreserveSomeSoftReferences(Object** list) {
-  DCHECK(list != NULL);
-  Object* clear = NULL;
-  size_t counter = 0;
-
-  DCHECK(mark_stack_->IsEmpty());
-
-  timings_.StartSplit("PreserveSomeSoftReferences");
-  while (*list != NULL) {
-    Object* ref = heap_->DequeuePendingReference(list);
-    Object* referent = heap_->GetReferenceReferent(ref);
-    if (referent == NULL) {
-      // Referent was cleared by the user during marking.
-      continue;
-    }
-    bool is_marked = IsMarked(referent);
-    if (!is_marked && ((++counter) & 1)) {
-      // Referent is white and biased toward saving, mark it.
-      MarkObject(referent);
-      is_marked = true;
-    }
-    if (!is_marked) {
-      // Referent is white, queue it for clearing.
-      heap_->EnqueuePendingReference(ref, &clear);
-    }
-  }
-  *list = clear;
-  timings_.EndSplit();
-
-  // Restart the mark with the newly black references added to the root set.
-  ProcessMarkStack(true);
-}
-
 inline bool MarkSweep::IsMarked(const Object* object) const
     SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
   if (IsImmune(object)) {
@@ -1483,103 +1421,11 @@
   return heap_->GetMarkBitmap()->Test(object);
 }
 
-// Unlink the reference list clearing references objects with white
-// referents.  Cleared references registered to a reference queue are
-// scheduled for appending by the heap worker thread.
-void MarkSweep::ClearWhiteReferences(Object** list) {
-  DCHECK(list != NULL);
-  while (*list != NULL) {
-    Object* ref = heap_->DequeuePendingReference(list);
-    Object* referent = heap_->GetReferenceReferent(ref);
-    if (referent != NULL && !IsMarked(referent)) {
-      // Referent is white, clear it.
-      heap_->ClearReferenceReferent(ref);
-      if (heap_->IsEnqueuable(ref)) {
-        heap_->EnqueueReference(ref, &cleared_reference_list_);
-      }
-    }
-  }
-  DCHECK(*list == NULL);
-}
-
-// Enqueues finalizer references with white referents.  White
-// referents are blackened, moved to the zombie field, and the
-// referent field is cleared.
-void MarkSweep::EnqueueFinalizerReferences(Object** list) {
-  DCHECK(list != NULL);
-  timings_.StartSplit("EnqueueFinalizerReferences");
-  MemberOffset zombie_offset = heap_->GetFinalizerReferenceZombieOffset();
-  bool has_enqueued = false;
-  while (*list != NULL) {
-    Object* ref = heap_->DequeuePendingReference(list);
-    Object* referent = heap_->GetReferenceReferent(ref);
-    if (referent != NULL && !IsMarked(referent)) {
-      MarkObject(referent);
-      // If the referent is non-null the reference must queuable.
-      DCHECK(heap_->IsEnqueuable(ref));
-      ref->SetFieldObject(zombie_offset, referent, false);
-      heap_->ClearReferenceReferent(ref);
-      heap_->EnqueueReference(ref, &cleared_reference_list_);
-      has_enqueued = true;
-    }
-  }
-  timings_.EndSplit();
-  if (has_enqueued) {
-    ProcessMarkStack(true);
-  }
-  DCHECK(*list == NULL);
-}
-
-// Process reference class instances and schedule finalizations.
-void MarkSweep::ProcessReferences(Object** soft_references, bool clear_soft,
-                                  Object** weak_references,
-                                  Object** finalizer_references,
-                                  Object** phantom_references) {
-  CHECK(soft_references != NULL);
-  CHECK(weak_references != NULL);
-  CHECK(finalizer_references != NULL);
-  CHECK(phantom_references != NULL);
-  CHECK(mark_stack_->IsEmpty());
-
-  // Unless we are in the zygote or required to clear soft references
-  // with white references, preserve some white referents.
-  if (!clear_soft && !Runtime::Current()->IsZygote()) {
-    PreserveSomeSoftReferences(soft_references);
-  }
-
-  timings_.StartSplit("ProcessReferences");
-  // Clear all remaining soft and weak references with white
-  // referents.
-  ClearWhiteReferences(soft_references);
-  ClearWhiteReferences(weak_references);
-  timings_.EndSplit();
-
-  // Preserve all white objects with finalize methods and schedule
-  // them for finalization.
-  EnqueueFinalizerReferences(finalizer_references);
-
-  timings_.StartSplit("ProcessReferences");
-  // Clear all f-reachable soft and weak references with white
-  // referents.
-  ClearWhiteReferences(soft_references);
-  ClearWhiteReferences(weak_references);
-
-  // Clear all phantom references with white referents.
-  ClearWhiteReferences(phantom_references);
-
-  // At this point all reference lists should be empty.
-  DCHECK(*soft_references == NULL);
-  DCHECK(*weak_references == NULL);
-  DCHECK(*finalizer_references == NULL);
-  DCHECK(*phantom_references == NULL);
-  timings_.EndSplit();
-}
-
 void MarkSweep::UnBindBitmaps() {
-  base::TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
+  TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsDlMallocSpace()) {
-      space::DlMallocSpace* alloc_space = space->AsDlMallocSpace();
+    if (space->IsMallocSpace()) {
+      space::MallocSpace* alloc_space = space->AsMallocSpace();
       if (alloc_space->temp_bitmap_.get() != NULL) {
         // At this point, the temp_bitmap holds our old mark bitmap.
         accounting::SpaceBitmap* new_bitmap = alloc_space->temp_bitmap_.release();
@@ -1593,26 +1439,16 @@
 }
 
 void MarkSweep::FinishPhase() {
-  base::TimingLogger::ScopedSplit split("FinishPhase", &timings_);
+  TimingLogger::ScopedSplit split("FinishPhase", &timings_);
   // Can't enqueue references if we hold the mutator lock.
-  Object* cleared_references = GetClearedReferences();
   Heap* heap = GetHeap();
-  timings_.NewSplit("EnqueueClearedReferences");
-  heap->EnqueueClearedReferences(&cleared_references);
-
   timings_.NewSplit("PostGcVerification");
   heap->PostGcVerification(this);
 
-  timings_.NewSplit("GrowForUtilization");
-  heap->GrowForUtilization(GetGcType(), GetDurationNs());
-
   timings_.NewSplit("RequestHeapTrim");
   heap->RequestHeapTrim();
 
   // Update the cumulative statistics
-  total_time_ns_ += GetDurationNs();
-  total_paused_time_ns_ += std::accumulate(GetPauseTimes().begin(), GetPauseTimes().end(), 0,
-                                           std::plus<uint64_t>());
   total_freed_objects_ += GetFreedObjects() + GetFreedLargeObjects();
   total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes();
 
@@ -1651,8 +1487,10 @@
 
   // Clear all of the spaces' mark bitmaps.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->GetGcRetentionPolicy() != space::kGcRetentionPolicyNeverCollect) {
-      space->GetMarkBitmap()->Clear();
+    accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
+    if (bitmap != nullptr &&
+        space->GetGcRetentionPolicy() != space::kGcRetentionPolicyNeverCollect) {
+      bitmap->Clear();
     }
   }
   mark_stack_->Reset();
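
The reference-processing refactor in this file replaces the collector-local list walking (deleted above) with heap-level helpers that are handed C-style callbacks such as IsMarkedCallback and RecursiveMarkObjectCallback plus a void* pointing back at the collector. The sketch below shows just that callback shape with placeholder types, since only the MarkSweep side of the new Heap API appears in this diff.

    // Standalone sketch of the callback shape used by the refactor above: the
    // heap-side helper knows nothing about the collector and calls back through
    // a function pointer plus a void* argument. All types here are placeholders.
    #include <cstdio>
    #include <vector>

    struct Object {
      bool marked = false;
    };

    // Returns the (possibly forwarded) object if marked, nullptr otherwise.
    typedef Object* (*IsMarkedCallbackFn)(Object* obj, void* arg);

    class Collector {
     public:
      static Object* IsMarkedCallback(Object* obj, void* arg) {
        Collector* collector = reinterpret_cast<Collector*>(arg);
        return collector->IsMarked(obj) ? obj : nullptr;
      }
      bool IsMarked(const Object* obj) const { return obj->marked; }
    };

    // Heap-side helper: clears every referent the callback reports as unmarked,
    // in the spirit of the DelayReferenceReferent()/ProcessReferences() split.
    static void ClearUnmarkedReferents(std::vector<Object*>& referents,
                                       IsMarkedCallbackFn is_marked, void* arg) {
      for (Object*& referent : referents) {
        if (referent != nullptr && is_marked(referent, arg) == nullptr) {
          printf("clearing unmarked referent %p\n", static_cast<void*>(referent));
          referent = nullptr;
        }
      }
    }

    int main() {
      Collector collector;
      Object live;
      live.marked = true;
      Object dead;  // Never marked.
      std::vector<Object*> referents = {&live, &dead};
      ClearUnmarkedReferents(referents, &Collector::IsMarkedCallback, &collector);
      return 0;
    }
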
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 3bc014a..53d85b0 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -114,7 +114,7 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsImmuneSpace(const space::ContinuousSpace* space)
+  bool IsImmuneSpace(const space::ContinuousSpace* space) const;
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Bind the live bits to the mark bits of bitmaps for spaces that are never collected, ie
@@ -140,6 +140,7 @@
   void ProcessReferences(Thread* self)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Update and mark references from immune spaces.
   virtual void UpdateAndMarkModUnion()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -158,7 +159,7 @@
   }
 
   // Blackens an object.
-  void ScanObject(const mirror::Object* obj)
+  void ScanObject(mirror::Object* obj)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -167,38 +168,6 @@
   void ScanObjectVisit(mirror::Object* obj, const MarkVisitor& visitor)
       NO_THREAD_SAFETY_ANALYSIS;
 
-  size_t GetFreedBytes() const {
-    return freed_bytes_;
-  }
-
-  size_t GetFreedLargeObjectBytes() const {
-    return freed_large_object_bytes_;
-  }
-
-  size_t GetFreedObjects() const {
-    return freed_objects_;
-  }
-
-  size_t GetFreedLargeObjects() const {
-    return freed_large_objects_;
-  }
-
-  uint64_t GetTotalTimeNs() const {
-    return total_time_ns_;
-  }
-
-  uint64_t GetTotalPausedTimeNs() const {
-    return total_paused_time_ns_;
-  }
-
-  uint64_t GetTotalFreedObjects() const {
-    return total_freed_objects_;
-  }
-
-  uint64_t GetTotalFreedBytes() const {
-    return total_freed_bytes_;
-  }
-
   // Everything inside the immune range is assumed to be marked.
   void SetImmuneRange(mirror::Object* begin, mirror::Object* end);
 
@@ -216,11 +185,14 @@
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   template <typename Visitor>
-  static void VisitObjectReferences(mirror::Object* obj, const Visitor& visitor,
-                                    bool visit_class = false)
+  static void VisitObjectReferences(mirror::Object* obj, const Visitor& visitor, bool visit_class)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_,
                             Locks::mutator_lock_);
 
+  static mirror::Object* RecursiveMarkObjectCallback(mirror::Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
@@ -244,10 +216,7 @@
   // Returns true if the object has its bit set in the mark bitmap.
   bool IsMarked(const mirror::Object* object) const;
 
-  static mirror::Object* SystemWeakIsMarkedCallback(mirror::Object* object, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  static mirror::Object* SystemWeakIsMarkedArrayCallback(mirror::Object* object, void* arg)
+  static mirror::Object* IsMarkedCallback(mirror::Object* object, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   static void VerifyImageRootVisitor(mirror::Object* root, void* arg)
@@ -381,13 +350,6 @@
   void ClearWhiteReferences(mirror::Object** list)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
-  void ProcessReferences(mirror::Object** soft_references, bool clear_soft_references,
-                         mirror::Object** weak_references,
-                         mirror::Object** finalizer_references,
-                         mirror::Object** phantom_references)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Whether or not we count how many of each type of object were scanned.
   static const bool kCountScannedTypes = false;
 
@@ -395,9 +357,6 @@
   // object.
   accounting::SpaceBitmap* current_mark_bitmap_;
 
-  // Cache java.lang.Class for optimization.
-  mirror::Class* java_lang_Class_;
-
   accounting::ObjectStack* mark_stack_;
 
   // Immune range, every object inside the immune range is assumed to be marked.
@@ -412,14 +371,6 @@
 
   // Parallel finger.
   AtomicInteger atomic_finger_;
-  // Number of non large object bytes freed in this collection.
-  AtomicInteger freed_bytes_;
-  // Number of large object bytes freed.
-  AtomicInteger freed_large_object_bytes_;
-  // Number of objects freed in this collection.
-  AtomicInteger freed_objects_;
-  // Number of freed large objects.
-  AtomicInteger freed_large_objects_;
   // Number of classes scanned, if kCountScannedTypes.
   AtomicInteger class_count_;
   // Number of arrays scanned, if kCountScannedTypes.
@@ -443,8 +394,6 @@
 
   const bool is_concurrent_;
 
-  bool clear_soft_references_;
-
  private:
   friend class AddIfReachesAllocSpaceVisitor;  // Used by mod-union table.
   friend class CardScanTask;
diff --git a/runtime/gc/collector/partial_mark_sweep.cc b/runtime/gc/collector/partial_mark_sweep.cc
index 29367ce..8ec28f3 100644
--- a/runtime/gc/collector/partial_mark_sweep.cc
+++ b/runtime/gc/collector/partial_mark_sweep.cc
@@ -26,7 +26,7 @@
 namespace collector {
 
 PartialMarkSweep::PartialMarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix)
-    : MarkSweep(heap, is_concurrent, name_prefix + (name_prefix.empty() ? "" : " ") + "partial") {
+    : MarkSweep(heap, is_concurrent, name_prefix.empty() ? "partial " : name_prefix) {
   cumulative_timings_.SetName(GetName());
 }
 
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
new file mode 100644
index 0000000..3b8f7c3
--- /dev/null
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_INL_H_
+#define ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_INL_H_
+
+namespace art {
+namespace gc {
+namespace collector {
+
+inline mirror::Object* SemiSpace::GetForwardingAddressInFromSpace(mirror::Object* obj) const {
+  DCHECK(from_space_->HasAddress(obj));
+  LockWord lock_word = obj->GetLockWord();
+  if (lock_word.GetState() != LockWord::kForwardingAddress) {
+    return nullptr;
+  }
+  return reinterpret_cast<mirror::Object*>(lock_word.ForwardingAddress());
+}
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_INL_H_
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
new file mode 100644
index 0000000..3939354
--- /dev/null
+++ b/runtime/gc/collector/semi_space.cc
@@ -0,0 +1,615 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "semi_space.h"
+
+#include <functional>
+#include <numeric>
+#include <climits>
+#include <vector>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/mutex-inl.h"
+#include "base/timing_logger.h"
+#include "gc/accounting/heap_bitmap.h"
+#include "gc/accounting/mod_union_table.h"
+#include "gc/accounting/space_bitmap-inl.h"
+#include "gc/heap.h"
+#include "gc/space/bump_pointer_space.h"
+#include "gc/space/bump_pointer_space-inl.h"
+#include "gc/space/image_space.h"
+#include "gc/space/large_object_space.h"
+#include "gc/space/space-inl.h"
+#include "indirect_reference_table.h"
+#include "intern_table.h"
+#include "jni_internal.h"
+#include "mark_sweep-inl.h"
+#include "monitor.h"
+#include "mirror/art_field.h"
+#include "mirror/art_field-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/class_loader.h"
+#include "mirror/dex_cache.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array.h"
+#include "mirror/object_array-inl.h"
+#include "runtime.h"
+#include "semi_space-inl.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+#include "verifier/method_verifier.h"
+
+using ::art::mirror::Class;
+using ::art::mirror::Object;
+
+namespace art {
+namespace gc {
+namespace collector {
+
+static constexpr bool kProtectFromSpace = true;
+static constexpr bool kResetFromSpace = true;
+
+// TODO: Unduplicate logic.
+void SemiSpace::ImmuneSpace(space::ContinuousSpace* space) {
+  // Bind live to mark bitmap if necessary.
+  if (space->GetLiveBitmap() != space->GetMarkBitmap()) {
+    BindLiveToMarkBitmap(space);
+  }
+  // Add the space to the immune region.
+  if (immune_begin_ == nullptr) {
+    DCHECK(immune_end_ == nullptr);
+    immune_begin_ = reinterpret_cast<Object*>(space->Begin());
+    immune_end_ = reinterpret_cast<Object*>(space->End());
+  } else {
+    const space::ContinuousSpace* prev_space = nullptr;
+    // Find out if the previous space is immune.
+    for (space::ContinuousSpace* cur_space : GetHeap()->GetContinuousSpaces()) {
+      if (cur_space == space) {
+        break;
+      }
+      prev_space = cur_space;
+    }
+    // If previous space was immune, then extend the immune region. Relies on continuous spaces
+    // being sorted by Heap::AddContinuousSpace.
+    if (prev_space != nullptr && IsImmuneSpace(prev_space)) {
+      immune_begin_ = std::min(reinterpret_cast<Object*>(space->Begin()), immune_begin_);
+      immune_end_ = std::max(reinterpret_cast<Object*>(space->End()), immune_end_);
+    }
+  }
+}
+
+void SemiSpace::BindBitmaps() {
+  timings_.StartSplit("BindBitmaps");
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  // Mark all of the spaces we never collect as immune.
+  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
+        || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
+      ImmuneSpace(space);
+    }
+  }
+  timings_.EndSplit();
+}
+
+SemiSpace::SemiSpace(Heap* heap, const std::string& name_prefix)
+    : GarbageCollector(heap,
+                       name_prefix + (name_prefix.empty() ? "" : " ") + "marksweep + semispace"),
+      mark_stack_(nullptr),
+      immune_begin_(nullptr),
+      immune_end_(nullptr),
+      to_space_(nullptr),
+      from_space_(nullptr),
+      soft_reference_list_(nullptr),
+      weak_reference_list_(nullptr),
+      finalizer_reference_list_(nullptr),
+      phantom_reference_list_(nullptr),
+      cleared_reference_list_(nullptr),
+      self_(nullptr) {
+}
+
+void SemiSpace::InitializePhase() {
+  timings_.Reset();
+  TimingLogger::ScopedSplit split("InitializePhase", &timings_);
+  mark_stack_ = heap_->mark_stack_.get();
+  DCHECK(mark_stack_ != nullptr);
+  immune_begin_ = nullptr;
+  immune_end_ = nullptr;
+  soft_reference_list_ = nullptr;
+  weak_reference_list_ = nullptr;
+  finalizer_reference_list_ = nullptr;
+  phantom_reference_list_ = nullptr;
+  cleared_reference_list_ = nullptr;
+  self_ = Thread::Current();
+  // Do any pre GC verification.
+  timings_.NewSplit("PreGcVerification");
+  heap_->PreGcVerification(this);
+}
+
+void SemiSpace::ProcessReferences(Thread* self) {
+  TimingLogger::ScopedSplit split("ProcessReferences", &timings_);
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  GetHeap()->ProcessReferences(timings_, clear_soft_references_, &MarkedForwardingAddressCallback,
+                               &RecursiveMarkObjectCallback, this);
+}
+
+void SemiSpace::MarkingPhase() {
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertExclusiveHeld(self);
+  TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
+  // Need to do this with mutators paused so that somebody doesn't accidentally allocate into the
+  // wrong space.
+  heap_->SwapSemiSpaces();
+  // Assume the cleared space is already empty.
+  BindBitmaps();
+  // Process dirty cards and add dirty cards to mod-union tables.
+  heap_->ProcessCards(timings_);
+  // Need to do this before the checkpoint since we don't want any threads to add references to
+  // the live stack during the recursive mark.
+  timings_.NewSplit("SwapStacks");
+  heap_->SwapStacks();
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  MarkRoots();
+  // Mark roots of immune spaces.
+  UpdateAndMarkModUnion();
+  // Recursively mark remaining objects.
+  MarkReachableObjects();
+}
+
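+// A space is immune if it lies entirely within the immune region.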
+bool SemiSpace::IsImmuneSpace(const space::ContinuousSpace* space) const {
+  return immune_begin_ <= reinterpret_cast<Object*>(space->Begin()) &&
+      immune_end_ >= reinterpret_cast<Object*>(space->End());
+}
+
+void SemiSpace::UpdateAndMarkModUnion() {
+  for (auto& space : heap_->GetContinuousSpaces()) {
+    // If the space is immune then we need to mark the references to other spaces.
+    if (IsImmuneSpace(space)) {
+      accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
+      CHECK(table != nullptr);
+      // TODO: Improve naming.
+      TimingLogger::ScopedSplit split(
+          space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
+                                   "UpdateAndMarkImageModUnionTable",
+                                   &timings_);
+      table->UpdateAndMarkReferences(MarkRootCallback, this);
+    }
+  }
+}
+
+void SemiSpace::MarkReachableObjects() {
+  timings_.StartSplit("MarkStackAsLive");
+  accounting::ObjectStack* live_stack = heap_->GetLiveStack();
+  heap_->MarkAllocStackAsLive(live_stack);
+  live_stack->Reset();
+  timings_.EndSplit();
+  // Recursively process the mark stack.
+  ProcessMarkStack(true);
+}
+
+void SemiSpace::ReclaimPhase() {
+  TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
+  Thread* self = Thread::Current();
+  ProcessReferences(self);
+  {
+    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    SweepSystemWeaks();
+  }
+  // Record freed memory.
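+  // Objects that survived were copied into the to-space, so the difference in allocated bytes
+  // and objects between the from-space and the to-space is what this collection reclaimed.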
+  int from_bytes = from_space_->GetBytesAllocated();
+  int to_bytes = to_space_->GetBytesAllocated();
+  int from_objects = from_space_->GetObjectsAllocated();
+  int to_objects = to_space_->GetObjectsAllocated();
+  int freed_bytes = from_bytes - to_bytes;
+  int freed_objects = from_objects - to_objects;
+  CHECK_GE(freed_bytes, 0);
+  freed_bytes_.fetch_add(freed_bytes);
+  freed_objects_.fetch_add(freed_objects);
+  heap_->RecordFree(static_cast<size_t>(freed_objects), static_cast<size_t>(freed_bytes));
+
+  timings_.StartSplit("PreSweepingGcVerification");
+  heap_->PreSweepingGcVerification(this);
+  timings_.EndSplit();
+
+  {
+    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    // Reclaim unmarked objects.
+    Sweep(false);
+    // Swap the live and mark bitmaps for each space that we modified. This is an optimization
+    // that lets us avoid clearing live bits inside of the sweep. Only swaps unbound bitmaps.
+    timings_.StartSplit("SwapBitmaps");
+    SwapBitmaps();
+    timings_.EndSplit();
+    // Unbind the live and mark bitmaps.
+    UnBindBitmaps();
+  }
+  // Release the memory used by the from space.
+  if (kResetFromSpace) {
+    // Clearing from space.
+    from_space_->Clear();
+  }
+  // Protect the from space.
+  VLOG(heap)
+      << "mprotect region " << reinterpret_cast<void*>(from_space_->Begin()) << " - "
+      << reinterpret_cast<void*>(from_space_->Limit());
+  if (kProtectFromSpace) {
+    mprotect(from_space_->Begin(), from_space_->Capacity(), PROT_NONE);
+  } else {
+    mprotect(from_space_->Begin(), from_space_->Capacity(), PROT_READ);
+  }
+}
+
+void SemiSpace::ResizeMarkStack(size_t new_size) {
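+  // Stash the current entries, resize the backing storage, then push the entries back.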
+  std::vector<Object*> temp(mark_stack_->Begin(), mark_stack_->End());
+  CHECK_LE(mark_stack_->Size(), new_size);
+  mark_stack_->Resize(new_size);
+  for (const auto& obj : temp) {
+    mark_stack_->PushBack(obj);
+  }
+}
+
+inline void SemiSpace::MarkStackPush(Object* obj) {
+  if (UNLIKELY(mark_stack_->Size() >= mark_stack_->Capacity())) {
+    ResizeMarkStack(mark_stack_->Capacity() * 2);
+  }
+  // The object must be pushed onto the mark stack.
+  mark_stack_->PushBack(obj);
+}
+
+// Rare case, probably not worth inlining since it will increase instruction cache miss rate.
+bool SemiSpace::MarkLargeObject(const Object* obj) {
+  // TODO: support >1 discontinuous space.
+  space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
+  accounting::SpaceSetMap* large_objects = large_object_space->GetMarkObjects();
+  if (UNLIKELY(!large_objects->Test(obj))) {
+    large_objects->Set(obj);
+    return true;
+  }
+  return false;
+}
+
+// Used to mark and copy objects. Any newly-marked object that is in the from-space gets moved to
+// the to-space and has its forwarding address updated. Objects which have been newly marked are
+// pushed on the mark stack.
+Object* SemiSpace::MarkObject(Object* obj) {
+  Object* ret = obj;
+  if (obj != nullptr && !IsImmune(obj)) {
+    if (from_space_->HasAddress(obj)) {
+      mirror::Object* forward_address = GetForwardingAddressInFromSpace(obj);
+      // If the object has already been moved, return the new forward address.
+      if (!to_space_->HasAddress(forward_address)) {
+        // Otherwise, we need to move the object and add it to the markstack for processing.
+        size_t object_size = obj->SizeOf();
+        size_t dummy = 0;
+        forward_address = to_space_->Alloc(self_, object_size, &dummy);
+        // Copy over the object and add it to the mark stack since we still need to update its
+        // references.
+        memcpy(reinterpret_cast<void*>(forward_address), obj, object_size);
+        // Make sure to only update the forwarding address AFTER you copy the object so that the
+        // monitor word doesn't get stomped over.
+        COMPILE_ASSERT(sizeof(uint32_t) == sizeof(mirror::Object*),
+                       monitor_size_must_be_same_as_object);
+        obj->SetLockWord(LockWord::FromForwardingAddress(reinterpret_cast<size_t>(forward_address)));
+        MarkStackPush(forward_address);
+      }
+      ret = forward_address;
+      // TODO: Do we need this if in the else statement?
+    } else {
+      accounting::SpaceBitmap* object_bitmap = heap_->GetMarkBitmap()->GetContinuousSpaceBitmap(obj);
+      if (LIKELY(object_bitmap != nullptr)) {
+        // This object was not previously marked.
+        if (!object_bitmap->Test(obj)) {
+          object_bitmap->Set(obj);
+          MarkStackPush(obj);
+        }
+      } else {
+        DCHECK(!to_space_->HasAddress(obj)) << "Marking object in to_space_";
+        if (MarkLargeObject(obj)) {
+          MarkStackPush(obj);
+        }
+      }
+    }
+  }
+  return ret;
+}
+
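+// Callback used during reference processing: marks the object and then drains the mark stack so
+// that everything transitively reachable from it is marked before returning.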
+Object* SemiSpace::RecursiveMarkObjectCallback(Object* root, void* arg) {
+  DCHECK(root != nullptr);
+  DCHECK(arg != nullptr);
+  SemiSpace* semi_space = reinterpret_cast<SemiSpace*>(arg);
+  mirror::Object* ret = semi_space->MarkObject(root);
+  semi_space->ProcessMarkStack(true);
+  return ret;
+}
+
+Object* SemiSpace::MarkRootCallback(Object* root, void* arg) {
+  DCHECK(root != nullptr);
+  DCHECK(arg != nullptr);
+  return reinterpret_cast<SemiSpace*>(arg)->MarkObject(root);
+}
+
+// Marks all objects in the root set.
+void SemiSpace::MarkRoots() {
+  timings_.StartSplit("MarkRoots");
+  // TODO: Visit up image roots as well?
+  Runtime::Current()->VisitRoots(MarkRootCallback, this, false, true);
+  timings_.EndSplit();
+}
+
+void SemiSpace::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
+  CHECK(space->IsMallocSpace());
+  space::MallocSpace* alloc_space = space->AsMallocSpace();
+  accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
+  accounting::SpaceBitmap* mark_bitmap = alloc_space->BindLiveToMarkBitmap();
+  GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
+}
+
+mirror::Object* SemiSpace::GetForwardingAddress(mirror::Object* obj) {
+  if (from_space_->HasAddress(obj)) {
+    LOG(FATAL) << "Shouldn't happen!";
+    return GetForwardingAddressInFromSpace(obj);
+  }
+  return obj;
+}
+
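+// Callback for SweepSystemWeaks and reference processing: returns the marked object's address
+// (its forwarding address if it was moved), or nullptr if it is unmarked.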
+mirror::Object* SemiSpace::MarkedForwardingAddressCallback(Object* object, void* arg) {
+  return reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(object);
+}
+
+void SemiSpace::SweepSystemWeaks() {
+  timings_.StartSplit("SweepSystemWeaks");
+  Runtime::Current()->SweepSystemWeaks(MarkedForwardingAddressCallback, this);
+  timings_.EndSplit();
+}
+
+struct SweepCallbackContext {
+  SemiSpace* mark_sweep;
+  space::AllocSpace* space;
+  Thread* self;
+};
+
+void SemiSpace::SweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
+  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
+  SemiSpace* gc = context->mark_sweep;
+  Heap* heap = gc->GetHeap();
+  space::AllocSpace* space = context->space;
+  Thread* self = context->self;
+  Locks::heap_bitmap_lock_->AssertExclusiveHeld(self);
+  size_t freed_bytes = space->FreeList(self, num_ptrs, ptrs);
+  heap->RecordFree(num_ptrs, freed_bytes);
+  gc->freed_objects_.fetch_add(num_ptrs);
+  gc->freed_bytes_.fetch_add(freed_bytes);
+}
+
+void SemiSpace::ZygoteSweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
+  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
+  Locks::heap_bitmap_lock_->AssertExclusiveHeld(context->self);
+  Heap* heap = context->mark_sweep->GetHeap();
+  // We don't free any actual memory to avoid dirtying the shared zygote pages.
+  for (size_t i = 0; i < num_ptrs; ++i) {
+    Object* obj = static_cast<Object*>(ptrs[i]);
+    heap->GetLiveBitmap()->Clear(obj);
+    heap->GetCardTable()->MarkCard(obj);
+  }
+}
+
+void SemiSpace::Sweep(bool swap_bitmaps) {
+  DCHECK(mark_stack_->IsEmpty());
+  TimingLogger::ScopedSplit split("Sweep", &timings_);
+
+  const bool partial = (GetGcType() == kGcTypePartial);
+  SweepCallbackContext scc;
+  scc.mark_sweep = this;
+  scc.self = Thread::Current();
+  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    if (!space->IsMallocSpace()) {
+      continue;
+    }
+    // We always sweep spaces with an always-collect retention policy.
+    bool sweep_space = (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect);
+    if (!partial && !sweep_space) {
+      // We sweep full-collect spaces when the GC isn't a partial GC (i.e. it's a full GC).
+      sweep_space = (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect);
+    }
+    if (sweep_space && space->IsMallocSpace()) {
+      uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
+      uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
+      scc.space = space->AsMallocSpace();
+      accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
+      accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
+      if (swap_bitmaps) {
+        std::swap(live_bitmap, mark_bitmap);
+      }
+      if (!space->IsZygoteSpace()) {
+        TimingLogger::ScopedSplit split("SweepAllocSpace", &timings_);
+        // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
+        accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
+                                           &SweepCallback, reinterpret_cast<void*>(&scc));
+      } else {
+        TimingLogger::ScopedSplit split("SweepZygote", &timings_);
+        // Zygote sweep takes care of dirtying cards and clearing live bits; it does not free
+        // actual memory.
+        accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap, begin, end,
+                                           &ZygoteSweepCallback, reinterpret_cast<void*>(&scc));
+      }
+    }
+  }
+
+  SweepLargeObjects(swap_bitmaps);
+}
+
+void SemiSpace::SweepLargeObjects(bool swap_bitmaps) {
+  TimingLogger::ScopedSplit split("SweepLargeObjects", &timings_);
+  // Sweep large objects
+  space::LargeObjectSpace* large_object_space = GetHeap()->GetLargeObjectsSpace();
+  accounting::SpaceSetMap* large_live_objects = large_object_space->GetLiveObjects();
+  accounting::SpaceSetMap* large_mark_objects = large_object_space->GetMarkObjects();
+  if (swap_bitmaps) {
+    std::swap(large_live_objects, large_mark_objects);
+  }
+  // O(n*log(n)) but hopefully there are not too many large objects.
+  size_t freed_objects = 0;
+  size_t freed_bytes = 0;
+  Thread* self = Thread::Current();
+  for (const Object* obj : large_live_objects->GetObjects()) {
+    if (!large_mark_objects->Test(obj)) {
+      freed_bytes += large_object_space->Free(self, const_cast<Object*>(obj));
+      ++freed_objects;
+    }
+  }
+  freed_large_objects_.fetch_add(freed_objects);
+  freed_large_object_bytes_.fetch_add(freed_bytes);
+  GetHeap()->RecordFree(freed_objects, freed_bytes);
+}
+
+// Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
+// marked, put it on the appropriate list in the heap for later processing.
+void SemiSpace::DelayReferenceReferent(mirror::Class* klass, Object* obj) {
+  heap_->DelayReferenceReferent(klass, obj, MarkedForwardingAddressCallback, this);
+}
+
+// Visit all of the references of an object and update.
+void SemiSpace::ScanObject(Object* obj) {
+  DCHECK(obj != nullptr);
+  DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space";
+  MarkSweep::VisitObjectReferences(obj, [this](Object* obj, Object* ref, const MemberOffset& offset,
+     bool /* is_static */) ALWAYS_INLINE NO_THREAD_SAFETY_ANALYSIS {
+    mirror::Object* new_address = MarkObject(ref);
+    if (new_address != ref) {
+      DCHECK(new_address != nullptr);
+      obj->SetFieldObject(offset, new_address, false);
+    }
+  }, kMovingClasses);
+  mirror::Class* klass = obj->GetClass();
+  if (UNLIKELY(klass->IsReferenceClass())) {
+    DelayReferenceReferent(klass, obj);
+  }
+}
+
+// Scan anything that's on the mark stack.
+void SemiSpace::ProcessMarkStack(bool paused) {
+  timings_.StartSplit(paused ? "(paused)ProcessMarkStack" : "ProcessMarkStack");
+  while (!mark_stack_->IsEmpty()) {
+    ScanObject(mark_stack_->PopBack());
+  }
+  timings_.EndSplit();
+}
+
+inline Object* SemiSpace::GetMarkedForwardAddress(mirror::Object* obj) const
+    SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+  // All immune objects are assumed marked.
+  if (IsImmune(obj)) {
+    return obj;
+  }
+  if (from_space_->HasAddress(obj)) {
+    mirror::Object* forwarding_address = GetForwardingAddressInFromSpace(const_cast<Object*>(obj));
+    // If the object is forwarded then it MUST be marked.
+    if (to_space_->HasAddress(forwarding_address)) {
+      return forwarding_address;
+    }
+    // Must not be marked, return nullptr.
+    return nullptr;
+  } else if (to_space_->HasAddress(obj)) {
+    // Already forwarded, must be marked.
+    return obj;
+  }
+  return heap_->GetMarkBitmap()->Test(obj) ? obj : nullptr;
+}
+
+void SemiSpace::UnBindBitmaps() {
+  TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
+  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    if (space->IsMallocSpace()) {
+      space::MallocSpace* alloc_space = space->AsMallocSpace();
+      if (alloc_space->HasBoundBitmaps()) {
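+        // Undo the live/mark binding and put the space's own mark bitmap back into the heap's
+        // mark bitmap.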
+        alloc_space->UnBindBitmaps();
+        heap_->GetMarkBitmap()->ReplaceBitmap(alloc_space->GetLiveBitmap(),
+                                              alloc_space->GetMarkBitmap());
+      }
+    }
+  }
+}
+
+void SemiSpace::SetToSpace(space::ContinuousMemMapAllocSpace* to_space) {
+  DCHECK(to_space != nullptr);
+  to_space_ = to_space;
+}
+
+void SemiSpace::SetFromSpace(space::ContinuousMemMapAllocSpace* from_space) {
+  DCHECK(from_space != nullptr);
+  from_space_ = from_space;
+}
+
+void SemiSpace::FinishPhase() {
+  TimingLogger::ScopedSplit split("FinishPhase", &timings_);
+  // Can't enqueue references if we hold the mutator lock.
+  Heap* heap = GetHeap();
+  timings_.NewSplit("PostGcVerification");
+  heap->PostGcVerification(this);
+
+  // Null the "to" and "from" spaces since compacting from one to the other isn't valid until
+  // further action is done by the heap.
+  to_space_ = nullptr;
+  from_space_ = nullptr;
+
+  // Update the cumulative statistics
+  total_freed_objects_ += GetFreedObjects() + GetFreedLargeObjects();
+  total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes();
+
+  // Ensure that the mark stack is empty.
+  CHECK(mark_stack_->IsEmpty());
+
+  // Update the cumulative loggers.
+  cumulative_timings_.Start();
+  cumulative_timings_.AddLogger(timings_);
+  cumulative_timings_.End();
+
+  // Clear all of the spaces' mark bitmaps.
+  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
+    accounting::SpaceBitmap* bitmap = space->GetMarkBitmap();
+    if (bitmap != nullptr &&
+        space->GetGcRetentionPolicy() != space::kGcRetentionPolicyNeverCollect) {
+      bitmap->Clear();
+    }
+  }
+  mark_stack_->Reset();
+
+  // Reset the marked large objects.
+  space::LargeObjectSpace* large_objects = GetHeap()->GetLargeObjectsSpace();
+  large_objects->GetMarkObjects()->Clear();
+}
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
new file mode 100644
index 0000000..0f0cae1
--- /dev/null
+++ b/runtime/gc/collector/semi_space.h
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_H_
+#define ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_H_
+
+#include "atomic_integer.h"
+#include "barrier.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "garbage_collector.h"
+#include "offsets.h"
+#include "root_visitor.h"
+#include "UniquePtr.h"
+
+namespace art {
+
+namespace mirror {
+  class Class;
+  class Object;
+  template<class T> class ObjectArray;
+}  // namespace mirror
+
+class StackVisitor;
+class Thread;
+
+namespace gc {
+
+namespace accounting {
+  template <typename T> class AtomicStack;
+  class MarkIfReachesAllocspaceVisitor;
+  class ModUnionClearCardVisitor;
+  class ModUnionVisitor;
+  class ModUnionTableBitmap;
+  class MarkStackChunk;
+  typedef AtomicStack<mirror::Object*> ObjectStack;
+  class SpaceBitmap;
+}  // namespace accounting
+
+namespace space {
+  class BumpPointerSpace;
+  class ContinuousMemMapAllocSpace;
+  class ContinuousSpace;
+}  // namespace space
+
+class Heap;
+
+namespace collector {
+
+class SemiSpace : public GarbageCollector {
+ public:
+  explicit SemiSpace(Heap* heap, const std::string& name_prefix = "");
+
+  ~SemiSpace() {}
+
+  virtual void InitializePhase();
+  virtual bool IsConcurrent() const {
+    return false;
+  }
+  virtual void MarkingPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void ReclaimPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void FinishPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void MarkReachableObjects()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+  virtual GcType GetGcType() const {
+    return kGcTypePartial;
+  }
+
+  // Sets which space we will be copying objects to.
+  void SetToSpace(space::ContinuousMemMapAllocSpace* to_space);
+
+  // Set the space where we copy objects from.
+  void SetFromSpace(space::ContinuousMemMapAllocSpace* from_space);
+
+  // Initializes internal structures.
+  void Init();
+
+  // Find the default mark bitmap.
+  void FindDefaultMarkBitmap();
+
+  // Returns the new address of the object.
+  mirror::Object* MarkObject(mirror::Object* object)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  void ScanObject(mirror::Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Marks the root set at the start of a garbage collection.
+  void MarkRoots()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Make a space immune. Immune spaces have all live objects marked - that is, the mark and
+  // live bitmaps are bound together.
+  void ImmuneSpace(space::ContinuousSpace* space)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Bind the live bits to the mark bits of bitmaps for spaces that are never collected, ie
+  // the image. Mark that portion of the heap as immune.
+  virtual void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void BindLiveToMarkBitmap(space::ContinuousSpace* space)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  void UnBindBitmaps()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  void ProcessReferences(Thread* self)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Sweeps unmarked objects to complete the garbage collection.
+  virtual void Sweep(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  // Sweeps unmarked large objects to complete the garbage collection.
+  void SweepLargeObjects(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  // Sweep only pointers within an array. WARNING: Trashes objects.
+  void SweepArray(accounting::ObjectStack* allocation_stack_, bool swap_bitmaps)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  mirror::Object* GetClearedReferences() {
+    return cleared_reference_list_;
+  }
+
+  // TODO: enable thread safety analysis when in use by multiple worker threads.
+  template <typename MarkVisitor>
+  void ScanObjectVisit(const mirror::Object* obj, const MarkVisitor& visitor)
+      NO_THREAD_SAFETY_ANALYSIS;
+
+  void SweepSystemWeaks()
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  template <typename Visitor>
+  static void VisitObjectReferencesAndClass(mirror::Object* obj, const Visitor& visitor)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  static mirror::Object* MarkRootCallback(mirror::Object* root, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  static mirror::Object* RecursiveMarkObjectCallback(mirror::Object* root, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+ protected:
+  // Returns null if the object is not marked, otherwise returns the forwarding address (same as
+  // object for non movable things).
+  mirror::Object* GetMarkedForwardAddress(mirror::Object* object) const;
+
+  static mirror::Object* MarkedForwardingAddressCallback(mirror::Object* object, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  // Marks a large object. Returns true if the object was not already marked.
+  bool MarkLargeObject(const mirror::Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  // Special sweep for zygote that just marks objects / dirties cards.
+  static void ZygoteSweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
+  // Expand mark stack to 2x its current size.
+  void ResizeMarkStack(size_t new_size);
+
+  // Returns how many threads we should use for the current GC phase, based on whether we are
+  // paused and whether or not we care about pauses.
+  size_t GetThreadCount(bool paused) const;
+
+  // Returns true if an object is inside of the immune region (assumed to be marked).
+  bool IsImmune(const mirror::Object* obj) const ALWAYS_INLINE {
+    return obj >= immune_begin_ && obj < immune_end_;
+  }
+
+  bool IsImmuneSpace(const space::ContinuousSpace* space) const;
+
+  static void VerifyRootCallback(const mirror::Object* root, void* arg, size_t vreg,
+                                 const StackVisitor *visitor);
+
+  void VerifyRoot(const mirror::Object* root, size_t vreg, const StackVisitor* visitor)
+      NO_THREAD_SAFETY_ANALYSIS;
+
+  template <typename Visitor>
+  static void VisitInstanceFieldsReferences(const mirror::Class* klass, const mirror::Object* obj,
+                                            const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Visit the header, static field references, and interface pointers of a class object.
+  template <typename Visitor>
+  static void VisitClassReferences(const mirror::Class* klass, const mirror::Object* obj,
+                                   const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  template <typename Visitor>
+  static void VisitStaticFieldsReferences(const mirror::Class* klass, const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  template <typename Visitor>
+  static void VisitFieldsReferences(const mirror::Object* obj, uint32_t ref_offsets, bool is_static,
+                                    const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Visit all of the references in an object array.
+  template <typename Visitor>
+  static void VisitObjectArrayReferences(const mirror::ObjectArray<mirror::Object>* array,
+                                         const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Visits the header and field references of a data object.
+  template <typename Visitor>
+  static void VisitOtherReferences(const mirror::Class* klass, const mirror::Object* obj,
+                                   const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
+    return VisitInstanceFieldsReferences(klass, obj, visitor);
+  }
+
+  // Push an object onto the mark stack.
+  inline void MarkStackPush(mirror::Object* obj);
+
+  void UpdateAndMarkModUnion()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Schedules an unmarked object for reference processing.
+  void DelayReferenceReferent(mirror::Class* klass, mirror::Object* reference)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  // Recursively blackens objects on the mark stack.
+  void ProcessMarkStack(bool paused)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  void EnqueueFinalizerReferences(mirror::Object** ref)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  void PreserveSomeSoftReferences(mirror::Object** ref)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  void ClearWhiteReferences(mirror::Object** list)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  void ProcessReferences(mirror::Object** soft_references, bool clear_soft_references,
+                         mirror::Object** weak_references,
+                         mirror::Object** finalizer_references,
+                         mirror::Object** phantom_references)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
+
+  inline mirror::Object* GetForwardingAddressInFromSpace(mirror::Object* obj) const;
+
+  mirror::Object* GetForwardingAddress(mirror::Object* obj);
+
+  // Stack of objects that still need to have their references scanned.
+  accounting::ObjectStack* mark_stack_;
+
+  // Immune range, every object inside the immune range is assumed to be marked.
+  mirror::Object* immune_begin_;
+  mirror::Object* immune_end_;
+
+  // Destination and source spaces.
+  space::ContinuousMemMapAllocSpace* to_space_;
+  space::ContinuousMemMapAllocSpace* from_space_;
+
+  mirror::Object* soft_reference_list_;
+  mirror::Object* weak_reference_list_;
+  mirror::Object* finalizer_reference_list_;
+  mirror::Object* phantom_reference_list_;
+  mirror::Object* cleared_reference_list_;
+
+  Thread* self_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(SemiSpace);
+};
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_H_
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index 9f0bf33..ee6077a 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -26,7 +26,7 @@
 
 StickyMarkSweep::StickyMarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix)
     : PartialMarkSweep(heap, is_concurrent,
-                       name_prefix + (name_prefix.empty() ? "" : " ") + "sticky") {
+                       name_prefix.empty() ? "sticky " : name_prefix) {
   cumulative_timings_.SetName(GetName());
 }
 
@@ -38,7 +38,8 @@
   // know what was allocated since the last GC. A side-effect of binding the allocation space mark
   // and live bitmap is that marking the objects will place them in the live bitmap.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
+    if (space->IsMallocSpace() &&
+        space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
       BindLiveToMarkBitmap(space);
     }
   }
diff --git a/runtime/gc/collector/sticky_mark_sweep.h b/runtime/gc/collector/sticky_mark_sweep.h
index 8bee00f..b675877 100644
--- a/runtime/gc/collector/sticky_mark_sweep.h
+++ b/runtime/gc/collector/sticky_mark_sweep.h
@@ -31,10 +31,6 @@
     return kGcTypeSticky;
   }
 
-  // Don't need to do anything special here since we scan all the cards which may have references
-  // to the newly allocated objects.
-  virtual void UpdateAndMarkModUnion() { }
-
   explicit StickyMarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix = "");
   ~StickyMarkSweep() {}
 
@@ -53,6 +49,10 @@
 
   void Sweep(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  // Don't need to do anything special here since we scan all the cards which may have references
+  // to the newly allocated objects.
+  virtual void UpdateAndMarkModUnion() { }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(StickyMarkSweep);
 };
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
new file mode 100644
index 0000000..ba3cad6
--- /dev/null
+++ b/runtime/gc/collector_type.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_TYPE_H_
+#define ART_RUNTIME_GC_COLLECTOR_TYPE_H_
+
+#include <ostream>
+
+namespace art {
+namespace gc {
+
+// Which types of collections are able to be performed.
+enum CollectorType {
+  // Non concurrent mark-sweep.
+  kCollectorTypeMS,
+  // Concurrent mark-sweep.
+  kCollectorTypeCMS,
+  // Semi-space / mark-sweep hybrid, enables compaction.
+  kCollectorTypeSS,
+};
+std::ostream& operator<<(std::ostream& os, const CollectorType& collector_type);
+
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_TYPE_H_
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 873eadc..5eda0b9 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -20,8 +20,10 @@
 #include "heap.h"
 
 #include "debugger.h"
+#include "gc/space/bump_pointer_space-inl.h"
 #include "gc/space/dlmalloc_space-inl.h"
 #include "gc/space/large_object_space.h"
+#include "gc/space/rosalloc_space-inl.h"
 #include "object_utils.h"
 #include "runtime.h"
 #include "thread.h"
@@ -30,101 +32,129 @@
 namespace art {
 namespace gc {
 
-inline mirror::Object* Heap::AllocObjectUninstrumented(Thread* self, mirror::Class* c, size_t byte_count) {
-  DebugCheckPreconditionsForAllobObject(c, byte_count);
-  mirror::Object* obj;
-  size_t bytes_allocated;
-  AllocationTimer alloc_timer(this, &obj);
-  bool large_object_allocation = TryAllocLargeObjectUninstrumented(self, c, byte_count,
-                                                                   &obj, &bytes_allocated);
-  if (LIKELY(!large_object_allocation)) {
-    // Non-large object allocation.
-    obj = AllocateUninstrumented(self, alloc_space_, byte_count, &bytes_allocated);
-    // Ensure that we did not allocate into a zygote space.
-    DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace());
-  }
-  if (LIKELY(obj != NULL)) {
-    obj->SetClass(c);
-    // Record allocation after since we want to use the atomic add for the atomic fence to guard
-    // the SetClass since we do not want the class to appear NULL in another thread.
-    size_t new_num_bytes_allocated = RecordAllocationUninstrumented(bytes_allocated, obj);
-    DCHECK(!Dbg::IsAllocTrackingEnabled());
-    CheckConcurrentGC(self, new_num_bytes_allocated, obj);
-    if (kDesiredHeapVerification > kNoHeapVerification) {
-      VerifyObject(obj);
-    }
-    return obj;
-  }
-  ThrowOutOfMemoryError(self, byte_count, large_object_allocation);
-  return NULL;
-}
-
-inline size_t Heap::RecordAllocationUninstrumented(size_t size, mirror::Object* obj) {
-  DCHECK(obj != NULL);
-  DCHECK_GT(size, 0u);
-  size_t old_num_bytes_allocated = static_cast<size_t>(num_bytes_allocated_.fetch_add(size));
-
-  DCHECK(!Runtime::Current()->HasStatsEnabled());
-
-  // This is safe to do since the GC will never free objects which are neither in the allocation
-  // stack or the live bitmap.
-  while (!allocation_stack_->AtomicPushBack(obj)) {
-    CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
-  }
-
-  return old_num_bytes_allocated + size;
-}
-
-inline mirror::Object* Heap::TryToAllocateUninstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
-                                                         bool grow, size_t* bytes_allocated) {
-  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
-    return NULL;
-  }
-  DCHECK(!running_on_valgrind_);
-  return space->Alloc(self, alloc_size, bytes_allocated);
-}
-
-// DlMallocSpace-specific version.
-inline mirror::Object* Heap::TryToAllocateUninstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
-                                                         bool grow, size_t* bytes_allocated) {
-  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
-    return NULL;
-  }
-  DCHECK(!running_on_valgrind_);
-  return space->AllocNonvirtual(self, alloc_size, bytes_allocated);
-}
-
-template <class T>
-inline mirror::Object* Heap::AllocateUninstrumented(Thread* self, T* space, size_t alloc_size,
-                                                    size_t* bytes_allocated) {
+template <bool kInstrumented, typename PreFenceVisitor>
+inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Class* klass,
+                                                      size_t byte_count, AllocatorType allocator,
+                                                      const PreFenceVisitor& pre_fence_visitor) {
+  DebugCheckPreconditionsForAllocObject(klass, byte_count);
   // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
   // done in the runnable state where suspension is expected.
   DCHECK_EQ(self->GetState(), kRunnable);
   self->AssertThreadSuspensionIsAllowable();
-
-  mirror::Object* ptr = TryToAllocateUninstrumented(self, space, alloc_size, false, bytes_allocated);
-  if (LIKELY(ptr != NULL)) {
-    return ptr;
+  mirror::Object* obj;
+  size_t bytes_allocated;
+  AllocationTimer alloc_timer(this, &obj);
+  if (UNLIKELY(ShouldAllocLargeObject(klass, byte_count))) {
+    obj = TryToAllocate<kInstrumented>(self, kAllocatorTypeLOS, byte_count, false,
+                                       &bytes_allocated);
+    allocator = kAllocatorTypeLOS;
+  } else {
+    obj = TryToAllocate<kInstrumented>(self, allocator, byte_count, false, &bytes_allocated);
   }
-  return AllocateInternalWithGc(self, space, alloc_size, bytes_allocated);
+
+  if (UNLIKELY(obj == nullptr)) {
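+    // The fast path failed; retry through the GC slow path. The SirtRef keeps the class live (and
+    // updated if it moves) across any collection triggered by AllocateInternalWithGc.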
+    SirtRef<mirror::Class> sirt_c(self, klass);
+    obj = AllocateInternalWithGc(self, allocator, byte_count, &bytes_allocated);
+    if (obj == nullptr) {
+      return nullptr;
+    } else {
+      klass = sirt_c.get();
+    }
+  }
+  obj->SetClass(klass);
+  pre_fence_visitor(obj);
+  DCHECK_GT(bytes_allocated, 0u);
+  const size_t new_num_bytes_allocated =
+      static_cast<size_t>(num_bytes_allocated_.fetch_add(bytes_allocated)) + bytes_allocated;
+  // TODO: Deprecate.
+  if (kInstrumented) {
+    if (Runtime::Current()->HasStatsEnabled()) {
+      RuntimeStats* thread_stats = self->GetStats();
+      ++thread_stats->allocated_objects;
+      thread_stats->allocated_bytes += bytes_allocated;
+      RuntimeStats* global_stats = Runtime::Current()->GetStats();
+      ++global_stats->allocated_objects;
+      global_stats->allocated_bytes += bytes_allocated;
+    }
+  } else {
+    DCHECK(!Runtime::Current()->HasStatsEnabled());
+  }
+  if (AllocatorHasAllocationStack(allocator)) {
+    // This is safe to do since the GC will never free objects which are neither in the allocation
+    // stack or the live bitmap.
+    while (!allocation_stack_->AtomicPushBack(obj)) {
+      CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
+    }
+  }
+  if (kInstrumented) {
+    if (Dbg::IsAllocTrackingEnabled()) {
+      Dbg::RecordAllocation(klass, bytes_allocated);
+    }
+  } else {
+    DCHECK(!Dbg::IsAllocTrackingEnabled());
+  }
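+  // Request a concurrent GC if this allocation pushed us past the concurrent start threshold.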
+  if (AllocatorHasConcurrentGC(allocator)) {
+    CheckConcurrentGC(self, new_num_bytes_allocated, obj);
+  }
+  if (kIsDebugBuild) {
+    if (kDesiredHeapVerification > kNoHeapVerification) {
+      VerifyObject(obj);
+    }
+    self->VerifyStack();
+  }
+  return obj;
 }
 
-inline bool Heap::TryAllocLargeObjectUninstrumented(Thread* self, mirror::Class* c, size_t byte_count,
-                                                    mirror::Object** obj_ptr, size_t* bytes_allocated) {
-  bool large_object_allocation = ShouldAllocLargeObject(c, byte_count);
-  if (UNLIKELY(large_object_allocation)) {
-    mirror::Object* obj = AllocateUninstrumented(self, large_object_space_, byte_count, bytes_allocated);
-    // Make sure that our large object didn't get placed anywhere within the space interval or else
-    // it breaks the immune range.
-    DCHECK(obj == NULL ||
-           reinterpret_cast<byte*>(obj) < continuous_spaces_.front()->Begin() ||
-           reinterpret_cast<byte*>(obj) >= continuous_spaces_.back()->End());
-    *obj_ptr = obj;
+template <const bool kInstrumented>
+inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator_type,
+                                           size_t alloc_size, bool grow,
+                                           size_t* bytes_allocated) {
+  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
+    return nullptr;
   }
-  return large_object_allocation;
+  if (kInstrumented) {
+    if (UNLIKELY(running_on_valgrind_ && allocator_type == kAllocatorTypeFreeList)) {
+      return non_moving_space_->Alloc(self, alloc_size, bytes_allocated);
+    }
+  }
+  mirror::Object* ret;
+  switch (allocator_type) {
+    case kAllocatorTypeBumpPointer: {
+      DCHECK(bump_pointer_space_ != nullptr);
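+      // The bump pointer space only hands out allocations that are a multiple of kAlignment.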
+      alloc_size = RoundUp(alloc_size, space::BumpPointerSpace::kAlignment);
+      ret = bump_pointer_space_->AllocNonvirtual(alloc_size);
+      if (LIKELY(ret != nullptr)) {
+        *bytes_allocated = alloc_size;
+      }
+      break;
+    }
+    case kAllocatorTypeFreeList: {
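+      // The non-moving space is backed by either RosAlloc or DlMalloc, chosen at build time via
+      // kUseRosAlloc.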
+      if (kUseRosAlloc) {
+        ret = reinterpret_cast<space::RosAllocSpace*>(non_moving_space_)->AllocNonvirtual(
+            self, alloc_size, bytes_allocated);
+      } else {
+        ret = reinterpret_cast<space::DlMallocSpace*>(non_moving_space_)->AllocNonvirtual(
+            self, alloc_size, bytes_allocated);
+      }
+      break;
+    }
+    case kAllocatorTypeLOS: {
+      ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated);
+      // Note that the bump pointer spaces aren't necessarily next to
+      // the other continuous spaces like the non-moving alloc space or
+      // the zygote space.
+      DCHECK(ret == nullptr || large_object_space_->Contains(ret));
+      break;
+    }
+    default: {
+      LOG(FATAL) << "Invalid allocator type";
+      ret = nullptr;
+    }
+  }
+  return ret;
 }
 
-inline void Heap::DebugCheckPreconditionsForAllobObject(mirror::Class* c, size_t byte_count) {
+inline void Heap::DebugCheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count) {
   DCHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) ||
          (c->IsVariableSize() || c->GetObjectSize() == byte_count) ||
          strlen(ClassHelper(c).GetDescriptor()) == 0);
@@ -142,14 +172,14 @@
   if (kMeasureAllocationTime) {
     mirror::Object* allocated_obj = *allocated_obj_ptr_;
     // Only if the allocation succeeded, record the time.
-    if (allocated_obj != NULL) {
+    if (allocated_obj != nullptr) {
       uint64_t allocation_end_time = NanoTime() / kTimeAdjust;
       heap_->total_allocation_time_.fetch_add(allocation_end_time - allocation_start_time_);
     }
   }
 };
 
-inline bool Heap::ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) {
+inline bool Heap::ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const {
   // We need to have a zygote space or else our newly allocated large object can end up in the
   // Zygote resulting in it being prematurely freed.
   // We can only do this for primitive objects since large objects will not be within the card table
@@ -174,7 +204,8 @@
   return false;
 }
 
-inline void Heap::CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated, mirror::Object* obj) {
+inline void Heap::CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated,
+                                    mirror::Object* obj) {
   if (UNLIKELY(new_num_bytes_allocated >= concurrent_start_bytes_)) {
     // The SirtRef is necessary since the calls in RequestConcurrentGC are a safepoint.
     SirtRef<mirror::Object> ref(self, obj);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 804c669..5e62729 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -23,6 +23,7 @@
 #include <vector>
 #include <valgrind.h>
 
+#include "base/histogram-inl.h"
 #include "base/stl_util.h"
 #include "common_throws.h"
 #include "cutils/sched_policy.h"
@@ -30,14 +31,18 @@
 #include "gc/accounting/atomic_stack.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap-inl.h"
+#include "gc/accounting/mod_union_table.h"
 #include "gc/accounting/mod_union_table-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/collector/mark_sweep-inl.h"
 #include "gc/collector/partial_mark_sweep.h"
+#include "gc/collector/semi_space.h"
 #include "gc/collector/sticky_mark_sweep.h"
+#include "gc/space/bump_pointer_space.h"
 #include "gc/space/dlmalloc_space-inl.h"
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
+#include "gc/space/rosalloc_space-inl.h"
 #include "gc/space/space-inl.h"
 #include "heap-inl.h"
 #include "image.h"
@@ -49,6 +54,7 @@
 #include "mirror/object_array-inl.h"
 #include "object_utils.h"
 #include "os.h"
+#include "runtime.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
 #include "sirt_ref.h"
@@ -57,22 +63,24 @@
 #include "well_known_classes.h"
 
 namespace art {
+
+extern void SetQuickAllocEntryPointsAllocator(gc::AllocatorType allocator);
+
 namespace gc {
 
 static constexpr bool kGCALotMode = false;
 static constexpr size_t kGcAlotInterval = KB;
-static constexpr bool kDumpGcPerformanceOnShutdown = false;
 // Minimum amount of remaining bytes before a concurrent GC is triggered.
 static constexpr size_t kMinConcurrentRemainingBytes = 128 * KB;
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, size_t capacity, const std::string& image_file_name,
-           bool concurrent_gc, size_t parallel_gc_threads, size_t conc_gc_threads,
+           CollectorType collector_type, size_t parallel_gc_threads, size_t conc_gc_threads,
            bool low_memory_mode, size_t long_pause_log_threshold, size_t long_gc_log_threshold,
            bool ignore_max_footprint)
-    : alloc_space_(NULL),
-      card_table_(NULL),
-      concurrent_gc_(concurrent_gc),
+    : non_moving_space_(nullptr),
+      concurrent_gc_(collector_type == gc::kCollectorTypeCMS),
+      collector_type_(collector_type),
       parallel_gc_threads_(parallel_gc_threads),
       conc_gc_threads_(conc_gc_threads),
       low_memory_mode_(low_memory_mode),
@@ -80,10 +88,11 @@
       long_gc_log_threshold_(long_gc_log_threshold),
       ignore_max_footprint_(ignore_max_footprint),
       have_zygote_space_(false),
-      soft_ref_queue_lock_(NULL),
-      weak_ref_queue_lock_(NULL),
-      finalizer_ref_queue_lock_(NULL),
-      phantom_ref_queue_lock_(NULL),
+      soft_reference_queue_(this),
+      weak_reference_queue_(this),
+      finalizer_reference_queue_(this),
+      phantom_reference_queue_(this),
+      cleared_references_(this),
       is_gc_running_(false),
       last_gc_type_(collector::kGcTypeNone),
       next_gc_type_(collector::kGcTypePartial),
@@ -92,6 +101,7 @@
       max_allowed_footprint_(initial_size),
       native_footprint_gc_watermark_(initial_size),
       native_footprint_limit_(2 * initial_size),
+      native_need_to_run_finalization_(false),
       activity_thread_class_(NULL),
       application_thread_class_(NULL),
       activity_thread_(NULL),
@@ -122,7 +132,11 @@
        * searching.
        */
       max_allocation_stack_size_(kGCALotMode ? kGcAlotInterval
-          : (kDesiredHeapVerification > kNoHeapVerification) ? KB : MB),
+          : (kDesiredHeapVerification > kVerifyAllFast) ? KB : MB),
+      current_allocator_(kMovingCollector ? kAllocatorTypeBumpPointer : kAllocatorTypeFreeList),
+      current_non_moving_allocator_(kAllocatorTypeFreeList),
+      bump_pointer_space_(nullptr),
+      temp_space_(nullptr),
       reference_referent_offset_(0),
       reference_queue_offset_(0),
       reference_queueNext_offset_(0),
@@ -134,38 +148,59 @@
       total_wait_time_(0),
       total_allocation_time_(0),
       verify_object_mode_(kHeapVerificationNotPermitted),
+      gc_disable_count_(0),
       running_on_valgrind_(RUNNING_ON_VALGRIND) {
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
-
+  // If we aren't the zygote, switch to the default non-zygote allocator. This may update the
+  // entrypoints.
+  if (!Runtime::Current()->IsZygote()) {
+    ChangeCollector(collector_type_);
+  }
   live_bitmap_.reset(new accounting::HeapBitmap(this));
   mark_bitmap_.reset(new accounting::HeapBitmap(this));
-
   // Requested begin for the alloc space, to follow the mapped image and oat files
-  byte* requested_alloc_space_begin = NULL;
+  byte* requested_alloc_space_begin = nullptr;
   if (!image_file_name.empty()) {
     space::ImageSpace* image_space = space::ImageSpace::Create(image_file_name.c_str());
-    CHECK(image_space != NULL) << "Failed to create space for " << image_file_name;
-    AddContinuousSpace(image_space);
+    CHECK(image_space != nullptr) << "Failed to create space for " << image_file_name;
+    AddSpace(image_space);
     // Oat files referenced by image files immediately follow them in memory, ensure alloc space
     // isn't going to get in the middle
     byte* oat_file_end_addr = image_space->GetImageHeader().GetOatFileEnd();
     CHECK_GT(oat_file_end_addr, image_space->End());
     if (oat_file_end_addr > requested_alloc_space_begin) {
-      requested_alloc_space_begin =
-          reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(oat_file_end_addr),
-                                          kPageSize));
+      requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
     }
   }
 
-  alloc_space_ = space::DlMallocSpace::Create(Runtime::Current()->IsZygote() ? "zygote space" : "alloc space",
-                                              initial_size,
-                                              growth_limit, capacity,
-                                              requested_alloc_space_begin);
-  CHECK(alloc_space_ != NULL) << "Failed to create alloc space";
-  alloc_space_->SetFootprintLimit(alloc_space_->Capacity());
-  AddContinuousSpace(alloc_space_);
+  const char* name = Runtime::Current()->IsZygote() ? "zygote space" : "alloc space";
+  if (!kUseRosAlloc) {
+    non_moving_space_ = space::DlMallocSpace::Create(name, initial_size, growth_limit, capacity,
+                                                     requested_alloc_space_begin);
+  } else {
+    non_moving_space_ = space::RosAllocSpace::Create(name, initial_size, growth_limit, capacity,
+                                                     requested_alloc_space_begin);
+  }
+  if (kMovingCollector) {
+    // TODO: Place bump-pointer spaces somewhere to minimize size of card table.
+    // TODO: Having 3+ spaces as big as the large heap size can cause virtual memory fragmentation
+    // issues.
+    const size_t bump_pointer_space_size = std::min(non_moving_space_->Capacity(), 128 * MB);
+    bump_pointer_space_ = space::BumpPointerSpace::Create("Bump pointer space",
+                                                          bump_pointer_space_size, nullptr);
+    CHECK(bump_pointer_space_ != nullptr) << "Failed to create bump pointer space";
+    AddSpace(bump_pointer_space_);
+    temp_space_ = space::BumpPointerSpace::Create("Bump pointer space 2", bump_pointer_space_size,
+                                                  nullptr);
+    CHECK(temp_space_ != nullptr) << "Failed to create bump pointer space";
+    AddSpace(temp_space_);
+  }
+
+  CHECK(non_moving_space_ != NULL) << "Failed to create non-moving space";
+  non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
+  AddSpace(non_moving_space_);
 
   // Allocate the large object space.
   const bool kUseFreeListSpaceForLOS = false;
@@ -175,22 +210,23 @@
     large_object_space_ = space::LargeObjectMapSpace::Create("large object space");
   }
   CHECK(large_object_space_ != NULL) << "Failed to create large object space";
-  AddDiscontinuousSpace(large_object_space_);
+  AddSpace(large_object_space_);
 
   // Compute heap capacity. Continuous spaces are sorted in order of Begin().
+  CHECK(!continuous_spaces_.empty());
+  // Relies on the spaces being sorted.
   byte* heap_begin = continuous_spaces_.front()->Begin();
-  size_t heap_capacity = continuous_spaces_.back()->End() - continuous_spaces_.front()->Begin();
-  if (continuous_spaces_.back()->IsDlMallocSpace()) {
-    heap_capacity += continuous_spaces_.back()->AsDlMallocSpace()->NonGrowthLimitCapacity();
-  }
+  byte* heap_end = continuous_spaces_.back()->Limit();
+  size_t heap_capacity = heap_end - heap_begin;
 
   // Allocate the card table.
   card_table_.reset(accounting::CardTable::Create(heap_begin, heap_capacity));
   CHECK(card_table_.get() != NULL) << "Failed to create card table";
 
+  // Use a card cache mod-union table for now, since it makes it easier for us to update
+  // references into the copying spaces.
   accounting::ModUnionTable* mod_union_table =
-      new accounting::ModUnionTableToZygoteAllocspace("Image mod-union table", this,
-                                                      GetImageSpace());
+      new accounting::ModUnionTableCardCache("Image mod-union table", this, GetImageSpace());
   CHECK(mod_union_table != nullptr) << "Failed to create image mod-union table";
   AddModUnionTable(mod_union_table);
 
@@ -211,33 +247,33 @@
   gc_complete_lock_ = new Mutex("GC complete lock");
   gc_complete_cond_.reset(new ConditionVariable("GC complete condition variable",
                                                 *gc_complete_lock_));
-
-  // Create the reference queue locks, this is required so for parallel object scanning in the GC.
-  soft_ref_queue_lock_ = new Mutex("Soft reference queue lock");
-  weak_ref_queue_lock_ = new Mutex("Weak reference queue lock");
-  finalizer_ref_queue_lock_ = new Mutex("Finalizer reference queue lock");
-  phantom_ref_queue_lock_ = new Mutex("Phantom reference queue lock");
-
   last_gc_time_ns_ = NanoTime();
   last_gc_size_ = GetBytesAllocated();
 
   if (ignore_max_footprint_) {
     SetIdealFootprint(std::numeric_limits<size_t>::max());
-    concurrent_start_bytes_ = max_allowed_footprint_;
+    concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
   }
+  CHECK_NE(max_allowed_footprint_, 0U);
 
   // Create our garbage collectors.
   for (size_t i = 0; i < 2; ++i) {
     const bool concurrent = i != 0;
-    mark_sweep_collectors_.push_back(new collector::MarkSweep(this, concurrent));
-    mark_sweep_collectors_.push_back(new collector::PartialMarkSweep(this, concurrent));
-    mark_sweep_collectors_.push_back(new collector::StickyMarkSweep(this, concurrent));
+    garbage_collectors_.push_back(new collector::MarkSweep(this, concurrent));
+    garbage_collectors_.push_back(new collector::PartialMarkSweep(this, concurrent));
+    garbage_collectors_.push_back(new collector::StickyMarkSweep(this, concurrent));
+  }
+  gc_plan_.push_back(collector::kGcTypeSticky);
+  gc_plan_.push_back(collector::kGcTypePartial);
+  gc_plan_.push_back(collector::kGcTypeFull);
+  if (kMovingCollector) {
+    // TODO: Clean this up.
+    semi_space_collector_ = new collector::SemiSpace(this);
+    garbage_collectors_.push_back(semi_space_collector_);
   }
 
-  CHECK_NE(max_allowed_footprint_, 0U);
-
   if (running_on_valgrind_) {
-    Runtime::Current()->InstrumentQuickAllocEntryPoints();
+    Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints();
   }
 
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
@@ -245,19 +281,100 @@
   }
 }
 
+void Heap::ChangeAllocator(AllocatorType allocator) {
+  DCHECK_NE(allocator, kAllocatorTypeLOS);
+  if (current_allocator_ != allocator) {
+    current_allocator_ = allocator;
+    SetQuickAllocEntryPointsAllocator(current_allocator_);
+    Runtime::Current()->GetInstrumentation()->ResetQuickAllocEntryPoints();
+  }
+}
+
+bool Heap::IsCompilingBoot() const {
+  for (const auto& space : continuous_spaces_) {
+    if (space->IsImageSpace()) {
+      return false;
+    } else if (space->IsZygoteSpace()) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool Heap::HasImageSpace() const {
+  for (const auto& space : continuous_spaces_) {
+    if (space->IsImageSpace()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void Heap::IncrementDisableGC(Thread* self) {
+  // Need to do this holding the lock to prevent races where the GC is about to run / running when
+  // we attempt to disable it.
+  ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
+  MutexLock mu(self, *gc_complete_lock_);
+  WaitForGcToCompleteLocked(self);
+  ++gc_disable_count_;
+}
+
+void Heap::DecrementDisableGC(Thread* self) {
+  MutexLock mu(self, *gc_complete_lock_);
+  CHECK_GT(gc_disable_count_, 0U);
+  --gc_disable_count_;
+}
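// A minimal sketch, assuming only the IncrementDisableGC/DecrementDisableGC pair above: a
// hypothetical RAII guard that keeps the GC disabled for exactly one scope (for example while
// native code holds a raw pointer into the heap). The class name is illustrative and is not
// part of this change.
class ScopedDisableGC {
 public:
  ScopedDisableGC(Heap* heap, Thread* self) : heap_(heap), self_(self) {
    heap_->IncrementDisableGC(self_);
  }
  ~ScopedDisableGC() {
    heap_->DecrementDisableGC(self_);
  }

 private:
  Heap* const heap_;
  Thread* const self_;
};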
+
 void Heap::CreateThreadPool() {
   const size_t num_threads = std::max(parallel_gc_threads_, conc_gc_threads_);
   if (num_threads != 0) {
-    thread_pool_.reset(new ThreadPool(num_threads));
+    thread_pool_.reset(new ThreadPool("Heap thread pool", num_threads));
   }
 }
 
+void Heap::VisitObjects(ObjectVisitorCallback callback, void* arg) {
+  // Visit objects in bump pointer space.
+  Thread* self = Thread::Current();
+  // TODO: Use reference block.
+  std::vector<SirtRef<mirror::Object>*> saved_refs;
+  if (bump_pointer_space_ != nullptr) {
+    // Need to put all these in sirts since the callback may trigger a GC. TODO: Use a better data
+    // structure.
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(bump_pointer_space_->Begin());
+    const mirror::Object* end = reinterpret_cast<const mirror::Object*>(
+        bump_pointer_space_->End());
+    while (obj < end) {
+      saved_refs.push_back(new SirtRef<mirror::Object>(self, obj));
+      obj = space::BumpPointerSpace::GetNextObject(obj);
+    }
+  }
+  // TODO: Switch to standard begin and end to use a range-based loop.
+  for (mirror::Object** it = allocation_stack_->Begin(), **end = allocation_stack_->End();
+      it < end; ++it) {
+    mirror::Object* obj = *it;
+    // Objects in the allocation stack might be in a movable space.
+    saved_refs.push_back(new SirtRef<mirror::Object>(self, obj));
+  }
+  GetLiveBitmap()->Walk(callback, arg);
+  for (const auto& ref : saved_refs) {
+    callback(ref->get(), arg);
+  }
+  // Need to free the sirts in the reverse order in which they were allocated.
+  for (size_t i = saved_refs.size(); i != 0; --i) {
+    delete saved_refs[i - 1];
+  }
+}
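// A minimal sketch, assuming only that bump pointer spaces lay objects out back to back and
// that advancing to the next object means rounding the current object's size up to the object
// alignment; the helper names and the size callback are illustrative, not the ART
// BumpPointerSpace API.
#include <cstddef>
#include <cstdint>

template <typename Visitor, typename GetSize>
void WalkBumpPointerRegionSketch(uint8_t* begin, uint8_t* end, size_t alignment,
                                 const GetSize& object_size, const Visitor& visitor) {
  uint8_t* pos = begin;
  while (pos < end) {
    visitor(pos);
    // Advance past this object, keeping the required object alignment.
    pos += (object_size(pos) + alignment - 1) & ~(alignment - 1);
  }
}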
+
+void Heap::MarkAllocStackAsLive(accounting::ObjectStack* stack) {
+  MarkAllocStack(non_moving_space_->GetLiveBitmap(), large_object_space_->GetLiveObjects(), stack);
+}
+
 void Heap::DeleteThreadPool() {
   thread_pool_.reset(nullptr);
 }
 
 static bool ReadStaticInt(JNIEnvExt* env, jclass clz, const char* name, int* out_value) {
-  CHECK(out_value != NULL);
+  DCHECK(out_value != NULL);
   jfieldID field = env->GetStaticFieldID(clz, name, "I");
   if (field == NULL) {
     env->ExceptionClear();
@@ -374,62 +491,71 @@
   }
 }
 
-void Heap::AddContinuousSpace(space::ContinuousSpace* space) {
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+void Heap::AddSpace(space::Space* space) {
   DCHECK(space != NULL);
-  DCHECK(space->GetLiveBitmap() != NULL);
-  live_bitmap_->AddContinuousSpaceBitmap(space->GetLiveBitmap());
-  DCHECK(space->GetMarkBitmap() != NULL);
-  mark_bitmap_->AddContinuousSpaceBitmap(space->GetMarkBitmap());
-  continuous_spaces_.push_back(space);
-  if (space->IsDlMallocSpace() && !space->IsLargeObjectSpace()) {
-    alloc_space_ = space->AsDlMallocSpace();
-  }
-
-  // Ensure that spaces remain sorted in increasing order of start address (required for CMS finger)
-  std::sort(continuous_spaces_.begin(), continuous_spaces_.end(),
-            [](const space::ContinuousSpace* a, const space::ContinuousSpace* b) {
-              return a->Begin() < b->Begin();
-            });
-
-  // Ensure that ImageSpaces < ZygoteSpaces < AllocSpaces so that we can do address based checks to
-  // avoid redundant marking.
-  bool seen_zygote = false, seen_alloc = false;
-  for (const auto& space : continuous_spaces_) {
-    if (space->IsImageSpace()) {
-      DCHECK(!seen_zygote);
-      DCHECK(!seen_alloc);
-    } else if (space->IsZygoteSpace()) {
-      DCHECK(!seen_alloc);
-      seen_zygote = true;
-    } else if (space->IsDlMallocSpace()) {
-      seen_alloc = true;
+  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  if (space->IsContinuousSpace()) {
+    DCHECK(!space->IsDiscontinuousSpace());
+    space::ContinuousSpace* continuous_space = space->AsContinuousSpace();
+    // Continuous spaces don't necessarily have bitmaps.
+    accounting::SpaceBitmap* live_bitmap = continuous_space->GetLiveBitmap();
+    accounting::SpaceBitmap* mark_bitmap = continuous_space->GetMarkBitmap();
+    if (live_bitmap != nullptr) {
+      DCHECK(mark_bitmap != nullptr);
+      live_bitmap_->AddContinuousSpaceBitmap(live_bitmap);
+      mark_bitmap_->AddContinuousSpaceBitmap(mark_bitmap);
     }
+
+    continuous_spaces_.push_back(continuous_space);
+    if (continuous_space->IsMallocSpace()) {
+      non_moving_space_ = continuous_space->AsMallocSpace();
+    }
+
+    // Ensure that spaces remain sorted in increasing order of start address.
+    std::sort(continuous_spaces_.begin(), continuous_spaces_.end(),
+              [](const space::ContinuousSpace* a, const space::ContinuousSpace* b) {
+      return a->Begin() < b->Begin();
+    });
+    // Ensure that ImageSpaces < ZygoteSpaces < AllocSpaces so that we can do address based checks to
+    // avoid redundant marking.
+    bool seen_zygote = false, seen_alloc = false;
+    for (const auto& space : continuous_spaces_) {
+      if (space->IsImageSpace()) {
+        CHECK(!seen_zygote);
+        CHECK(!seen_alloc);
+      } else if (space->IsZygoteSpace()) {
+        CHECK(!seen_alloc);
+        seen_zygote = true;
+      } else if (space->IsMallocSpace()) {
+        seen_alloc = true;
+      }
+    }
+  } else {
+    DCHECK(space->IsDiscontinuousSpace());
+    space::DiscontinuousSpace* discontinuous_space = space->AsDiscontinuousSpace();
+    DCHECK(discontinuous_space->GetLiveObjects() != nullptr);
+    live_bitmap_->AddDiscontinuousObjectSet(discontinuous_space->GetLiveObjects());
+    DCHECK(discontinuous_space->GetMarkObjects() != nullptr);
+    mark_bitmap_->AddDiscontinuousObjectSet(discontinuous_space->GetMarkObjects());
+    discontinuous_spaces_.push_back(discontinuous_space);
+  }
+  if (space->IsAllocSpace()) {
+    alloc_spaces_.push_back(space->AsAllocSpace());
   }
 }
 
 void Heap::RegisterGCAllocation(size_t bytes) {
-  if (this != NULL) {
+  if (this != nullptr) {
     gc_memory_overhead_.fetch_add(bytes);
   }
 }
 
 void Heap::RegisterGCDeAllocation(size_t bytes) {
-  if (this != NULL) {
+  if (this != nullptr) {
     gc_memory_overhead_.fetch_sub(bytes);
   }
 }
 
-void Heap::AddDiscontinuousSpace(space::DiscontinuousSpace* space) {
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
-  DCHECK(space != NULL);
-  DCHECK(space->GetLiveObjects() != NULL);
-  live_bitmap_->AddDiscontinuousObjectSet(space->GetLiveObjects());
-  DCHECK(space->GetMarkObjects() != NULL);
-  mark_bitmap_->AddDiscontinuousObjectSet(space->GetMarkObjects());
-  discontinuous_spaces_.push_back(space);
-}
-
 void Heap::DumpGcPerformanceInfo(std::ostream& os) {
   // Dump cumulative timings.
   os << "Dumping cumulative Gc timings\n";
@@ -437,7 +563,7 @@
 
   // Dump cumulative loggers for each GC type.
   uint64_t total_paused_time = 0;
-  for (const auto& collector : mark_sweep_collectors_) {
+  for (const auto& collector : garbage_collectors_) {
     CumulativeLogger& logger = collector->GetCumulativeTimings();
     if (logger.GetTotalNs() != 0) {
       os << Dumpable<CumulativeLogger>(logger);
@@ -446,8 +572,10 @@
       double seconds = NsToMs(logger.GetTotalNs()) / 1000.0;
       const uint64_t freed_bytes = collector->GetTotalFreedBytes();
       const uint64_t freed_objects = collector->GetTotalFreedObjects();
+      Histogram<uint64_t>::CumulativeData cumulative_data;
+      collector->GetPauseHistogram().CreateHistogram(&cumulative_data);
+      collector->GetPauseHistogram().PrintConfidenceIntervals(os, 0.99, cumulative_data);
       os << collector->GetName() << " total time: " << PrettyDuration(total_ns) << "\n"
-         << collector->GetName() << " paused time: " << PrettyDuration(total_pause_ns) << "\n"
          << collector->GetName() << " freed: " << freed_objects
          << " objects with total size " << PrettySize(freed_bytes) << "\n"
          << collector->GetName() << " throughput: " << freed_objects / seconds << "/s / "
@@ -480,25 +608,16 @@
 }
 
 Heap::~Heap() {
-  if (kDumpGcPerformanceOnShutdown) {
-    DumpGcPerformanceInfo(LOG(INFO));
-  }
-
-  STLDeleteElements(&mark_sweep_collectors_);
-
-  // If we don't reset then the mark stack complains in it's destructor.
+  VLOG(heap) << "Starting ~Heap()";
+  STLDeleteElements(&garbage_collectors_);
+  // If we don't reset then the mark stack complains in its destructor.
   allocation_stack_->Reset();
   live_stack_->Reset();
-
-  VLOG(heap) << "~Heap()";
   STLDeleteValues(&mod_union_tables_);
   STLDeleteElements(&continuous_spaces_);
   STLDeleteElements(&discontinuous_spaces_);
   delete gc_complete_lock_;
-  delete soft_ref_queue_lock_;
-  delete weak_ref_queue_lock_;
-  delete finalizer_ref_queue_lock_;
-  delete phantom_ref_queue_lock_;
+  VLOG(heap) << "Finished ~Heap()";
 }
 
 space::ContinuousSpace* Heap::FindContinuousSpaceFromObject(const mirror::Object* obj,
@@ -535,6 +654,106 @@
   return FindDiscontinuousSpaceFromObject(obj, true);
 }
 
+struct SoftReferenceArgs {
+  RootVisitor* is_marked_callback_;
+  RootVisitor* recursive_mark_callback_;
+  void* arg_;
+};
+
+mirror::Object* Heap::PreserveSoftReferenceCallback(mirror::Object* obj, void* arg) {
+  SoftReferenceArgs* args  = reinterpret_cast<SoftReferenceArgs*>(arg);
+  // TODO: Don't preserve all soft references.
+  return args->recursive_mark_callback_(obj, args->arg_);
+}
+
+// Process reference class instances and schedule finalizations.
+void Heap::ProcessReferences(TimingLogger& timings, bool clear_soft,
+                             RootVisitor* is_marked_callback,
+                             RootVisitor* recursive_mark_object_callback, void* arg) {
+  // Unless we are in the zygote or required to clear soft references with white references,
+  // preserve some white referents.
+  if (!clear_soft && !Runtime::Current()->IsZygote()) {
+    SoftReferenceArgs soft_reference_args;
+    soft_reference_args.is_marked_callback_ = is_marked_callback;
+    soft_reference_args.recursive_mark_callback_ = recursive_mark_object_callback;
+    soft_reference_args.arg_ = arg;
+    soft_reference_queue_.PreserveSomeSoftReferences(&PreserveSoftReferenceCallback,
+                                                     &soft_reference_args);
+  }
+  timings.StartSplit("ProcessReferences");
+  // Clear all remaining soft and weak references with white referents.
+  soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  timings.EndSplit();
+  // Preserve all white objects with finalize methods and schedule them for finalization.
+  timings.StartSplit("EnqueueFinalizerReferences");
+  finalizer_reference_queue_.EnqueueFinalizerReferences(cleared_references_, is_marked_callback,
+                                                        recursive_mark_object_callback, arg);
+  timings.EndSplit();
+  timings.StartSplit("ProcessReferences");
+  // Clear all f-reachable soft and weak references with white referents.
+  soft_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  weak_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  // Clear all phantom references with white referents.
+  phantom_reference_queue_.ClearWhiteReferences(cleared_references_, is_marked_callback, arg);
+  // At this point all reference queues other than the cleared references should be empty.
+  DCHECK(soft_reference_queue_.IsEmpty());
+  DCHECK(weak_reference_queue_.IsEmpty());
+  DCHECK(finalizer_reference_queue_.IsEmpty());
+  DCHECK(phantom_reference_queue_.IsEmpty());
+  timings.EndSplit();
+}
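// A minimal sketch of what "clear white references" above does, assuming a callback that
// returns the forwarding address of a marked object and null for an unmarked one. The
// reference struct and helper here are illustrative stand-ins, not the ART ReferenceQueue API.
#include <vector>

struct SketchReference {
  void* referent;  // Object the java.lang.ref.Reference points at.
};

typedef void* (*IsMarkedSketchCallback)(void* obj, void* arg);

void ClearWhiteReferencesSketch(std::vector<SketchReference*>* queue,
                                std::vector<SketchReference*>* cleared,
                                IsMarkedSketchCallback is_marked, void* arg) {
  for (SketchReference* ref : *queue) {
    void* forward_address = is_marked(ref->referent, arg);
    if (forward_address == nullptr) {
      ref->referent = nullptr;          // White referent: clear it ...
      cleared->push_back(ref);          // ... and schedule the Reference for enqueueing.
    } else {
      ref->referent = forward_address;  // Marked referent: fix up a possibly moved address.
    }
  }
  queue->clear();
}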
+
+bool Heap::IsEnqueued(mirror::Object* ref) const {
+  // Since the references are stored in cyclic lists, the pending next field will always be
+  // non-null once the reference has been enqueued.
+  return ref->GetFieldObject<mirror::Object*>(GetReferencePendingNextOffset(), false) != nullptr;
+}
+
+bool Heap::IsEnqueuable(const mirror::Object* ref) const {
+  DCHECK(ref != nullptr);
+  const mirror::Object* queue =
+      ref->GetFieldObject<mirror::Object*>(GetReferenceQueueOffset(), false);
+  const mirror::Object* queue_next =
+      ref->GetFieldObject<mirror::Object*>(GetReferenceQueueNextOffset(), false);
+  return queue != nullptr && queue_next == nullptr;
+}
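// A minimal sketch, under the same cyclic-list assumption as IsEnqueued() above: the very
// first enqueue makes the reference point at itself through pendingNext, so pendingNext is
// only null for references that have never been enqueued. The types and helper are
// illustrative, not the ART implementation.
struct PendingRefSketch {
  PendingRefSketch* pending_next = nullptr;
};

// Enqueue 'ref' onto the circular singly-linked list whose tail is '*tail'.
void EnqueuePendingSketch(PendingRefSketch** tail, PendingRefSketch* ref) {
  if (*tail == nullptr) {
    ref->pending_next = ref;                    // First element: self-cycle.
  } else {
    ref->pending_next = (*tail)->pending_next;  // New tail points back at the head ...
    (*tail)->pending_next = ref;                // ... and the old tail points at it.
  }
  *tail = ref;
  // From here on ref->pending_next != nullptr, which is exactly what IsEnqueued() tests.
}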
+
+// Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
+// marked, put it on the appropriate list in the heap for later processing.
+void Heap::DelayReferenceReferent(mirror::Class* klass, mirror::Object* obj,
+                                  RootVisitor mark_visitor, void* arg) {
+  DCHECK(klass != nullptr);
+  DCHECK(klass->IsReferenceClass());
+  DCHECK(obj != nullptr);
+  mirror::Object* referent = GetReferenceReferent(obj);
+  if (referent != nullptr) {
+    mirror::Object* forward_address = mark_visitor(referent, arg);
+    // Null means that the object is not currently marked.
+    if (forward_address == nullptr) {
+      Thread* self = Thread::Current();
+      // TODO: Remove these locks, and use atomic stacks for storing references?
+      // We need to check that the references haven't already been enqueued since we can end up
+      // scanning the same reference multiple times due to dirty cards.
+      if (klass->IsSoftReferenceClass()) {
+        soft_reference_queue_.AtomicEnqueueIfNotEnqueued(self, obj);
+      } else if (klass->IsWeakReferenceClass()) {
+        weak_reference_queue_.AtomicEnqueueIfNotEnqueued(self, obj);
+      } else if (klass->IsFinalizerReferenceClass()) {
+        finalizer_reference_queue_.AtomicEnqueueIfNotEnqueued(self, obj);
+      } else if (klass->IsPhantomReferenceClass()) {
+        phantom_reference_queue_.AtomicEnqueueIfNotEnqueued(self, obj);
+      } else {
+        LOG(FATAL) << "Invalid reference type " << PrettyClass(klass) << " " << std::hex
+                   << klass->GetAccessFlags();
+      }
+    } else if (referent != forward_address) {
+      // Referent is already marked and we need to update it.
+      SetReferenceReferent(obj, forward_address);
+    }
+  }
+}
+
 space::ImageSpace* Heap::GetImageSpace() const {
   for (const auto& space : continuous_spaces_) {
     if (space->IsImageSpace()) {
@@ -562,8 +781,15 @@
   if (!large_object_allocation && total_bytes_free >= byte_count) {
     size_t max_contiguous_allocation = 0;
     for (const auto& space : continuous_spaces_) {
-      if (space->IsDlMallocSpace()) {
-        space->AsDlMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
+      if (space->IsMallocSpace()) {
+        // To allow Walk/InspectAll() to exclusively lock the mutator lock, temporarily
+        // release our shared hold on it here by transitioning to the suspended state.
+        Locks::mutator_lock_->AssertSharedHeld(self);
+        self->TransitionFromRunnableToSuspended(kSuspended);
+        space->AsMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
+        self->TransitionFromSuspendedToRunnable();
+        Locks::mutator_lock_->AssertSharedHeld(self);
       }
     }
     oss << "; failed due to fragmentation (largest possible contiguous allocation "
@@ -572,68 +798,57 @@
   self->ThrowOutOfMemoryError(oss.str().c_str());
 }
 
-inline bool Heap::TryAllocLargeObjectInstrumented(Thread* self, mirror::Class* c, size_t byte_count,
-                                                  mirror::Object** obj_ptr, size_t* bytes_allocated) {
-  bool large_object_allocation = ShouldAllocLargeObject(c, byte_count);
-  if (UNLIKELY(large_object_allocation)) {
-    mirror::Object* obj = AllocateInstrumented(self, large_object_space_, byte_count, bytes_allocated);
-    // Make sure that our large object didn't get placed anywhere within the space interval or else
-    // it breaks the immune range.
-    DCHECK(obj == NULL ||
-           reinterpret_cast<byte*>(obj) < continuous_spaces_.front()->Begin() ||
-           reinterpret_cast<byte*>(obj) >= continuous_spaces_.back()->End());
-    *obj_ptr = obj;
+void Heap::Trim() {
+  uint64_t start_ns = NanoTime();
+  // Trim the managed spaces.
+  uint64_t total_alloc_space_allocated = 0;
+  uint64_t total_alloc_space_size = 0;
+  uint64_t managed_reclaimed = 0;
+  for (const auto& space : continuous_spaces_) {
+    if (space->IsMallocSpace() && !space->IsZygoteSpace()) {
+      gc::space::MallocSpace* alloc_space = space->AsMallocSpace();
+      total_alloc_space_size += alloc_space->Size();
+      managed_reclaimed += alloc_space->Trim();
+    }
   }
-  return large_object_allocation;
+  total_alloc_space_allocated = GetBytesAllocated() - large_object_space_->GetBytesAllocated() -
+      bump_pointer_space_->GetBytesAllocated();
+  const float managed_utilization = static_cast<float>(total_alloc_space_allocated) /
+      static_cast<float>(total_alloc_space_size);
+  uint64_t gc_heap_end_ns = NanoTime();
+  // Trim the native heap.
+  dlmalloc_trim(0);
+  size_t native_reclaimed = 0;
+  dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
+  uint64_t end_ns = NanoTime();
+  VLOG(heap) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
+      << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
+      << PrettyDuration(end_ns - gc_heap_end_ns) << ", advised=" << PrettySize(native_reclaimed)
+      << ") heaps. Managed heap utilization of " << static_cast<int>(100 * managed_utilization)
+      << "%.";
 }
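// A minimal sketch of the kind of per-chunk callback dlmalloc_inspect_all() is handed above,
// assuming free chunks report used_bytes == 0; the page math and the running byte counter in
// 'arg' are illustrative, not the DlmallocMadviseCallback defined elsewhere in the tree.
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

static void MadviseFreeChunkSketch(void* start, void* end, size_t used_bytes, void* arg) {
  if (used_bytes != 0) {
    return;  // Chunk is in use; nothing to give back to the kernel.
  }
  const uintptr_t page_size = static_cast<uintptr_t>(sysconf(_SC_PAGESIZE));
  uintptr_t begin = (reinterpret_cast<uintptr_t>(start) + page_size - 1) & ~(page_size - 1);
  uintptr_t finish = reinterpret_cast<uintptr_t>(end) & ~(page_size - 1);
  if (finish > begin) {
    madvise(reinterpret_cast<void*>(begin), finish - begin, MADV_DONTNEED);
    *reinterpret_cast<size_t*>(arg) += finish - begin;  // Count the bytes advised away.
  }
}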
 
-mirror::Object* Heap::AllocObjectInstrumented(Thread* self, mirror::Class* c, size_t byte_count) {
-  DebugCheckPreconditionsForAllobObject(c, byte_count);
-  mirror::Object* obj;
-  size_t bytes_allocated;
-  AllocationTimer alloc_timer(this, &obj);
-  bool large_object_allocation = TryAllocLargeObjectInstrumented(self, c, byte_count,
-                                                                 &obj, &bytes_allocated);
-  if (LIKELY(!large_object_allocation)) {
-    // Non-large object allocation.
-    obj = AllocateInstrumented(self, alloc_space_, byte_count, &bytes_allocated);
-    // Ensure that we did not allocate into a zygote space.
-    DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace());
-  }
-  if (LIKELY(obj != NULL)) {
-    obj->SetClass(c);
-    // Record allocation after since we want to use the atomic add for the atomic fence to guard
-    // the SetClass since we do not want the class to appear NULL in another thread.
-    size_t new_num_bytes_allocated = RecordAllocationInstrumented(bytes_allocated, obj);
-    if (Dbg::IsAllocTrackingEnabled()) {
-      Dbg::RecordAllocation(c, byte_count);
-    }
-    CheckConcurrentGC(self, new_num_bytes_allocated, obj);
-    if (kDesiredHeapVerification > kNoHeapVerification) {
-      VerifyObject(obj);
-    }
-    return obj;
-  }
-  ThrowOutOfMemoryError(self, byte_count, large_object_allocation);
-  return NULL;
-}
-
-bool Heap::IsHeapAddress(const mirror::Object* obj) {
-  // Note: we deliberately don't take the lock here, and mustn't test anything that would
-  // require taking the lock.
-  if (obj == NULL) {
+bool Heap::IsValidObjectAddress(const mirror::Object* obj) const {
+  // Note: we deliberately don't take the lock here, and mustn't test anything that would require
+  // taking the lock.
+  if (obj == nullptr) {
     return true;
   }
-  if (UNLIKELY(!IsAligned<kObjectAlignment>(obj))) {
-    return false;
+  return IsAligned<kObjectAlignment>(obj) && IsHeapAddress(obj);
+}
+
+bool Heap::IsHeapAddress(const mirror::Object* obj) const {
+  if (kMovingCollector && bump_pointer_space_->HasAddress(obj)) {
+    return true;
   }
-  return FindSpaceFromObject(obj, true) != NULL;
+  // TODO: This probably doesn't work for large objects.
+  return FindSpaceFromObject(obj, true) != nullptr;
 }
 
 bool Heap::IsLiveObjectLocked(const mirror::Object* obj, bool search_allocation_stack,
                               bool search_live_stack, bool sorted) {
   // Locks::heap_bitmap_lock_->AssertReaderHeld(Thread::Current());
-  if (obj == NULL || UNLIKELY(!IsAligned<kObjectAlignment>(obj))) {
+  if (obj == nullptr || UNLIKELY(!IsAligned<kObjectAlignment>(obj))) {
     return false;
   }
   space::ContinuousSpace* c_space = FindContinuousSpaceFromObject(obj, true);
@@ -642,6 +857,8 @@
     if (c_space->GetLiveBitmap()->Test(obj)) {
       return true;
     }
+  } else if (bump_pointer_space_->Contains(obj) || temp_space_->Contains(obj)) {
+      return true;
   } else {
     d_space = FindDiscontinuousSpaceFromObject(obj, true);
     if (d_space != NULL) {
@@ -655,7 +872,6 @@
     if (i > 0) {
       NanoSleep(MsToNs(10));
     }
-
     if (search_allocation_stack) {
       if (sorted) {
         if (allocation_stack_->ContainsSorted(const_cast<mirror::Object*>(obj))) {
@@ -699,16 +915,20 @@
   VerifyObjectBody(obj);
 }
 
-void Heap::DumpSpaces() {
+void Heap::DumpSpaces(std::ostream& stream) {
   for (const auto& space : continuous_spaces_) {
     accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
     accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
-    LOG(INFO) << space << " " << *space << "\n"
-              << live_bitmap << " " << *live_bitmap << "\n"
-              << mark_bitmap << " " << *mark_bitmap;
+    stream << space << " " << *space << "\n";
+    if (live_bitmap != nullptr) {
+      stream << live_bitmap << " " << *live_bitmap << "\n";
+    }
+    if (mark_bitmap != nullptr) {
+      stream << mark_bitmap << " " << *mark_bitmap << "\n";
+    }
   }
   for (const auto& space : discontinuous_spaces_) {
-    LOG(INFO) << space << " " << *space << "\n";
+    stream << space << " " << *space << "\n";
   }
 }
 
@@ -735,7 +955,7 @@
   const mirror::Class* c_c_c = *reinterpret_cast<mirror::Class* const *>(raw_addr);
   CHECK_EQ(c_c, c_c_c);
 
-  if (verify_object_mode_ != kVerifyAllFast) {
+  if (verify_object_mode_ > kVerifyAllFast) {
     // TODO: the bitmap tests below are racy if VerifyObjectBody is called without the
     //       heap_bitmap_lock_.
     if (!IsLiveObjectLocked(obj)) {
@@ -758,31 +978,6 @@
   GetLiveBitmap()->Walk(Heap::VerificationCallback, this);
 }
 
-inline size_t Heap::RecordAllocationInstrumented(size_t size, mirror::Object* obj) {
-  DCHECK(obj != NULL);
-  DCHECK_GT(size, 0u);
-  size_t old_num_bytes_allocated = static_cast<size_t>(num_bytes_allocated_.fetch_add(size));
-
-  if (Runtime::Current()->HasStatsEnabled()) {
-    RuntimeStats* thread_stats = Thread::Current()->GetStats();
-    ++thread_stats->allocated_objects;
-    thread_stats->allocated_bytes += size;
-
-    // TODO: Update these atomically.
-    RuntimeStats* global_stats = Runtime::Current()->GetStats();
-    ++global_stats->allocated_objects;
-    global_stats->allocated_bytes += size;
-  }
-
-  // This is safe to do since the GC will never free objects which are neither in the allocation
-  // stack or the live bitmap.
-  while (!allocation_stack_->AtomicPushBack(obj)) {
-    CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
-  }
-
-  return old_num_bytes_allocated + size;
-}
-
 void Heap::RecordFree(size_t freed_objects, size_t freed_bytes) {
   DCHECK_LE(freed_bytes, static_cast<size_t>(num_bytes_allocated_));
   num_bytes_allocated_.fetch_sub(freed_bytes);
@@ -799,111 +994,50 @@
   }
 }
 
-inline mirror::Object* Heap::TryToAllocateInstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
-                                                       bool grow, size_t* bytes_allocated) {
-  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
-    return NULL;
-  }
-  return space->Alloc(self, alloc_size, bytes_allocated);
-}
-
-// DlMallocSpace-specific version.
-inline mirror::Object* Heap::TryToAllocateInstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
-                                                       bool grow, size_t* bytes_allocated) {
-  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
-    return NULL;
-  }
-  if (LIKELY(!running_on_valgrind_)) {
-    return space->AllocNonvirtual(self, alloc_size, bytes_allocated);
-  } else {
-    return space->Alloc(self, alloc_size, bytes_allocated);
-  }
-}
-
-template <class T>
-inline mirror::Object* Heap::AllocateInstrumented(Thread* self, T* space, size_t alloc_size,
-                                                  size_t* bytes_allocated) {
-  // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
-  // done in the runnable state where suspension is expected.
-  DCHECK_EQ(self->GetState(), kRunnable);
-  self->AssertThreadSuspensionIsAllowable();
-
-  mirror::Object* ptr = TryToAllocateInstrumented(self, space, alloc_size, false, bytes_allocated);
-  if (LIKELY(ptr != NULL)) {
-    return ptr;
-  }
-  return AllocateInternalWithGc(self, space, alloc_size, bytes_allocated);
-}
-
-mirror::Object* Heap::AllocateInternalWithGc(Thread* self, space::AllocSpace* space,
+mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocator,
                                              size_t alloc_size, size_t* bytes_allocated) {
-  mirror::Object* ptr;
-
+  mirror::Object* ptr = nullptr;
   // The allocation failed. If the GC is running, block until it completes, and then retry the
   // allocation.
-  collector::GcType last_gc = WaitForConcurrentGcToComplete(self);
+  collector::GcType last_gc = WaitForGcToComplete(self);
   if (last_gc != collector::kGcTypeNone) {
     // A GC was in progress and we blocked, retry allocation now that memory has been freed.
-    ptr = TryToAllocateInstrumented(self, space, alloc_size, false, bytes_allocated);
-    if (ptr != NULL) {
-      return ptr;
-    }
+    ptr = TryToAllocate<true>(self, allocator, alloc_size, false, bytes_allocated);
   }
 
   // Loop through our different Gc types and try to Gc until we get enough free memory.
-  for (size_t i = static_cast<size_t>(last_gc) + 1;
-      i < static_cast<size_t>(collector::kGcTypeMax); ++i) {
-    bool run_gc = false;
-    collector::GcType gc_type = static_cast<collector::GcType>(i);
-    switch (gc_type) {
-      case collector::kGcTypeSticky: {
-          const size_t alloc_space_size = alloc_space_->Size();
-          run_gc = alloc_space_size > min_alloc_space_size_for_sticky_gc_ &&
-              alloc_space_->Capacity() - alloc_space_size >= min_remaining_space_for_sticky_gc_;
-          break;
-        }
-      case collector::kGcTypePartial:
-        run_gc = have_zygote_space_;
-        break;
-      case collector::kGcTypeFull:
-        run_gc = true;
-        break;
-      default:
-        break;
+  for (collector::GcType gc_type : gc_plan_) {
+    if (ptr != nullptr) {
+      break;
     }
-
-    if (run_gc) {
-      // If we actually ran a different type of Gc than requested, we can skip the index forwards.
-      collector::GcType gc_type_ran = CollectGarbageInternal(gc_type, kGcCauseForAlloc, false);
-      DCHECK_GE(static_cast<size_t>(gc_type_ran), i);
-      i = static_cast<size_t>(gc_type_ran);
-
+    // Attempt to run the collector, if we succeed, re-try the allocation.
+    if (CollectGarbageInternal(gc_type, kGcCauseForAlloc, false) != collector::kGcTypeNone) {
       // Did we free sufficient memory for the allocation to succeed?
-      ptr = TryToAllocateInstrumented(self, space, alloc_size, false, bytes_allocated);
-      if (ptr != NULL) {
-        return ptr;
-      }
+      ptr = TryToAllocate<true>(self, allocator, alloc_size, false, bytes_allocated);
     }
   }
-
   // Allocations have failed after GCs;  this is an exceptional state.
-  // Try harder, growing the heap if necessary.
-  ptr = TryToAllocateInstrumented(self, space, alloc_size, true, bytes_allocated);
-  if (ptr != NULL) {
-    return ptr;
+  if (ptr == nullptr) {
+    // Try harder, growing the heap if necessary.
+    ptr = TryToAllocate<true>(self, allocator, alloc_size, true, bytes_allocated);
   }
-
-  // Most allocations should have succeeded by now, so the heap is really full, really fragmented,
-  // or the requested size is really big. Do another GC, collecting SoftReferences this time. The
-  // VM spec requires that all SoftReferences have been collected and cleared before throwing OOME.
-
-  // OLD-TODO: wait for the finalizers from the previous GC to finish
-  VLOG(gc) << "Forcing collection of SoftReferences for " << PrettySize(alloc_size)
-           << " allocation";
-
-  // We don't need a WaitForConcurrentGcToComplete here either.
-  CollectGarbageInternal(collector::kGcTypeFull, kGcCauseForAlloc, true);
-  return TryToAllocateInstrumented(self, space, alloc_size, true, bytes_allocated);
+  if (ptr == nullptr) {
+    // Most allocations should have succeeded by now, so the heap is really full, really fragmented,
+    // or the requested size is really big. Do another GC, collecting SoftReferences this time. The
+    // VM spec requires that all SoftReferences have been collected and cleared before throwing
+    // OOME.
+    VLOG(gc) << "Forcing collection of SoftReferences for " << PrettySize(alloc_size)
+             << " allocation";
+    // TODO: Run finalization, but this may cause more allocations to occur.
+    // We don't need a WaitForGcToComplete here either.
+    DCHECK(!gc_plan_.empty());
+    CollectGarbageInternal(gc_plan_.back(), kGcCauseForAlloc, true);
+    ptr = TryToAllocate<true>(self, allocator, alloc_size, true, bytes_allocated);
+    if (ptr == nullptr) {
+      ThrowOutOfMemoryError(self, alloc_size, false);
+    }
+  }
+  return ptr;
 }
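// A minimal sketch of the retry ladder implemented above, with allocation and collection
// abstracted behind std::function parameters; every name here is illustrative. The order
// mirrors the code: retry after any blocked-on GC, escalate through the GC plan, allow the
// footprint to grow, and finally run a full GC that clears SoftReferences before reporting OOME.
#include <cstddef>
#include <functional>
#include <vector>

void* AllocateSlowPathSketch(const std::vector<int>& gc_plan,
                             const std::function<void*(bool /*grow*/)>& try_alloc,
                             const std::function<bool(int /*gc_type*/, bool /*clear_soft*/)>& run_gc) {
  // The real code only retries here if a concurrent GC actually completed while we waited.
  void* ptr = try_alloc(false);
  for (size_t i = 0; ptr == nullptr && i < gc_plan.size(); ++i) {
    if (run_gc(gc_plan[i], false)) {  // Sticky, then partial, then full.
      ptr = try_alloc(false);
    }
  }
  if (ptr == nullptr) {
    ptr = try_alloc(true);            // Try harder: allow the heap to grow.
  }
  if (ptr == nullptr) {
    run_gc(gc_plan.back(), true);     // Last resort: full GC clearing SoftReferences.
    ptr = try_alloc(true);
  }
  return ptr;                         // nullptr here means an OutOfMemoryError is thrown.
}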
 
 void Heap::SetTargetHeapUtilization(float target) {
@@ -914,51 +1048,24 @@
 
 size_t Heap::GetObjectsAllocated() const {
   size_t total = 0;
-  typedef std::vector<space::ContinuousSpace*>::const_iterator It;
-  for (It it = continuous_spaces_.begin(), end = continuous_spaces_.end(); it != end; ++it) {
-    space::ContinuousSpace* space = *it;
-    if (space->IsDlMallocSpace()) {
-      total += space->AsDlMallocSpace()->GetObjectsAllocated();
-    }
-  }
-  typedef std::vector<space::DiscontinuousSpace*>::const_iterator It2;
-  for (It2 it = discontinuous_spaces_.begin(), end = discontinuous_spaces_.end(); it != end; ++it) {
-    space::DiscontinuousSpace* space = *it;
-    total += space->AsLargeObjectSpace()->GetObjectsAllocated();
+  for (space::AllocSpace* space : alloc_spaces_) {
+    total += space->GetObjectsAllocated();
   }
   return total;
 }
 
 size_t Heap::GetObjectsAllocatedEver() const {
   size_t total = 0;
-  typedef std::vector<space::ContinuousSpace*>::const_iterator It;
-  for (It it = continuous_spaces_.begin(), end = continuous_spaces_.end(); it != end; ++it) {
-    space::ContinuousSpace* space = *it;
-    if (space->IsDlMallocSpace()) {
-      total += space->AsDlMallocSpace()->GetTotalObjectsAllocated();
-    }
-  }
-  typedef std::vector<space::DiscontinuousSpace*>::const_iterator It2;
-  for (It2 it = discontinuous_spaces_.begin(), end = discontinuous_spaces_.end(); it != end; ++it) {
-    space::DiscontinuousSpace* space = *it;
-    total += space->AsLargeObjectSpace()->GetTotalObjectsAllocated();
+  for (space::AllocSpace* space : alloc_spaces_) {
+    total += space->GetTotalObjectsAllocated();
   }
   return total;
 }
 
 size_t Heap::GetBytesAllocatedEver() const {
   size_t total = 0;
-  typedef std::vector<space::ContinuousSpace*>::const_iterator It;
-  for (It it = continuous_spaces_.begin(), end = continuous_spaces_.end(); it != end; ++it) {
-    space::ContinuousSpace* space = *it;
-    if (space->IsDlMallocSpace()) {
-      total += space->AsDlMallocSpace()->GetTotalBytesAllocated();
-    }
-  }
-  typedef std::vector<space::DiscontinuousSpace*>::const_iterator It2;
-  for (It2 it = discontinuous_spaces_.begin(), end = discontinuous_spaces_.end(); it != end; ++it) {
-    space::DiscontinuousSpace* space = *it;
-    total += space->AsLargeObjectSpace()->GetTotalBytesAllocated();
+  for (space::AllocSpace* space : alloc_spaces_) {
+    total += space->GetTotalBytesAllocated();
   }
   return total;
 }
@@ -1056,8 +1163,8 @@
   // For bitmap Visit.
   // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for
   // annotalysis on visitors.
-  void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
-    collector::MarkSweep::VisitObjectReferences(obj, *this, true);
+  void operator()(const mirror::Object* o) const NO_THREAD_SAFETY_ANALYSIS {
+    collector::MarkSweep::VisitObjectReferences(const_cast<mirror::Object*>(o), *this, true);
   }
 
   // For MarkSweep::VisitObjectReferences.
@@ -1093,56 +1200,89 @@
 void Heap::CollectGarbage(bool clear_soft_references) {
   // Even if we waited for a GC we still need to do another GC since weaks allocated during the
   // last GC will not have necessarily been cleared.
-  Thread* self = Thread::Current();
-  WaitForConcurrentGcToComplete(self);
   CollectGarbageInternal(collector::kGcTypeFull, kGcCauseExplicit, clear_soft_references);
 }
 
+void Heap::ChangeCollector(CollectorType collector_type) {
+  switch (collector_type) {
+    case kCollectorTypeSS: {
+      ChangeAllocator(kAllocatorTypeBumpPointer);
+      break;
+    }
+    case kCollectorTypeMS:
+      // Fall-through.
+    case kCollectorTypeCMS: {
+      ChangeAllocator(kAllocatorTypeFreeList);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unimplemented";
+  }
+}
+
 void Heap::PreZygoteFork() {
   static Mutex zygote_creation_lock_("zygote creation lock", kZygoteCreationLock);
-  // Do this before acquiring the zygote creation lock so that we don't get lock order violations.
-  CollectGarbage(false);
   Thread* self = Thread::Current();
   MutexLock mu(self, zygote_creation_lock_);
-
   // Try to see if we have any Zygote spaces.
   if (have_zygote_space_) {
     return;
   }
-
-  VLOG(heap) << "Starting PreZygoteFork with alloc space size " << PrettySize(alloc_space_->Size());
-
-  {
-    // Flush the alloc stack.
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    FlushAllocStack();
+  VLOG(heap) << "Starting PreZygoteFork";
+  // Do this before acquiring the zygote creation lock so that we don't get lock order violations.
+  CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
+  // Trim the pages at the end of the non moving space.
+  non_moving_space_->Trim();
+  non_moving_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+  // Change the allocator to the post zygote one.
+  ChangeCollector(collector_type_);
+  // TODO: Delete bump_pointer_space_ and temp_space_?
+  if (semi_space_collector_ != nullptr) {
+    // Create a new bump pointer space which we will compact into.
+    space::BumpPointerSpace target_space("zygote bump space", non_moving_space_->End(),
+                                         non_moving_space_->Limit());
+    // Compact the bump pointer space to a new zygote bump pointer space.
+    temp_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+    Compact(&target_space, bump_pointer_space_);
+    CHECK_EQ(temp_space_->GetBytesAllocated(), 0U);
+    total_objects_freed_ever_ += semi_space_collector_->GetFreedObjects();
+    total_bytes_freed_ever_ += semi_space_collector_->GetFreedBytes();
+    // Update the end and write out image.
+    non_moving_space_->SetEnd(target_space.End());
+    non_moving_space_->SetLimit(target_space.Limit());
+    accounting::SpaceBitmap* bitmap = non_moving_space_->GetLiveBitmap();
+    // Record the allocations in the bitmap.
+    VLOG(heap) << "Recording zygote allocations";
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(target_space.Begin());
+    const mirror::Object* end = reinterpret_cast<const mirror::Object*>(target_space.End());
+    while (obj < end) {
+      bitmap->Set(obj);
+      obj = space::BumpPointerSpace::GetNextObject(obj);
+    }
   }
-
-  // Turns the current alloc space into a Zygote space and obtain the new alloc space composed
-  // of the remaining available heap memory.
-  space::DlMallocSpace* zygote_space = alloc_space_;
-  alloc_space_ = zygote_space->CreateZygoteSpace("alloc space");
-  alloc_space_->SetFootprintLimit(alloc_space_->Capacity());
-
+  // Turn the current alloc space into a zygote space and obtain the new alloc space composed of
+  // the remaining available heap memory.
+  space::MallocSpace* zygote_space = non_moving_space_;
+  non_moving_space_ = zygote_space->CreateZygoteSpace("alloc space");
+  non_moving_space_->SetFootprintLimit(non_moving_space_->Capacity());
   // Change the GC retention policy of the zygote space to only collect when full.
   zygote_space->SetGcRetentionPolicy(space::kGcRetentionPolicyFullCollect);
-  AddContinuousSpace(alloc_space_);
+  AddSpace(non_moving_space_);
   have_zygote_space_ = true;
-
+  zygote_space->InvalidateAllocator();
   // Create the zygote space mod union table.
   accounting::ModUnionTable* mod_union_table =
       new accounting::ModUnionTableCardCache("zygote space mod-union table", this, zygote_space);
   CHECK(mod_union_table != nullptr) << "Failed to create zygote space mod-union table";
   AddModUnionTable(mod_union_table);
-
   // Reset the cumulative loggers since we now have a few additional timing phases.
-  for (const auto& collector : mark_sweep_collectors_) {
+  for (const auto& collector : garbage_collectors_) {
     collector->ResetCumulativeStatistics();
   }
 }
 
 void Heap::FlushAllocStack() {
-  MarkAllocStack(alloc_space_->GetLiveBitmap(), large_object_space_->GetLiveObjects(),
+  MarkAllocStack(non_moving_space_->GetLiveBitmap(), large_object_space_->GetLiveObjects(),
                  allocation_stack_.get());
   allocation_stack_->Reset();
 }
@@ -1161,86 +1301,131 @@
   }
 }
 
+const char* PrettyCause(GcCause cause) {
+  switch (cause) {
+    case kGcCauseForAlloc: return "Alloc";
+    case kGcCauseBackground: return "Background";
+    case kGcCauseExplicit: return "Explicit";
+    default:
+      LOG(FATAL) << "Unreachable";
+  }
+  return "";
+}
 
-const char* gc_cause_and_type_strings[3][4] = {
-    {"", "GC Alloc Sticky", "GC Alloc Partial", "GC Alloc Full"},
-    {"", "GC Background Sticky", "GC Background Partial", "GC Background Full"},
-    {"", "GC Explicit Sticky", "GC Explicit Partial", "GC Explicit Full"}};
+void Heap::SwapSemiSpaces() {
+  // Swap the spaces so we allocate into the space which we just evacuated.
+  std::swap(bump_pointer_space_, temp_space_);
+}
+
+void Heap::Compact(space::ContinuousMemMapAllocSpace* target_space,
+                   space::ContinuousMemMapAllocSpace* source_space) {
+  CHECK(kMovingCollector);
+  CHECK_NE(target_space, source_space) << "In-place compaction currently unsupported";
+  if (target_space != source_space) {
+    semi_space_collector_->SetFromSpace(source_space);
+    semi_space_collector_->SetToSpace(target_space);
+    semi_space_collector_->Run(false);
+  }
+}
 
 collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCause gc_cause,
                                                bool clear_soft_references) {
   Thread* self = Thread::Current();
-
+  Runtime* runtime = Runtime::Current();
+  // If the heap can't run the GC, silently fail and return that no GC was run.
+  switch (gc_type) {
+    case collector::kGcTypeSticky: {
+      const size_t alloc_space_size = non_moving_space_->Size();
+      if (alloc_space_size < min_alloc_space_size_for_sticky_gc_ ||
+        non_moving_space_->Capacity() - alloc_space_size < min_remaining_space_for_sticky_gc_) {
+        return collector::kGcTypeNone;
+      }
+      break;
+    }
+    case collector::kGcTypePartial: {
+      if (!have_zygote_space_) {
+        return collector::kGcTypeNone;
+      }
+      break;
+    }
+    default: {
+      // Other GC types don't have any special cases which makes them not runnable. The main case
+      // here is full GC.
+    }
+  }
   ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
   Locks::mutator_lock_->AssertNotHeld(self);
-
   if (self->IsHandlingStackOverflow()) {
     LOG(WARNING) << "Performing GC on a thread that is handling a stack overflow.";
   }
-
-  // Ensure there is only one GC at a time.
-  bool start_collect = false;
-  while (!start_collect) {
-    {
-      MutexLock mu(self, *gc_complete_lock_);
-      if (!is_gc_running_) {
-        is_gc_running_ = true;
-        start_collect = true;
-      }
+  {
+    gc_complete_lock_->AssertNotHeld(self);
+    MutexLock mu(self, *gc_complete_lock_);
+    // Ensure there is only one GC at a time.
+    WaitForGcToCompleteLocked(self);
+    // TODO: if another thread beat this one to do the GC, perhaps we should just return here?
+    //       Not doing so at the moment, to ensure soft references are cleared.
+    // GC can be disabled if someone has used GetPrimitiveArrayCritical.
+    if (gc_disable_count_ != 0) {
+      LOG(WARNING) << "Skipping GC due to disable count " << gc_disable_count_;
+      return collector::kGcTypeNone;
     }
-    if (!start_collect) {
-      // TODO: timinglog this.
-      WaitForConcurrentGcToComplete(self);
-
-      // TODO: if another thread beat this one to do the GC, perhaps we should just return here?
-      //       Not doing at the moment to ensure soft references are cleared.
-    }
+    is_gc_running_ = true;
   }
-  gc_complete_lock_->AssertNotHeld(self);
-
-  if (gc_cause == kGcCauseForAlloc && Runtime::Current()->HasStatsEnabled()) {
-    ++Runtime::Current()->GetStats()->gc_for_alloc_count;
-    ++Thread::Current()->GetStats()->gc_for_alloc_count;
+  if (gc_cause == kGcCauseForAlloc && runtime->HasStatsEnabled()) {
+    ++runtime->GetStats()->gc_for_alloc_count;
+    ++self->GetStats()->gc_for_alloc_count;
   }
-
   uint64_t gc_start_time_ns = NanoTime();
   uint64_t gc_start_size = GetBytesAllocated();
   // Approximate allocation rate in bytes / second.
-  if (UNLIKELY(gc_start_time_ns == last_gc_time_ns_)) {
-    LOG(WARNING) << "Timers are broken (gc_start_time == last_gc_time_).";
-  }
   uint64_t ms_delta = NsToMs(gc_start_time_ns - last_gc_time_ns_);
-  if (ms_delta != 0) {
+  // Back to back GCs can cause 0 ms of wait time in between GC invocations.
+  if (LIKELY(ms_delta != 0)) {
     allocation_rate_ = ((gc_start_size - last_gc_size_) * 1000) / ms_delta;
     VLOG(heap) << "Allocation rate: " << PrettySize(allocation_rate_) << "/s";
   }
 
-  if (gc_type == collector::kGcTypeSticky &&
-      alloc_space_->Size() < min_alloc_space_size_for_sticky_gc_) {
-    gc_type = collector::kGcTypePartial;
-  }
-
   DCHECK_LT(gc_type, collector::kGcTypeMax);
   DCHECK_NE(gc_type, collector::kGcTypeNone);
-  DCHECK_LE(gc_cause, kGcCauseExplicit);
 
-  ATRACE_BEGIN(gc_cause_and_type_strings[gc_cause][gc_type]);
-
-  collector::MarkSweep* collector = NULL;
-  for (const auto& cur_collector : mark_sweep_collectors_) {
-    if (cur_collector->IsConcurrent() == concurrent_gc_ && cur_collector->GetGcType() == gc_type) {
-      collector = cur_collector;
-      break;
+  collector::GarbageCollector* collector = nullptr;
+  // TODO: Clean this up.
+  if (current_allocator_ == kAllocatorTypeBumpPointer) {
+    gc_type = semi_space_collector_->GetGcType();
+    CHECK_EQ(temp_space_->GetObjectsAllocated(), 0U);
+    semi_space_collector_->SetFromSpace(bump_pointer_space_);
+    semi_space_collector_->SetToSpace(temp_space_);
+    mprotect(temp_space_->Begin(), temp_space_->Capacity(), PROT_READ | PROT_WRITE);
+    collector = semi_space_collector_;
+    gc_type = collector::kGcTypeFull;
+  } else if (current_allocator_ == kAllocatorTypeFreeList) {
+    for (const auto& cur_collector : garbage_collectors_) {
+      if (cur_collector->IsConcurrent() == concurrent_gc_ &&
+          cur_collector->GetGcType() == gc_type) {
+        collector = cur_collector;
+        break;
+      }
     }
+  } else {
+    LOG(FATAL) << "Invalid current allocator " << current_allocator_;
   }
   CHECK(collector != NULL)
       << "Could not find garbage collector with concurrent=" << concurrent_gc_
       << " and type=" << gc_type;
 
-  collector->clear_soft_references_ = clear_soft_references;
-  collector->Run();
+  ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), collector->GetName()).c_str());
+
+  collector->Run(clear_soft_references);
   total_objects_freed_ever_ += collector->GetFreedObjects();
   total_bytes_freed_ever_ += collector->GetFreedBytes();
+
+  // Enqueue cleared references.
+  EnqueueClearedReferences();
+
+  // Grow the heap so that we know when to perform the next GC.
+  GrowForUtilization(gc_type, collector->GetDurationNs());
+
   if (care_about_pause_times_) {
     const size_t duration = collector->GetDurationNs();
     std::vector<uint64_t> pauses = collector->GetPauseTimes();
@@ -1252,7 +1437,6 @@
         was_slow = was_slow || pause > long_pause_log_threshold_;
       }
     }
-
     if (was_slow) {
         const size_t percent_free = GetPercentFree();
         const size_t current_heap_size = GetBytesAllocated();
@@ -1271,7 +1455,7 @@
                   << PrettySize(total_memory) << ", " << "paused " << pause_string.str()
                   << " total " << PrettyDuration((duration / 1000) * 1000);
         if (VLOG_IS_ON(heap)) {
-            LOG(INFO) << Dumpable<base::TimingLogger>(collector->GetTimings());
+            LOG(INFO) << Dumpable<TimingLogger>(collector->GetTimings());
         }
     }
   }
@@ -1327,7 +1511,6 @@
       accounting::CardTable* card_table = heap_->GetCardTable();
       accounting::ObjectStack* alloc_stack = heap_->allocation_stack_.get();
       accounting::ObjectStack* live_stack = heap_->live_stack_.get();
-
       if (!failed_) {
         // Print the message only on the first failure to prevent spam.
         LOG(ERROR) << "!!!!!!!!!!!!!!Heap corruption detected!!!!!!!!!!!!!!!!!!!";
@@ -1337,7 +1520,7 @@
         byte* card_addr = card_table->CardFromAddr(obj);
         LOG(ERROR) << "Object " << obj << " references dead object " << ref << " at offset "
                    << offset << "\n card value = " << static_cast<int>(*card_addr);
-        if (heap_->IsHeapAddress(obj->GetClass())) {
+        if (heap_->IsValidObjectAddress(obj->GetClass())) {
           LOG(ERROR) << "Obj type " << PrettyTypeOf(obj);
         } else {
           LOG(ERROR) << "Object " << obj << " class(" << obj->GetClass() << ") not a heap address";
@@ -1345,8 +1528,8 @@
 
         // Attempt to find the class inside the recently freed objects.
         space::ContinuousSpace* ref_space = heap_->FindContinuousSpaceFromObject(ref, true);
-        if (ref_space->IsDlMallocSpace()) {
-          space::DlMallocSpace* space = ref_space->AsDlMallocSpace();
+        if (ref_space != nullptr && ref_space->IsMallocSpace()) {
+          space::MallocSpace* space = ref_space->AsMallocSpace();
           mirror::Class* ref_class = space->FindRecentFreedObject(ref);
           if (ref_class != nullptr) {
             LOG(ERROR) << "Reference " << ref << " found as a recently freed object with class "
@@ -1356,7 +1539,7 @@
           }
         }
 
-        if (ref->GetClass() != nullptr && heap_->IsHeapAddress(ref->GetClass()) &&
+        if (ref->GetClass() != nullptr && heap_->IsValidObjectAddress(ref->GetClass()) &&
             ref->GetClass()->IsClass()) {
           LOG(ERROR) << "Ref type " << PrettyTypeOf(ref);
         } else {
@@ -1427,17 +1610,25 @@
  public:
   explicit VerifyObjectVisitor(Heap* heap) : heap_(heap), failed_(false) {}
 
-  void operator()(const mirror::Object* obj) const
+  void operator()(mirror::Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
    // Note: we are verifying the references in obj but not obj itself; this is because obj must
    // be live or else how did we find it in the live bitmap?
     VerifyReferenceVisitor visitor(heap_);
     // The class doesn't count as a reference but we should verify it anyways.
-    visitor(obj, obj->GetClass(), MemberOffset(0), false);
-    collector::MarkSweep::VisitObjectReferences(const_cast<mirror::Object*>(obj), visitor, true);
+    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
+    if (obj->GetClass()->IsReferenceClass()) {
+      visitor(obj, heap_->GetReferenceReferent(obj), MemberOffset(0), false);
+    }
     failed_ = failed_ || visitor.Failed();
   }
 
+  static void VisitCallback(mirror::Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    VerifyObjectVisitor* visitor = reinterpret_cast<VerifyObjectVisitor*>(arg);
+    visitor->operator()(obj);
+  }
+
   bool Failed() const {
     return failed_;
   }
@@ -1453,18 +1644,15 @@
   // Lets sort our allocation stacks so that we can efficiently binary search them.
   allocation_stack_->Sort();
   live_stack_->Sort();
-  // Perform the verification.
   VerifyObjectVisitor visitor(this);
-  Runtime::Current()->VisitRoots(VerifyReferenceVisitor::VerifyRoots, &visitor, false, false);
-  GetLiveBitmap()->Visit(visitor);
   // Verify objects in the allocation stack since these will be objects which were:
   // 1. Allocated prior to the GC (pre GC verification).
   // 2. Allocated during the GC (pre sweep GC verification).
-  for (mirror::Object** it = allocation_stack_->Begin(); it != allocation_stack_->End(); ++it) {
-    visitor(*it);
-  }
   // We don't want to verify the objects in the live stack since they themselves may be
   // pointing to dead objects if they are not reachable.
+  VisitObjects(VerifyObjectVisitor::VisitCallback, &visitor);
+  // Verify the roots:
+  Runtime::Current()->VisitRoots(VerifyReferenceVisitor::VerifyRoots, &visitor, false, false);
   if (visitor.Failed()) {
     // Dump mod-union tables.
     for (const auto& table_pair : mod_union_tables_) {
@@ -1557,7 +1745,7 @@
   void operator()(mirror::Object* obj) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     VerifyReferenceCardVisitor visitor(heap_, const_cast<bool*>(&failed_));
-    collector::MarkSweep::VisitObjectReferences(obj, visitor, true);
+    collector::MarkSweep::VisitObjectReferences(const_cast<mirror::Object*>(obj), visitor, true);
   }
 
   bool Failed() const {
@@ -1601,19 +1789,23 @@
   return it->second;
 }
 
-void Heap::ProcessCards(base::TimingLogger& timings) {
+void Heap::ProcessCards(TimingLogger& timings) {
   // Clear cards and keep track of cards cleared in the mod-union table.
   for (const auto& space : continuous_spaces_) {
     accounting::ModUnionTable* table = FindModUnionTableFromSpace(space);
     if (table != nullptr) {
       const char* name = space->IsZygoteSpace() ? "ZygoteModUnionClearCards" :
           "ImageModUnionClearCards";
-      base::TimingLogger::ScopedSplit split(name, &timings);
+      TimingLogger::ScopedSplit split(name, &timings);
       table->ClearCards();
-    } else {
-      base::TimingLogger::ScopedSplit split("AllocSpaceClearCards", &timings);
+    } else if (space->GetType() != space::kSpaceTypeBumpPointerSpace) {
+      TimingLogger::ScopedSplit split("AllocSpaceClearCards", &timings);
       // No mod union table for the AllocSpace. Age the cards so that the GC knows that these cards
       // were dirty before the GC started.
+      // TODO: Don't need to use atomic.
+      // The races are that we either end up with an aged card or an unaged card. Since we have
+      // the checkpoint roots and then we scan / update mod union tables after, we will always
+      // scan either card. If we end up with the non-aged card, we scan it in the pause.
       card_table_->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(), VoidFunctor());
     }
   }
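
As an aside for readers unfamiliar with card aging, the sketch below illustrates the dirty -> aged -> clean progression the comment above relies on. It is a minimal standalone model, not ART's CardTable; the constants and AgeCard() are hypothetical stand-ins.

#include <array>
#include <cstdint>
#include <cstdio>

// Hypothetical card values; ART's CardTable uses its own constants internally.
constexpr uint8_t kCardClean = 0;
constexpr uint8_t kCardAged = 1;
constexpr uint8_t kCardDirty = 2;

// Age a single card: dirty cards become aged (still scanned by the GC),
// anything else becomes clean.
uint8_t AgeCard(uint8_t card) {
  return card == kCardDirty ? kCardAged : kCardClean;
}

int main() {
  std::array<uint8_t, 4> cards = {kCardClean, kCardDirty, kCardAged, kCardDirty};
  for (uint8_t& card : cards) {
    card = AgeCard(card);  // In ART this is done atomically per card.
  }
  for (uint8_t card : cards) {
    std::printf("%d ", card);  // Prints: 0 1 0 1
  }
  std::printf("\n");
  return 0;
}
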
@@ -1692,36 +1884,27 @@
   }
 }
 
-collector::GcType Heap::WaitForConcurrentGcToComplete(Thread* self) {
+collector::GcType Heap::WaitForGcToComplete(Thread* self) {
+  ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
+  MutexLock mu(self, *gc_complete_lock_);
+  return WaitForGcToCompleteLocked(self);
+}
+
+collector::GcType Heap::WaitForGcToCompleteLocked(Thread* self) {
   collector::GcType last_gc_type = collector::kGcTypeNone;
-  if (concurrent_gc_) {
-    ATRACE_BEGIN("GC: Wait For Concurrent");
-    bool do_wait;
-    uint64_t wait_start = NanoTime();
-    {
-      // Check if GC is running holding gc_complete_lock_.
-      MutexLock mu(self, *gc_complete_lock_);
-      do_wait = is_gc_running_;
-    }
-    if (do_wait) {
-      uint64_t wait_time;
-      // We must wait, change thread state then sleep on gc_complete_cond_;
-      ScopedThreadStateChange tsc(Thread::Current(), kWaitingForGcToComplete);
-      {
-        MutexLock mu(self, *gc_complete_lock_);
-        while (is_gc_running_) {
-          gc_complete_cond_->Wait(self);
-        }
-        last_gc_type = last_gc_type_;
-        wait_time = NanoTime() - wait_start;
-        total_wait_time_ += wait_time;
-      }
-      if (wait_time > long_pause_log_threshold_) {
-        LOG(INFO) << "WaitForConcurrentGcToComplete blocked for " << PrettyDuration(wait_time);
-      }
-    }
+  uint64_t wait_start = NanoTime();
+  while (is_gc_running_) {
+    ATRACE_BEGIN("GC: Wait For Completion");
+    // We must wait, change thread state then sleep on gc_complete_cond_;
+    gc_complete_cond_->Wait(self);
+    last_gc_type = last_gc_type_;
     ATRACE_END();
   }
+  uint64_t wait_time = NanoTime() - wait_start;
+  total_wait_time_ += wait_time;
+  if (wait_time > long_pause_log_threshold_) {
+    LOG(INFO) << "WaitForGcToComplete blocked for " << PrettyDuration(wait_time);
+  }
   return last_gc_type;
 }
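
For readers following the refactoring of WaitForConcurrentGcToComplete into WaitForGcToComplete / WaitForGcToCompleteLocked, the sketch below shows the underlying pattern (hold the lock, loop on the condition variable, account the blocked time) using the C++ standard library instead of ART's Mutex/ConditionVariable wrappers; all names are hypothetical.

#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>

// Hypothetical stand-in for the heap's GC-completion state.
struct GcState {
  std::mutex complete_lock;
  std::condition_variable complete_cond;
  bool is_gc_running = false;
};

// Blocks until no GC is running; returns how long the caller was blocked.
std::chrono::nanoseconds WaitForGcToComplete(GcState& state) {
  auto wait_start = std::chrono::steady_clock::now();
  std::unique_lock<std::mutex> lock(state.complete_lock);
  while (state.is_gc_running) {
    state.complete_cond.wait(lock);  // Releases the lock while sleeping.
  }
  return std::chrono::duration_cast<std::chrono::nanoseconds>(
      std::chrono::steady_clock::now() - wait_start);
}

int main() {
  GcState state;
  state.is_gc_running = true;
  std::thread gc([&state] {
    std::this_thread::sleep_for(std::chrono::milliseconds(10));
    {
      std::lock_guard<std::mutex> lock(state.complete_lock);
      state.is_gc_running = false;
    }
    state.complete_cond.notify_all();
  });
  auto blocked = WaitForGcToComplete(state);
  gc.join();
  std::printf("blocked for %lld ns\n", static_cast<long long>(blocked.count()));
  return 0;
}
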
 
@@ -1744,6 +1927,23 @@
   max_allowed_footprint_ = max_allowed_footprint;
 }
 
+bool Heap::IsMovableObject(const mirror::Object* obj) const {
+  if (kMovingCollector) {
+    DCHECK(!IsInTempSpace(obj));
+    if (bump_pointer_space_->HasAddress(obj)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Heap::IsInTempSpace(const mirror::Object* obj) const {
+  if (temp_space_->HasAddress(obj) && !temp_space_->Contains(obj)) {
+    return true;
+  }
+  return false;
+}
+
 void Heap::UpdateMaxNativeFootprint() {
   size_t native_size = native_bytes_allocated_;
   // TODO: Tune the native heap utilization to be a value other than the java heap utilization.
@@ -1763,7 +1963,6 @@
   const size_t bytes_allocated = GetBytesAllocated();
   last_gc_size_ = bytes_allocated;
   last_gc_time_ns_ = NanoTime();
-
   size_t target_size;
   if (gc_type != collector::kGcTypeSticky) {
     // Grow the heap for non sticky GC.
@@ -1773,6 +1972,7 @@
     } else if (target_size < bytes_allocated + min_free_) {
       target_size = bytes_allocated + min_free_;
     }
+    native_need_to_run_finalization_ = true;
     next_gc_type_ = collector::kGcTypeSticky;
   } else {
     // Based on how close the current heap size is to the target size, decide
@@ -1782,7 +1982,6 @@
     } else {
       next_gc_type_ = collector::kGcTypePartial;
     }
-
     // If we have freed enough memory, shrink the heap back down.
     if (bytes_allocated + max_free_ < max_allowed_footprint_) {
       target_size = bytes_allocated + max_free_;
@@ -1790,13 +1989,10 @@
       target_size = std::max(bytes_allocated, max_allowed_footprint_);
     }
   }
-
   if (!ignore_max_footprint_) {
     SetIdealFootprint(target_size);
-
-    if (concurrent_gc_) {
+    if (concurrent_gc_ && AllocatorHasConcurrentGC(current_allocator_)) {
       // Calculate when to perform the next ConcurrentGC.
-
       // Calculate the estimated GC duration.
       double gc_duration_seconds = NsToMs(gc_duration) / 1000.0;
       // Estimate how many remaining bytes we will have when we need to start the next GC.
@@ -1811,26 +2007,25 @@
         // Start a concurrent GC when we get close to the estimated remaining bytes. When the
         // allocation rate is very high, remaining_bytes could tell us that we should start a GC
         // right away.
-        concurrent_start_bytes_ = std::max(max_allowed_footprint_ - remaining_bytes, bytes_allocated);
+        concurrent_start_bytes_ = std::max(max_allowed_footprint_ - remaining_bytes,
+                                           bytes_allocated);
       }
       DCHECK_LE(concurrent_start_bytes_, max_allowed_footprint_);
       DCHECK_LE(max_allowed_footprint_, growth_limit_);
     }
   }
-
-  UpdateMaxNativeFootprint();
 }
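
The concurrent-start heuristic computed above can be made concrete with a small worked example. The helper below is a hypothetical simplification of the computation (estimate the bytes allocated during the next GC from the measured allocation rate and GC duration, then start the concurrent GC that far before the footprint limit); the numbers in main() are invented purely for illustration.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Hypothetical simplification: when should a concurrent GC be kicked off?
uint64_t ConcurrentStartBytes(uint64_t max_allowed_footprint,
                              uint64_t bytes_allocated,
                              double allocation_rate_bytes_per_s,
                              double gc_duration_s) {
  // Bytes we expect to allocate while the concurrent GC is running.
  uint64_t remaining_bytes =
      static_cast<uint64_t>(allocation_rate_bytes_per_s * gc_duration_s);
  // Start the GC early enough that we do not blow past the footprint limit,
  // but never below what is already allocated.
  return std::max(max_allowed_footprint - std::min(remaining_bytes, max_allowed_footprint),
                  bytes_allocated);
}

int main() {
  // Example: 32 MB footprint, 20 MB allocated, 8 MB/s allocation rate, 250 ms GC.
  uint64_t start = ConcurrentStartBytes(32 * 1024 * 1024, 20 * 1024 * 1024,
                                        8.0 * 1024 * 1024, 0.25);
  std::printf("start concurrent GC at %llu bytes\n",
              static_cast<unsigned long long>(start));  // ~30 MB
  return 0;
}
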
 
 void Heap::ClearGrowthLimit() {
   growth_limit_ = capacity_;
-  alloc_space_->ClearGrowthLimit();
+  non_moving_space_->ClearGrowthLimit();
 }
 
 void Heap::SetReferenceOffsets(MemberOffset reference_referent_offset,
-                                MemberOffset reference_queue_offset,
-                                MemberOffset reference_queueNext_offset,
-                                MemberOffset reference_pendingNext_offset,
-                                MemberOffset finalizer_reference_zombie_offset) {
+                               MemberOffset reference_queue_offset,
+                               MemberOffset reference_queueNext_offset,
+                               MemberOffset reference_pendingNext_offset,
+                               MemberOffset finalizer_reference_zombie_offset) {
   reference_referent_offset_ = reference_referent_offset;
   reference_queue_offset_ = reference_queue_offset;
   reference_queueNext_offset_ = reference_queueNext_offset;
@@ -1843,78 +2038,18 @@
   CHECK_NE(finalizer_reference_zombie_offset_.Uint32Value(), 0U);
 }
 
+void Heap::SetReferenceReferent(mirror::Object* reference, mirror::Object* referent) {
+  DCHECK(reference != NULL);
+  DCHECK_NE(reference_referent_offset_.Uint32Value(), 0U);
+  reference->SetFieldObject(reference_referent_offset_, referent, true);
+}
+
 mirror::Object* Heap::GetReferenceReferent(mirror::Object* reference) {
   DCHECK(reference != NULL);
   DCHECK_NE(reference_referent_offset_.Uint32Value(), 0U);
   return reference->GetFieldObject<mirror::Object*>(reference_referent_offset_, true);
 }
 
-void Heap::ClearReferenceReferent(mirror::Object* reference) {
-  DCHECK(reference != NULL);
-  DCHECK_NE(reference_referent_offset_.Uint32Value(), 0U);
-  reference->SetFieldObject(reference_referent_offset_, NULL, true);
-}
-
-// Returns true if the reference object has not yet been enqueued.
-bool Heap::IsEnqueuable(const mirror::Object* ref) {
-  DCHECK(ref != NULL);
-  const mirror::Object* queue =
-      ref->GetFieldObject<mirror::Object*>(reference_queue_offset_, false);
-  const mirror::Object* queue_next =
-      ref->GetFieldObject<mirror::Object*>(reference_queueNext_offset_, false);
-  return (queue != NULL) && (queue_next == NULL);
-}
-
-void Heap::EnqueueReference(mirror::Object* ref, mirror::Object** cleared_reference_list) {
-  DCHECK(ref != NULL);
-  CHECK(ref->GetFieldObject<mirror::Object*>(reference_queue_offset_, false) != NULL);
-  CHECK(ref->GetFieldObject<mirror::Object*>(reference_queueNext_offset_, false) == NULL);
-  EnqueuePendingReference(ref, cleared_reference_list);
-}
-
-bool Heap::IsEnqueued(mirror::Object* ref) {
-  // Since the references are stored as cyclic lists it means that once enqueued, the pending next
-  // will always be non-null.
-  return ref->GetFieldObject<mirror::Object*>(GetReferencePendingNextOffset(), false) != nullptr;
-}
-
-void Heap::EnqueuePendingReference(mirror::Object* ref, mirror::Object** list) {
-  DCHECK(ref != NULL);
-  DCHECK(list != NULL);
-  if (*list == NULL) {
-    // 1 element cyclic queue, ie: Reference ref = ..; ref.pendingNext = ref;
-    ref->SetFieldObject(reference_pendingNext_offset_, ref, false);
-    *list = ref;
-  } else {
-    mirror::Object* head =
-        (*list)->GetFieldObject<mirror::Object*>(reference_pendingNext_offset_, false);
-    ref->SetFieldObject(reference_pendingNext_offset_, head, false);
-    (*list)->SetFieldObject(reference_pendingNext_offset_, ref, false);
-  }
-}
-
-mirror::Object* Heap::DequeuePendingReference(mirror::Object** list) {
-  DCHECK(list != NULL);
-  DCHECK(*list != NULL);
-  mirror::Object* head = (*list)->GetFieldObject<mirror::Object*>(reference_pendingNext_offset_,
-                                                                  false);
-  mirror::Object* ref;
-
-  // Note: the following code is thread-safe because it is only called from ProcessReferences which
-  // is single threaded.
-  if (*list == head) {
-    ref = *list;
-    *list = NULL;
-  } else {
-    mirror::Object* next = head->GetFieldObject<mirror::Object*>(reference_pendingNext_offset_,
-                                                                 false);
-    (*list)->SetFieldObject(reference_pendingNext_offset_, next, false);
-    ref = head;
-  }
-  ref->SetFieldObject(reference_pendingNext_offset_, NULL, false);
-  return ref;
-}
-
 void Heap::AddFinalizerReference(Thread* self, mirror::Object* object) {
   ScopedObjectAccess soa(self);
   JValue result;
@@ -1924,19 +2059,18 @@
       arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
 }
 
-void Heap::EnqueueClearedReferences(mirror::Object** cleared) {
-  DCHECK(cleared != NULL);
-  if (*cleared != NULL) {
+void Heap::EnqueueClearedReferences() {
+  if (!cleared_references_.IsEmpty()) {
     // When a runtime isn't started there are no reference queues to care about so ignore.
     if (LIKELY(Runtime::Current()->IsStarted())) {
       ScopedObjectAccess soa(Thread::Current());
       JValue result;
       ArgArray arg_array(NULL, 0);
-      arg_array.Append(reinterpret_cast<uint32_t>(*cleared));
+      arg_array.Append(reinterpret_cast<uint32_t>(cleared_references_.GetList()));
       soa.DecodeMethod(WellKnownClasses::java_lang_ref_ReferenceQueue_add)->Invoke(soa.Self(),
           arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
     }
-    *cleared = NULL;
+    cleared_references_.Clear();
   }
 }
 
@@ -1944,43 +2078,39 @@
   // Make sure that we can do a concurrent GC.
   Runtime* runtime = Runtime::Current();
   DCHECK(concurrent_gc_);
-  if (runtime == NULL || !runtime->IsFinishedStarting() ||
-      !runtime->IsConcurrentGcEnabled()) {
+  if (runtime == NULL || !runtime->IsFinishedStarting() || runtime->IsShuttingDown(self) ||
+      self->IsHandlingStackOverflow()) {
     return;
   }
-  {
-    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-    if (runtime->IsShuttingDown()) {
-      return;
-    }
-  }
-  if (self->IsHandlingStackOverflow()) {
-    return;
-  }
-
   // We already have a request pending, no reason to start more until we update
   // concurrent_start_bytes_.
   concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
-
   JNIEnv* env = self->GetJniEnv();
-  DCHECK(WellKnownClasses::java_lang_Daemons != NULL);
-  DCHECK(WellKnownClasses::java_lang_Daemons_requestGC != NULL);
+  DCHECK(WellKnownClasses::java_lang_Daemons != nullptr);
+  DCHECK(WellKnownClasses::java_lang_Daemons_requestGC != nullptr);
   env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
                             WellKnownClasses::java_lang_Daemons_requestGC);
   CHECK(!env->ExceptionCheck());
 }
 
 void Heap::ConcurrentGC(Thread* self) {
-  {
-    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-    if (Runtime::Current()->IsShuttingDown()) {
-      return;
-    }
+  if (Runtime::Current()->IsShuttingDown(self)) {
+    return;
   }
-
   // Wait for any GCs currently running to finish.
-  if (WaitForConcurrentGcToComplete(self) == collector::kGcTypeNone) {
-    CollectGarbageInternal(next_gc_type_, kGcCauseBackground, false);
+  if (WaitForGcToComplete(self) == collector::kGcTypeNone) {
+    // If we can't run the GC type we wanted to run, find the next appropriate one and try that
+    // instead. E.g. can't do partial, so do full instead.
+    if (CollectGarbageInternal(next_gc_type_, kGcCauseBackground, false) ==
+        collector::kGcTypeNone) {
+      for (collector::GcType gc_type : gc_plan_) {
+        // Attempt to run the collector, if we succeed, we are done.
+        if (gc_type > next_gc_type_ &&
+            CollectGarbageInternal(gc_type, kGcCauseBackground, false) != collector::kGcTypeNone) {
+          break;
+        }
+      }
+    }
   }
 }
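
A minimal sketch of the fallback pattern used by ConcurrentGC above: try the preferred collection type, and if it could not run, walk the plan looking for a stronger type. RunGc(), the GcType enum, and the "partial unavailable" condition below are hypothetical stand-ins.

#include <cstdio>
#include <vector>

// Hypothetical GC types ordered from weakest to strongest.
enum class GcType { kSticky, kPartial, kFull };

// Hypothetical collector entry point: returns true if this GC type could run.
bool RunGc(GcType type) {
  // Pretend partial collections are currently unavailable (e.g. no zygote space).
  return type != GcType::kPartial;
}

void BackgroundGc(GcType preferred, const std::vector<GcType>& plan) {
  if (RunGc(preferred)) {
    return;
  }
  // The preferred type could not run; fall back to stronger collections.
  for (GcType type : plan) {
    if (type > preferred && RunGc(type)) {
      break;
    }
  }
}

int main() {
  BackgroundGc(GcType::kPartial, {GcType::kSticky, GcType::kPartial, GcType::kFull});
  std::printf("done\n");
  return 0;
}
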
 
@@ -1998,26 +2128,18 @@
   // We could try mincore(2) but that's only a measure of how many pages we haven't given away,
   // not how much use we're making of those pages.
   uint64_t ms_time = MilliTime();
-  // Note the large object space's bytes allocated is equal to its capacity.
-  uint64_t los_bytes_allocated = large_object_space_->GetBytesAllocated();
-  float utilization = static_cast<float>(GetBytesAllocated() - los_bytes_allocated) /
-      (GetTotalMemory() - los_bytes_allocated);
-  if ((utilization > 0.75f && !IsLowMemoryMode()) || ((ms_time - last_trim_time_ms_) < 2 * 1000)) {
-    // Don't bother trimming the alloc space if it's more than 75% utilized and low memory mode is
-    // not enabled, or if a heap trim occurred in the last two seconds.
+  // Don't bother trimming the alloc space if a heap trim occurred in the last two seconds.
+  if (ms_time - last_trim_time_ms_ < 2 * 1000) {
     return;
   }
 
   Thread* self = Thread::Current();
-  {
-    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-    Runtime* runtime = Runtime::Current();
-    if (runtime == NULL || !runtime->IsFinishedStarting() || runtime->IsShuttingDown()) {
-      // Heap trimming isn't supported without a Java runtime or Daemons (such as at dex2oat time)
-      // Also: we do not wish to start a heap trim if the runtime is shutting down (a racy check
-      // as we don't hold the lock while requesting the trim).
-      return;
-    }
+  Runtime* runtime = Runtime::Current();
+  if (runtime == nullptr || !runtime->IsFinishedStarting() || runtime->IsShuttingDown(self)) {
+    // Heap trimming isn't supported without a Java runtime or Daemons (such as at dex2oat time)
+    // Also: we do not wish to start a heap trim if the runtime is shutting down (a racy check
+    // as we don't hold the lock while requesting the trim).
+    return;
   }
 
   last_trim_time_ms_ = ms_time;
@@ -2034,50 +2156,66 @@
   }
 }
 
-size_t Heap::Trim() {
-  // Handle a requested heap trim on a thread outside of the main GC thread.
-  return alloc_space_->Trim();
+void Heap::RevokeThreadLocalBuffers(Thread* thread) {
+  non_moving_space_->RevokeThreadLocalBuffers(thread);
+}
+
+void Heap::RevokeAllThreadLocalBuffers() {
+  non_moving_space_->RevokeAllThreadLocalBuffers();
 }
 
 bool Heap::IsGCRequestPending() const {
   return concurrent_start_bytes_ != std::numeric_limits<size_t>::max();
 }
 
+void Heap::RunFinalization(JNIEnv* env) {
+  // Can't do this in WellKnownClasses::Init since System is not properly set up at that point.
+  if (WellKnownClasses::java_lang_System_runFinalization == nullptr) {
+    CHECK(WellKnownClasses::java_lang_System != nullptr);
+    WellKnownClasses::java_lang_System_runFinalization =
+        CacheMethod(env, WellKnownClasses::java_lang_System, true, "runFinalization", "()V");
+    CHECK(WellKnownClasses::java_lang_System_runFinalization != nullptr);
+  }
+  env->CallStaticVoidMethod(WellKnownClasses::java_lang_System,
+                            WellKnownClasses::java_lang_System_runFinalization);
+}
+
 void Heap::RegisterNativeAllocation(JNIEnv* env, int bytes) {
+  Thread* self = ThreadForEnv(env);
+  if (native_need_to_run_finalization_) {
+    RunFinalization(env);
+    UpdateMaxNativeFootprint();
+    native_need_to_run_finalization_ = false;
+  }
   // Total number of native bytes allocated.
   native_bytes_allocated_.fetch_add(bytes);
   if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_gc_watermark_) {
+    collector::GcType gc_type = have_zygote_space_ ? collector::kGcTypePartial :
+        collector::kGcTypeFull;
+
     // The second watermark is higher than the gc watermark. If you hit this it means you are
     // allocating native objects faster than the GC can keep up with.
     if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_limit_) {
-        // Can't do this in WellKnownClasses::Init since System is not properly set up at that
-        // point.
-        if (UNLIKELY(WellKnownClasses::java_lang_System_runFinalization == NULL)) {
-          DCHECK(WellKnownClasses::java_lang_System != NULL);
-          WellKnownClasses::java_lang_System_runFinalization =
-              CacheMethod(env, WellKnownClasses::java_lang_System, true, "runFinalization", "()V");
-          CHECK(WellKnownClasses::java_lang_System_runFinalization != NULL);
-        }
-        if (WaitForConcurrentGcToComplete(ThreadForEnv(env)) != collector::kGcTypeNone) {
-          // Just finished a GC, attempt to run finalizers.
-          env->CallStaticVoidMethod(WellKnownClasses::java_lang_System,
-                                    WellKnownClasses::java_lang_System_runFinalization);
-          CHECK(!env->ExceptionCheck());
-        }
-
-        // If we still are over the watermark, attempt a GC for alloc and run finalizers.
-        if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_limit_) {
-          CollectGarbageInternal(collector::kGcTypePartial, kGcCauseForAlloc, false);
-          env->CallStaticVoidMethod(WellKnownClasses::java_lang_System,
-                                    WellKnownClasses::java_lang_System_runFinalization);
-          CHECK(!env->ExceptionCheck());
-        }
-        // We have just run finalizers, update the native watermark since it is very likely that
-        // finalizers released native managed allocations.
-        UpdateMaxNativeFootprint();
-    } else {
-      if (!IsGCRequestPending()) {
-        RequestConcurrentGC(ThreadForEnv(env));
+      if (WaitForGcToComplete(self) != collector::kGcTypeNone) {
+        // Just finished a GC, attempt to run finalizers.
+        RunFinalization(env);
+        CHECK(!env->ExceptionCheck());
+      }
+      // If we still are over the watermark, attempt a GC for alloc and run finalizers.
+      if (static_cast<size_t>(native_bytes_allocated_) > native_footprint_limit_) {
+        CollectGarbageInternal(gc_type, kGcCauseForAlloc, false);
+        RunFinalization(env);
+        native_need_to_run_finalization_ = false;
+        CHECK(!env->ExceptionCheck());
+      }
+      // We have just run finalizers, update the native watermark since it is very likely that
+      // finalizers released native managed allocations.
+      UpdateMaxNativeFootprint();
+    } else if (!IsGCRequestPending()) {
+      if (concurrent_gc_ && AllocatorHasConcurrentGC(current_allocator_)) {
+        RequestConcurrentGC(self);
+      } else {
+        CollectGarbageInternal(gc_type, kGcCauseForAlloc, false);
       }
     }
   }
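
Stripped of the JNI and finalization details, RegisterNativeAllocation implements a two-watermark policy: below the GC watermark do nothing, between the watermarks request a background GC, above the second watermark collect (and finalize) synchronously. The sketch below is a hypothetical reduction of that policy to a pure decision function; the thresholds in main() are invented.

#include <cstddef>
#include <cstdio>

enum class NativeAction { kNone, kRequestBackgroundGc, kCollectAndFinalize };

// Decide what to do after a native allocation brings the total to 'allocated'.
NativeAction OnNativeAllocation(size_t allocated, size_t gc_watermark, size_t limit) {
  if (allocated <= gc_watermark) {
    return NativeAction::kNone;
  }
  if (allocated > limit) {
    return NativeAction::kCollectAndFinalize;
  }
  return NativeAction::kRequestBackgroundGc;
}

int main() {
  std::printf("%d\n", static_cast<int>(OnNativeAllocation(1 * 1024, 4 * 1024, 8 * 1024)));  // 0
  std::printf("%d\n", static_cast<int>(OnNativeAllocation(5 * 1024, 4 * 1024, 8 * 1024)));  // 1
  std::printf("%d\n", static_cast<int>(OnNativeAllocation(9 * 1024, 4 * 1024, 8 * 1024)));  // 2
  return 0;
}
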
@@ -2086,26 +2224,24 @@
 void Heap::RegisterNativeFree(JNIEnv* env, int bytes) {
   int expected_size, new_size;
   do {
-      expected_size = native_bytes_allocated_.load();
-      new_size = expected_size - bytes;
-      if (UNLIKELY(new_size < 0)) {
-        ScopedObjectAccess soa(env);
-        env->ThrowNew(WellKnownClasses::java_lang_RuntimeException,
-                      StringPrintf("Attempted to free %d native bytes with only %d native bytes "
-                                   "registered as allocated", bytes, expected_size).c_str());
-        break;
-      }
+    expected_size = native_bytes_allocated_.load();
+    new_size = expected_size - bytes;
+    if (UNLIKELY(new_size < 0)) {
+      ScopedObjectAccess soa(env);
+      env->ThrowNew(WellKnownClasses::java_lang_RuntimeException,
+                    StringPrintf("Attempted to free %d native bytes with only %d native bytes "
+                                 "registered as allocated", bytes, expected_size).c_str());
+      break;
+    }
   } while (!native_bytes_allocated_.compare_and_swap(expected_size, new_size));
 }
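
The loop above is the usual lock-free decrement-with-underflow-check. A standalone sketch using std::atomic rather than ART's AtomicInteger (and a return code instead of throwing RuntimeException) is:

#include <atomic>
#include <cstdio>

// Decrement 'counter' by 'bytes' unless that would drive it negative.
// Returns false (and leaves the counter untouched) on underflow.
bool RegisterNativeFree(std::atomic<int>& counter, int bytes) {
  int expected = counter.load();
  int desired;
  do {
    desired = expected - bytes;
    if (desired < 0) {
      return false;  // The real code would throw a RuntimeException here.
    }
    // compare_exchange_weak reloads 'expected' on failure, so the loop retries
    // with the freshly observed value.
  } while (!counter.compare_exchange_weak(expected, desired));
  return true;
}

int main() {
  std::atomic<int> native_bytes_allocated(128);
  std::printf("%d\n", RegisterNativeFree(native_bytes_allocated, 100));  // 1
  std::printf("%d\n", RegisterNativeFree(native_bytes_allocated, 100));  // 0 (underflow)
  std::printf("%d\n", native_bytes_allocated.load());                    // 28
  return 0;
}
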
 
 int64_t Heap::GetTotalMemory() const {
   int64_t ret = 0;
   for (const auto& space : continuous_spaces_) {
-    if (space->IsImageSpace()) {
-      // Currently don't include the image space.
-    } else if (space->IsDlMallocSpace()) {
-      // Zygote or alloc space
-      ret += space->AsDlMallocSpace()->GetFootprint();
+    // Currently don't include the image space.
+    if (!space->IsImageSpace()) {
+      ret += space->Size();
     }
   }
   for (const auto& space : discontinuous_spaces_) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 7d2441b..8c5746d 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -26,11 +26,14 @@
 #include "gc/accounting/atomic_stack.h"
 #include "gc/accounting/card_table.h"
 #include "gc/collector/gc_type.h"
+#include "gc/collector_type.h"
 #include "globals.h"
 #include "gtest/gtest.h"
 #include "jni.h"
 #include "locks.h"
 #include "offsets.h"
+#include "reference_queue.h"
+#include "root_visitor.h"
 #include "safe_map.h"
 #include "thread_pool.h"
 
@@ -57,16 +60,21 @@
 namespace collector {
   class GarbageCollector;
   class MarkSweep;
+  class SemiSpace;
 }  // namespace collector
 
 namespace space {
   class AllocSpace;
+  class BumpPointerSpace;
   class DiscontinuousSpace;
   class DlMallocSpace;
   class ImageSpace;
   class LargeObjectSpace;
+  class MallocSpace;
+  class RosAllocSpace;
   class Space;
   class SpaceTest;
+  class ContinuousMemMapAllocSpace;
 }  // namespace space
 
 class AgeCardVisitor {
@@ -80,6 +88,13 @@
   }
 };
 
+// Different types of allocators.
+enum AllocatorType {
+  kAllocatorTypeBumpPointer,
+  kAllocatorTypeFreeList,  // ROSAlloc / dlmalloc
+  kAllocatorTypeLOS,  // Large object space.
+};
+
 // What caused the GC?
 enum GcCause {
   // GC triggered by a failed allocation. Thread doing allocation is blocked waiting for GC before
@@ -101,13 +116,16 @@
 };
 static constexpr HeapVerificationMode kDesiredHeapVerification = kNoHeapVerification;
 
-// If true, measure the total allocation time.
-static constexpr bool kMeasureAllocationTime = false;
-// Primitive arrays larger than this size are put in the large object space.
-static constexpr size_t kLargeObjectThreshold = 3 * kPageSize;
+// If true, use rosalloc/RosAllocSpace instead of dlmalloc/DlMallocSpace
+static constexpr bool kUseRosAlloc = true;
 
 class Heap {
  public:
+  // If true, measure the total allocation time.
+  static constexpr bool kMeasureAllocationTime = false;
+  // Primitive arrays larger than this size are put in the large object space.
+  static constexpr size_t kLargeObjectThreshold = 3 * kPageSize;
+
   static constexpr size_t kDefaultInitialSize = 2 * MB;
   static constexpr size_t kDefaultMaximumSize = 32 * MB;
   static constexpr size_t kDefaultMaxFree = 2 * MB;
@@ -126,33 +144,63 @@
   // ImageWriter output.
   explicit Heap(size_t initial_size, size_t growth_limit, size_t min_free,
                 size_t max_free, double target_utilization, size_t capacity,
-                const std::string& original_image_file_name, bool concurrent_gc,
+                const std::string& original_image_file_name, CollectorType collector_type_,
                 size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
-                size_t long_pause_threshold, size_t long_gc_threshold, bool ignore_max_footprint);
+                size_t long_pause_threshold, size_t long_gc_threshold,
+                bool ignore_max_footprint);
 
   ~Heap();
 
   // Allocates and initializes storage for an object instance.
-  mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes)
+  template <const bool kInstrumented>
+  inline mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return AllocObjectInstrumented(self, klass, num_bytes);
+    return AllocObjectWithAllocator<kInstrumented>(self, klass, num_bytes, GetCurrentAllocator());
   }
-  mirror::Object* AllocObjectInstrumented(Thread* self, mirror::Class* klass, size_t num_bytes)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::Object* AllocObjectUninstrumented(Thread* self, mirror::Class* klass, size_t num_bytes)
+  template <const bool kInstrumented>
+  inline mirror::Object* AllocNonMovableObject(Thread* self, mirror::Class* klass,
+                                               size_t num_bytes)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return AllocObjectWithAllocator<kInstrumented>(self, klass, num_bytes,
+                                                   GetCurrentNonMovingAllocator());
+  }
+  template <bool kInstrumented, typename PreFenceVisitor = VoidFunctor>
+  ALWAYS_INLINE mirror::Object* AllocObjectWithAllocator(
+      Thread* self, mirror::Class* klass, size_t byte_count, AllocatorType allocator,
+      const PreFenceVisitor& pre_fence_visitor = VoidFunctor())
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void DebugCheckPreconditionsForAllobObject(mirror::Class* c, size_t byte_count)
+  AllocatorType GetCurrentAllocator() const {
+    return current_allocator_;
+  }
+
+  AllocatorType GetCurrentNonMovingAllocator() const {
+    return current_non_moving_allocator_;
+  }
+
+  // Visit all of the live objects in the heap.
+  void VisitObjects(ObjectVisitorCallback callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
+
+  void SwapSemiSpaces() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void DebugCheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void ThrowOutOfMemoryError(size_t byte_count, bool large_object_allocation);
 
   void RegisterNativeAllocation(JNIEnv* env, int bytes);
   void RegisterNativeFree(JNIEnv* env, int bytes);
 
+  // Change the allocator, updates entrypoints.
+  void ChangeAllocator(AllocatorType allocator);
+
+  // Change the collector to be one of the possible options (MS, CMS, SS).
+  void ChangeCollector(CollectorType collector_type);
+
   // The given reference is believed to be to an object in the Java heap, check the soundness of it.
   void VerifyObjectImpl(const mirror::Object* o);
   void VerifyObject(const mirror::Object* o) {
-    if (o != NULL && this != NULL && verify_object_mode_ > kNoHeapVerification) {
+    if (o != nullptr && this != nullptr && verify_object_mode_ > kNoHeapVerification) {
       VerifyObjectImpl(o);
     }
   }
@@ -169,7 +217,10 @@
   // A weaker test than IsLiveObject or VerifyObject that doesn't require the heap lock,
   // and doesn't abort on error, allowing the caller to report more
   // meaningful diagnostics.
-  bool IsHeapAddress(const mirror::Object* obj);
+  bool IsValidObjectAddress(const mirror::Object* obj) const;
+
+  // Returns true if the address passed in is a heap address; it doesn't need to be aligned.
+  bool IsHeapAddress(const mirror::Object* obj) const;
 
   // Returns true if 'obj' is a live heap object, false otherwise (including for invalid addresses).
   // Requires the heap lock to be held.
@@ -177,6 +228,17 @@
                           bool search_live_stack = true, bool sorted = false)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  // Returns true if there is any chance that the object (obj) will move.
+  bool IsMovableObject(const mirror::Object* obj) const;
+
+  // Returns true if an object is in the temp space; if this happens, it is usually indicative of
+  // compaction related errors.
+  bool IsInTempSpace(const mirror::Object* obj) const;
+
+  // Enables us to prevent GC until objects are released.
+  void IncrementDisableGC(Thread* self);
+  void DecrementDisableGC(Thread* self);
+
   // Initiates an explicit garbage collection.
   void CollectGarbage(bool clear_soft_references) LOCKS_EXCLUDED(Locks::mutator_lock_);
 
@@ -221,9 +283,9 @@
   // from the system. Doesn't allow the space to exceed its growth limit.
   void SetIdealFootprint(size_t max_allowed_footprint);
 
-  // Blocks the caller until the garbage collector becomes idle and returns
-  // true if we waited for the GC to complete.
-  collector::GcType WaitForConcurrentGcToComplete(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_);
+  // Blocks the caller until the garbage collector becomes idle and returns the type of GC we
+  // waited for.
+  collector::GcType WaitForGcToComplete(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_);
 
   const std::vector<space::ContinuousSpace*>& GetContinuousSpaces() const {
     return continuous_spaces_;
@@ -238,29 +300,26 @@
                            MemberOffset reference_queueNext_offset,
                            MemberOffset reference_pendingNext_offset,
                            MemberOffset finalizer_reference_zombie_offset);
-
-  mirror::Object* GetReferenceReferent(mirror::Object* reference);
-  void ClearReferenceReferent(mirror::Object* reference) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  // Returns true if the reference object has not yet been enqueued.
-  bool IsEnqueuable(const mirror::Object* ref);
-  void EnqueueReference(mirror::Object* ref, mirror::Object** list)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool IsEnqueued(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void EnqueuePendingReference(mirror::Object* ref, mirror::Object** list)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::Object* DequeuePendingReference(mirror::Object** list)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  MemberOffset GetReferencePendingNextOffset() {
-    DCHECK_NE(reference_pendingNext_offset_.Uint32Value(), 0U);
+  MemberOffset GetReferenceReferentOffset() const {
+    return reference_referent_offset_;
+  }
+  MemberOffset GetReferenceQueueOffset() const {
+    return reference_queue_offset_;
+  }
+  MemberOffset GetReferenceQueueNextOffset() const {
+    return reference_queueNext_offset_;
+  }
+  MemberOffset GetReferencePendingNextOffset() const {
     return reference_pendingNext_offset_;
   }
-
-  MemberOffset GetFinalizerReferenceZombieOffset() {
-    DCHECK_NE(finalizer_reference_zombie_offset_.Uint32Value(), 0U);
+  MemberOffset GetFinalizerReferenceZombieOffset() const {
     return finalizer_reference_zombie_offset_;
   }
+  static mirror::Object* PreserveSoftReferenceCallback(mirror::Object* obj, void* arg);
+  void ProcessReferences(TimingLogger& timings, bool clear_soft, RootVisitor* is_marked_callback,
+                         RootVisitor* recursive_mark_object_callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Enable verification of object references when the runtime is sufficiently initialized.
   void EnableObjectValidation() {
@@ -300,6 +359,10 @@
     card_table_->MarkCard(dst);
   }
 
+  void WriteBarrierEveryFieldOf(const mirror::Object* obj) {
+    card_table_->MarkCard(obj);
+  }
+
   accounting::CardTable* GetCardTable() const {
     return card_table_.get();
   }
@@ -312,7 +375,7 @@
   }
 
   // Returns the number of objects currently allocated.
-  size_t GetObjectsAllocated() const;
+  size_t GetObjectsAllocated() const LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   // Returns the total number of objects allocated since the heap was created.
   size_t GetObjectsAllocatedEver() const;
@@ -357,7 +420,11 @@
 
   void DumpForSigQuit(std::ostream& os);
 
-  size_t Trim();
+  // Trim the managed and native heaps by releasing unused memory back to the OS.
+  void Trim();
+
+  void RevokeThreadLocalBuffers(Thread* thread);
+  void RevokeAllThreadLocalBuffers();
 
   accounting::HeapBitmap* GetLiveBitmap() SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     return live_bitmap_.get();
@@ -371,7 +438,7 @@
     return live_stack_.get();
   }
 
-  void PreZygoteFork() LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void PreZygoteFork() NO_THREAD_SAFETY_ANALYSIS;
 
   // Mark and empty stack.
   void FlushAllocStack()
@@ -382,6 +449,10 @@
                       accounting::ObjectStack* stack)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  // Mark the specified allocation stack as live.
+  void MarkAllocStackAsLive(accounting::ObjectStack* stack)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   // Gets called when we get notified by ActivityThread that the process state has changed.
   void ListenForProcessStateChange();
 
@@ -389,31 +460,15 @@
   // Assumes there is only one image space.
   space::ImageSpace* GetImageSpace() const;
 
-  space::DlMallocSpace* GetAllocSpace() const {
-    return alloc_space_;
+  space::MallocSpace* GetNonMovingSpace() const {
+    return non_moving_space_;
   }
 
   space::LargeObjectSpace* GetLargeObjectsSpace() const {
     return large_object_space_;
   }
 
-  Mutex* GetSoftRefQueueLock() {
-    return soft_ref_queue_lock_;
-  }
-
-  Mutex* GetWeakRefQueueLock() {
-    return weak_ref_queue_lock_;
-  }
-
-  Mutex* GetFinalizerRefQueueLock() {
-    return finalizer_ref_queue_lock_;
-  }
-
-  Mutex* GetPhantomRefQueueLock() {
-    return phantom_ref_queue_lock_;
-  }
-
-  void DumpSpaces();
+  void DumpSpaces(std::ostream& stream = LOG(INFO));
 
   // GC performance measuring
   void DumpGcPerformanceInfo(std::ostream& os);
@@ -438,55 +493,41 @@
   accounting::ModUnionTable* FindModUnionTableFromSpace(space::Space* space);
   void AddModUnionTable(accounting::ModUnionTable* mod_union_table);
 
+  bool IsCompilingBoot() const;
+  bool HasImageSpace() const;
+
  private:
-  bool TryAllocLargeObjectInstrumented(Thread* self, mirror::Class* c, size_t byte_count,
-                                       mirror::Object** obj_ptr, size_t* bytes_allocated)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool TryAllocLargeObjectUninstrumented(Thread* self, mirror::Class* c, size_t byte_count,
-                                         mirror::Object** obj_ptr, size_t* bytes_allocated)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count);
-  void CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated, mirror::Object* obj);
+  void Compact(space::ContinuousMemMapAllocSpace* target_space,
+               space::ContinuousMemMapAllocSpace* source_space);
 
-  // Allocates uninitialized storage. Passing in a null space tries to place the object in the
-  // large object space.
-  template <class T> mirror::Object* AllocateInstrumented(Thread* self, T* space, size_t num_bytes,
-                                                          size_t* bytes_allocated)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  template <class T> mirror::Object* AllocateUninstrumented(Thread* self, T* space, size_t num_bytes,
-                                                            size_t* bytes_allocated)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static bool AllocatorHasAllocationStack(AllocatorType allocator_type) {
+    return allocator_type != kAllocatorTypeBumpPointer;
+  }
+  static bool AllocatorHasConcurrentGC(AllocatorType allocator_type) {
+    return allocator_type != kAllocatorTypeBumpPointer;
+  }
+  bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const;
+  ALWAYS_INLINE void CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated,
+                                       mirror::Object* obj);
 
   // Handles Allocate()'s slow allocation path with GC involved after
   // an initial allocation attempt failed.
-  mirror::Object* AllocateInternalWithGc(Thread* self, space::AllocSpace* space, size_t num_bytes,
+  mirror::Object* AllocateInternalWithGc(Thread* self, AllocatorType allocator, size_t num_bytes,
                                          size_t* bytes_allocated)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Try to allocate a number of bytes, this function never does any GCs.
-  mirror::Object* TryToAllocateInstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
-                                            bool grow, size_t* bytes_allocated)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+  // Allocate into a specific space.
+  mirror::Object* AllocateInto(Thread* self, space::AllocSpace* space, mirror::Class* c,
+                               size_t bytes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Try to allocate a number of bytes, this function never does any GCs. DlMallocSpace-specialized version.
-  mirror::Object* TryToAllocateInstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
-                                            bool grow, size_t* bytes_allocated)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  mirror::Object* TryToAllocateUninstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
-                                              bool grow, size_t* bytes_allocated)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  mirror::Object* TryToAllocateUninstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
-                                              bool grow, size_t* bytes_allocated)
-      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+  // Try to allocate a number of bytes; this function never does any GCs. Needs to be inlined so
+  // that the switch statement is constant optimized in the entrypoints.
+  template <const bool kInstrumented>
+  ALWAYS_INLINE mirror::Object* TryToAllocate(Thread* self, AllocatorType allocator_type,
+                                              size_t alloc_size, bool grow,
+                                              size_t* bytes_allocated)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation)
@@ -494,7 +535,28 @@
   bool IsOutOfMemoryOnAllocation(size_t alloc_size, bool grow);
 
   // Pushes a list of cleared references out to the managed heap.
-  void EnqueueClearedReferences(mirror::Object** cleared_references);
+  void SetReferenceReferent(mirror::Object* reference, mirror::Object* referent)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Object* GetReferenceReferent(mirror::Object* reference)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ClearReferenceReferent(mirror::Object* reference)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SetReferenceReferent(reference, nullptr);
+  }
+  void EnqueueClearedReferences();
+  // Returns true if the reference object has not yet been enqueued.
+  bool IsEnqueuable(const mirror::Object* ref) const;
+  bool IsEnqueued(mirror::Object* ref) const;
+  void DelayReferenceReferent(mirror::Class* klass, mirror::Object* obj, RootVisitor mark_visitor,
+                              void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Run the finalizers.
+  void RunFinalization(JNIEnv* env);
+
+  // Blocks the caller until the garbage collector becomes idle and returns the type of GC we
+  // waited for.
+  collector::GcType WaitForGcToCompleteLocked(Thread* self)
+      EXCLUSIVE_LOCKS_REQUIRED(gc_complete_lock_);
 
   void RequestHeapTrim() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
   void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
@@ -533,9 +595,7 @@
 
   size_t GetPercentFree();
 
-  void AddContinuousSpace(space::ContinuousSpace* space) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
-  void AddDiscontinuousSpace(space::DiscontinuousSpace* space)
-      LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
+  void AddSpace(space::Space* space) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   // No thread saftey analysis since we call this everywhere and it is impossible to find a proper
   // lock ordering for it.
@@ -548,7 +608,7 @@
   void SwapStacks();
 
   // Clear cards and update the mod union table.
-  void ProcessCards(base::TimingLogger& timings);
+  void ProcessCards(TimingLogger& timings);
 
   // All-known continuous spaces, where objects lie within fixed bounds.
   std::vector<space::ContinuousSpace*> continuous_spaces_;
@@ -556,8 +616,12 @@
   // All-known discontinuous spaces, where objects may be placed throughout virtual memory.
   std::vector<space::DiscontinuousSpace*> discontinuous_spaces_;
 
-  // The allocation space we are currently allocating into.
-  space::DlMallocSpace* alloc_space_;
+  // All-known alloc spaces, where objects may be or have been allocated.
+  std::vector<space::AllocSpace*> alloc_spaces_;
+
+  // A space where non-movable objects are allocated; when compaction is enabled it contains
+  // Classes, ArtMethods, ArtFields, and other non-moving objects.
+  space::MallocSpace* non_moving_space_;
 
   // The large object space we are currently allocating into.
   space::LargeObjectSpace* large_object_space_;
@@ -572,6 +636,9 @@
   // false for stop-the-world mark sweep.
   const bool concurrent_gc_;
 
+  // The current collector type.
+  CollectorType collector_type_;
+
   // How many GC threads we may use for paused parts of garbage collection.
   const size_t parallel_gc_threads_;
 
@@ -595,17 +662,22 @@
   // If we have a zygote space.
   bool have_zygote_space_;
 
+  // Number of pinned primitive arrays in the movable space.
+  // Block all GC until this hits zero, or we hit the timeout!
+  size_t number_gc_blockers_;
+  static constexpr size_t KGCBlockTimeout = 30000;
+
   // Guards access to the state of GC, associated conditional variable is used to signal when a GC
   // completes.
   Mutex* gc_complete_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   UniquePtr<ConditionVariable> gc_complete_cond_ GUARDED_BY(gc_complete_lock_);
 
-  // Mutexes held when adding references to reference queues.
-  // TODO: move to a UniquePtr, currently annotalysis is confused that UniquePtr isn't lockable.
-  Mutex* soft_ref_queue_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  Mutex* weak_ref_queue_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  Mutex* finalizer_ref_queue_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  Mutex* phantom_ref_queue_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  // Reference queues.
+  ReferenceQueue soft_reference_queue_;
+  ReferenceQueue weak_reference_queue_;
+  ReferenceQueue finalizer_reference_queue_;
+  ReferenceQueue phantom_reference_queue_;
+  ReferenceQueue cleared_references_;
 
   // True while the garbage collector is running.
   volatile bool is_gc_running_ GUARDED_BY(gc_complete_lock_);
@@ -631,6 +703,9 @@
   // The watermark at which a GC is performed inside of registerNativeAllocation.
   size_t native_footprint_limit_;
 
+  // Whether or not we need to run finalizers in the next native allocation.
+  bool native_need_to_run_finalization_;
+
   // Activity manager members.
   jclass activity_thread_class_;
   jclass application_thread_class_;
@@ -704,24 +779,31 @@
   // Allocation stack, new allocations go here so that we can do sticky mark bits. This enables us
   // to use the live bitmap as the old mark bitmap.
   const size_t max_allocation_stack_size_;
-  bool is_allocation_stack_sorted_;
   UniquePtr<accounting::ObjectStack> allocation_stack_;
 
   // Second allocation stack so that we can process allocation with the heap unlocked.
   UniquePtr<accounting::ObjectStack> live_stack_;
 
+  // Allocator type.
+  AllocatorType current_allocator_;
+  const AllocatorType current_non_moving_allocator_;
+
+  // Which GCs we run, in order, when an allocation fails.
+  std::vector<collector::GcType> gc_plan_;
+
+  // Bump pointer spaces.
+  space::BumpPointerSpace* bump_pointer_space_;
+  // Temp space is the space which the semispace collector copies to.
+  space::BumpPointerSpace* temp_space_;
+
   // offset of java.lang.ref.Reference.referent
   MemberOffset reference_referent_offset_;
-
   // offset of java.lang.ref.Reference.queue
   MemberOffset reference_queue_offset_;
-
   // offset of java.lang.ref.Reference.queueNext
   MemberOffset reference_queueNext_offset_;
-
   // offset of java.lang.ref.Reference.pendingNext
   MemberOffset reference_pendingNext_offset_;
-
   // offset of java.lang.ref.FinalizerReference.zombie
   MemberOffset finalizer_reference_zombie_offset_;
 
@@ -744,11 +826,17 @@
   // The current state of heap verification, may be enabled or disabled.
   HeapVerificationMode verify_object_mode_;
 
-  std::vector<collector::MarkSweep*> mark_sweep_collectors_;
+  // GC disable count, error on GC if > 0.
+  size_t gc_disable_count_ GUARDED_BY(gc_complete_lock_);
+
+  std::vector<collector::GarbageCollector*> garbage_collectors_;
+  collector::SemiSpace* semi_space_collector_;
 
   const bool running_on_valgrind_;
 
   friend class collector::MarkSweep;
+  friend class collector::SemiSpace;
+  friend class ReferenceQueue;
   friend class VerifyReferenceCardVisitor;
   friend class VerifyReferenceVisitor;
   friend class VerifyObjectVisitor;
diff --git a/runtime/gc/heap_test.cc b/runtime/gc/heap_test.cc
index 02708e8..8af2725 100644
--- a/runtime/gc/heap_test.cc
+++ b/runtime/gc/heap_test.cc
@@ -43,12 +43,14 @@
     ScopedObjectAccess soa(Thread::Current());
     // garbage is created during ClassLinker::Init
 
-    mirror::Class* c = class_linker_->FindSystemClass("[Ljava/lang/Object;");
+    SirtRef<mirror::Class> c(soa.Self(), class_linker_->FindSystemClass("[Ljava/lang/Object;"));
     for (size_t i = 0; i < 1024; ++i) {
       SirtRef<mirror::ObjectArray<mirror::Object> > array(soa.Self(),
-          mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), c, 2048));
+          mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), c.get(), 2048));
       for (size_t j = 0; j < 2048; ++j) {
-        array->Set(j, mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!"));
+        mirror::String* string = mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!");
+        // SIRT operator-> dereferences the SIRT before running the method.
+        array->Set(j, string);
       }
     }
   }
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
new file mode 100644
index 0000000..d006349
--- /dev/null
+++ b/runtime/gc/reference_queue.cc
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "reference_queue.h"
+
+#include "accounting/card_table-inl.h"
+#include "heap.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+
+namespace art {
+namespace gc {
+
+ReferenceQueue::ReferenceQueue(Heap* heap)
+    : lock_("reference queue lock"),
+      heap_(heap),
+      list_(nullptr) {
+}
+
+void ReferenceQueue::AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Object* ref) {
+  DCHECK(ref != NULL);
+  MutexLock mu(self, lock_);
+  if (!heap_->IsEnqueued(ref)) {
+    EnqueuePendingReference(ref);
+  }
+}
+
+void ReferenceQueue::EnqueueReference(mirror::Object* ref) {
+  CHECK(heap_->IsEnqueuable(ref));
+  EnqueuePendingReference(ref);
+}
+
+void ReferenceQueue::EnqueuePendingReference(mirror::Object* ref) {
+  DCHECK(ref != NULL);
+  MemberOffset pending_next_offset = heap_->GetReferencePendingNextOffset();
+  DCHECK_NE(pending_next_offset.Uint32Value(), 0U);
+  if (IsEmpty()) {
+    // 1 element cyclic queue, ie: Reference ref = ..; ref.pendingNext = ref;
+    ref->SetFieldObject(pending_next_offset, ref, false);
+    list_ = ref;
+  } else {
+    mirror::Object* head =
+        list_->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+    ref->SetFieldObject(pending_next_offset, head, false);
+    list_->SetFieldObject(pending_next_offset, ref, false);
+  }
+}
+
+mirror::Object* ReferenceQueue::DequeuePendingReference() {
+  DCHECK(!IsEmpty());
+  MemberOffset pending_next_offset = heap_->GetReferencePendingNextOffset();
+  mirror::Object* head = list_->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+  DCHECK(head != nullptr);
+  mirror::Object* ref;
+  // Note: the following code is thread-safe because it is only called from ProcessReferences which
+  // is single threaded.
+  if (list_ == head) {
+    ref = list_;
+    list_ = nullptr;
+  } else {
+    mirror::Object* next = head->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+    list_->SetFieldObject(pending_next_offset, next, false);
+    ref = head;
+  }
+  ref->SetFieldObject(pending_next_offset, nullptr, false);
+  return ref;
+}
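
The pending-reference list manipulated above is an intrusive cyclic singly-linked list threaded through the Reference objects' pendingNext fields. The standalone sketch below reproduces the same invariants with a hypothetical Node type standing in for java.lang.ref.Reference; note that, as in the real code, dequeue order is not significant.

#include <cassert>
#include <cstdio>

// Hypothetical stand-in for a java.lang.ref.Reference with a pendingNext field.
struct Node {
  int id;
  Node* next = nullptr;  // pendingNext: non-null iff the node is enqueued.
};

struct PendingQueue {
  Node* list_ = nullptr;  // Anchor element of the cyclic list; dequeues pop list_->next.

  bool IsEmpty() const { return list_ == nullptr; }

  void Enqueue(Node* ref) {
    if (IsEmpty()) {
      ref->next = ref;  // One-element cyclic queue: ref.pendingNext = ref.
      list_ = ref;
    } else {
      ref->next = list_->next;  // Splice the new node in right after the anchor.
      list_->next = ref;
    }
  }

  Node* Dequeue() {
    assert(!IsEmpty());
    Node* head = list_->next;
    Node* ref;
    if (list_ == head) {
      ref = list_;       // Last element; the queue becomes empty.
      list_ = nullptr;
    } else {
      list_->next = head->next;  // Unlink the head.
      ref = head;
    }
    ref->next = nullptr;  // Mark as no longer enqueued.
    return ref;
  }
};

int main() {
  Node a{1}, b{2}, c{3};
  PendingQueue q;
  q.Enqueue(&a);
  q.Enqueue(&b);
  q.Enqueue(&c);
  while (!q.IsEmpty()) {
    std::printf("%d ", q.Dequeue()->id);  // Prints: 3 2 1 (order is not significant).
  }
  std::printf("\n");
  return 0;
}
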
+
+void ReferenceQueue::Dump(std::ostream& os) const {
+  mirror::Object* cur = list_;
+  os << "Reference starting at list_=" << list_ << "\n";
+  while (cur != nullptr) {
+    mirror::Object* pending_next =
+        cur->GetFieldObject<mirror::Object*>(heap_->GetReferencePendingNextOffset(), false);
+    os << "PendingNext=" << pending_next;
+    if (cur->GetClass()->IsFinalizerReferenceClass()) {
+      os << " Zombie=" <<
+          cur->GetFieldObject<mirror::Object*>(heap_->GetFinalizerReferenceZombieOffset(), false);
+    }
+    os << "\n";
+    cur = pending_next;
+  }
+}
+
+void ReferenceQueue::ClearWhiteReferences(ReferenceQueue& cleared_references, RootVisitor visitor,
+                                          void* arg) {
+  while (!IsEmpty()) {
+    mirror::Object* ref = DequeuePendingReference();
+    mirror::Object* referent = heap_->GetReferenceReferent(ref);
+    if (referent != nullptr) {
+      mirror::Object* forward_address = visitor(referent, arg);
+      if (forward_address == nullptr) {
+        // Referent is white, clear it.
+        heap_->ClearReferenceReferent(ref);
+        if (heap_->IsEnqueuable(ref)) {
+          cleared_references.EnqueuePendingReference(ref);
+        }
+      } else if (referent != forward_address) {
+        // Object moved, need to update the referent.
+        heap_->SetReferenceReferent(ref, forward_address);
+      }
+    }
+  }
+}
+
+void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
+                                                RootVisitor is_marked_callback,
+                                                RootVisitor recursive_mark_callback, void* arg) {
+  while (!IsEmpty()) {
+    mirror::Object* ref = DequeuePendingReference();
+    mirror::Object* referent = heap_->GetReferenceReferent(ref);
+    if (referent != nullptr) {
+      mirror::Object* forward_address = is_marked_callback(referent, arg);
+      // If the referent isn't marked, mark it and enqueue the reference for finalization.
+      if (forward_address == nullptr) {
+        forward_address = recursive_mark_callback(referent, arg);
+        // If the referent is non-null the reference must be enqueuable.
+        DCHECK(heap_->IsEnqueuable(ref));
+        // Move the updated referent to the zombie field.
+        ref->SetFieldObject(heap_->GetFinalizerReferenceZombieOffset(), forward_address, false);
+        heap_->ClearReferenceReferent(ref);
+        cleared_references.EnqueueReference(ref);
+      } else if (referent != forward_address) {
+        heap_->SetReferenceReferent(ref, forward_address);
+      }
+    }
+  }
+}
+
+void ReferenceQueue::PreserveSomeSoftReferences(RootVisitor preserve_callback, void* arg) {
+  ReferenceQueue cleared(heap_);
+  while (!IsEmpty()) {
+    mirror::Object* ref = DequeuePendingReference();
+    mirror::Object* referent = heap_->GetReferenceReferent(ref);
+    if (referent != nullptr) {
+      mirror::Object* forward_address = preserve_callback(referent, arg);
+      if (forward_address == nullptr) {
+        // Either the reference isn't marked or we don't wish to preserve it.
+        cleared.EnqueuePendingReference(ref);
+      } else {
+        heap_->SetReferenceReferent(ref, forward_address);
+      }
+    }
+  }
+  list_ = cleared.GetList();
+}
+
+}  // namespace gc
+}  // namespace art
+
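// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of this patch. DequeuePendingReference above
// works on a cyclic singly-linked "pending" list threaded through the references themselves:
// list_ points into the cycle and list_->pendingNext is the element that gets popped. The
// standalone C++ below models that discipline with a plain Node type (Node, next and
// PendingList are hypothetical stand-ins for mirror::Object, the pendingNext field and
// ReferenceQueue); the enqueue shown is one plausible insertion consistent with the dequeue
// logic above, not necessarily the exact one used elsewhere in this patch.
#include <cassert>
#include <cstddef>

namespace sketch {

struct Node {
  Node* next = nullptr;  // Stands in for the Reference.pendingNext field.
};

class PendingList {
 public:
  // Insert right after list_ without advancing it; a cleared next pointer doubles as the
  // "not currently enqueued" marker.
  void Enqueue(Node* ref) {
    assert(ref->next == nullptr);
    if (list_ == nullptr) {
      ref->next = ref;  // One-element cyclic list: the node points at itself.
      list_ = ref;
    } else {
      ref->next = list_->next;
      list_->next = ref;
    }
  }

  // Mirrors DequeuePendingReference: pop list_->next and clear its link.
  Node* Dequeue() {
    assert(list_ != nullptr);
    Node* head = list_->next;
    Node* ref;
    if (list_ == head) {
      ref = list_;        // Last element in the cycle.
      list_ = nullptr;
    } else {
      list_->next = head->next;  // Unlink the popped node.
      ref = head;
    }
    ref->next = nullptr;
    return ref;
  }

  bool IsEmpty() const { return list_ == nullptr; }

 private:
  Node* list_ = nullptr;  // Corresponds to list_ above.
};

}  // namespace sketch
// ---------------------------------------------------------------------------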
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
new file mode 100644
index 0000000..89589c3
--- /dev/null
+++ b/runtime/gc/reference_queue.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_REFERENCE_QUEUE_H_
+#define ART_RUNTIME_GC_REFERENCE_QUEUE_H_
+
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+#include "atomic_integer.h"
+#include "base/timing_logger.h"
+#include "globals.h"
+#include "gtest/gtest.h"
+#include "jni.h"
+#include "locks.h"
+#include "offsets.h"
+#include "root_visitor.h"
+#include "thread_pool.h"
+
+namespace art {
+namespace gc {
+
+class Heap;
+
+// Used to temporarily store java.lang.ref.Reference(s) during GC and prior to queueing on the
+// appropriate java.lang.ref.ReferenceQueue. The linked list is maintained in the
+// java.lang.ref.Reference objects.
+class ReferenceQueue {
+ public:
+  explicit ReferenceQueue(Heap* heap);
+  // Enqueue a reference if it is not already enqueued. Thread safe to call from multiple threads
+  // since it uses a lock to avoid a race between checking for the reference's presence and adding
+  // it.
+  void AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Object* ref)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
+  // Enqueue a reference. Unlike EnqueuePendingReference, EnqueueReference checks that the
+  // reference is enqueuable (IsEnqueuable). Not thread safe; used when mutators are paused to
+  // minimize lock overhead.
+  void EnqueueReference(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void EnqueuePendingReference(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Object* DequeuePendingReference() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Enqueues finalizer references with white referents.  White referents are blackened, moved to the
+  // zombie field, and the referent field is cleared.
+  void EnqueueFinalizerReferences(ReferenceQueue& cleared_references,
+                                  RootVisitor is_marked_callback,
+                                  RootVisitor recursive_mark_callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Walks the reference list marking any references subject to the reference clearing policy.
+  // References with a black referent are removed from the list.  References with white referents
+  // biased toward saving are blackened and also removed from the list.
+  void PreserveSomeSoftReferences(RootVisitor preserve_callback, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Unlink the reference list, clearing reference objects with white referents. Cleared references
+  // registered to a reference queue are scheduled for appending by the heap worker thread.
+  void ClearWhiteReferences(ReferenceQueue& cleared_references, RootVisitor visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void Dump(std::ostream& os) const
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsEmpty() const {
+    return list_ == nullptr;
+  }
+  void Clear() {
+    list_ = nullptr;
+  }
+  mirror::Object* GetList() {
+    return list_;
+  }
+
+ private:
+  // Lock, used for parallel GC reference enqueuing. It allows for multiple threads simultaneously
+  // calling AtomicEnqueueIfNotEnqueued.
+  Mutex lock_;
+  // The heap contains the reference offsets.
+  Heap* const heap_;
+  // The actual reference list. Not a root since it will be nullptr when the GC is not running.
+  mirror::Object* list_;
+};
+
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_REFERENCE_QUEUE_H_
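// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of this patch. The processing methods declared
// above (ClearWhiteReferences, EnqueueFinalizerReferences, PreserveSomeSoftReferences) take
// callbacks with "return the forwarding address of the object if it is marked, or nullptr if
// it is not" semantics. A hypothetical non-moving collector could satisfy that contract as
// below; Object, MarkSet and IsMarkedCallback are made-up names for illustration only.
#include <unordered_set>

namespace sketch {

struct Object {};  // Stand-in for mirror::Object.

class MarkSet {
 public:
  // For a non-moving collector the "forwarding address" of a marked object is simply the
  // object itself; nullptr signals a white (unmarked, collectable) referent.
  static Object* IsMarkedCallback(Object* obj, void* arg) {
    MarkSet* marks = static_cast<MarkSet*>(arg);
    return marks->marked_.count(obj) != 0 ? obj : nullptr;
  }

  void Mark(Object* obj) { marked_.insert(obj); }

 private:
  std::unordered_set<Object*> marked_;
};

}  // namespace sketch
// ---------------------------------------------------------------------------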
diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h
new file mode 100644
index 0000000..85ef2f4
--- /dev/null
+++ b/runtime/gc/space/bump_pointer_space-inl.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_INL_H_
+#define ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_INL_H_
+
+#include "bump_pointer_space.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+inline mirror::Object* BumpPointerSpace::AllocNonvirtual(size_t num_bytes) {
+  num_bytes = RoundUp(num_bytes, kAlignment);
+  byte* old_end;
+  byte* new_end;
+  do {
+    old_end = end_;
+    new_end = old_end + num_bytes;
+    // If there is no more room in the region, we are out of memory.
+    if (UNLIKELY(new_end > growth_end_)) {
+      return nullptr;
+    }
+    // TODO: Use a CAS whose operand width matches the pointer size.
+  } while (android_atomic_cas(reinterpret_cast<int32_t>(old_end),
+                              reinterpret_cast<int32_t>(new_end),
+                              reinterpret_cast<volatile int32_t*>(&end_)) != 0);
+  // TODO: Less statistics?
+  total_bytes_allocated_.fetch_add(num_bytes);
+  num_objects_allocated_.fetch_add(1);
+  total_objects_allocated_.fetch_add(1);
+  return reinterpret_cast<mirror::Object*>(old_end);
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_INL_H_
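// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of this patch. AllocNonvirtual above is a
// classic lock-free bump-pointer fast path: round the request up to the space's alignment,
// then CAS the end pointer forward, retrying on contention. The same idea expressed with a
// pointer-sized std::atomic (in the spirit of the TODO above) could look like this; the
// BumpAllocator name and the capacity handling are simplified assumptions.
#include <atomic>
#include <cstddef>
#include <cstdint>

namespace sketch {

constexpr size_t kAlignment = 8;

inline uintptr_t RoundUp(uintptr_t x, size_t n) {
  return (x + n - 1) & ~(static_cast<uintptr_t>(n) - 1);
}

class BumpAllocator {
 public:
  BumpAllocator(void* begin, void* limit)
      : end_(reinterpret_cast<uintptr_t>(begin)),
        limit_(reinterpret_cast<uintptr_t>(limit)) {}

  // Returns nullptr when the region is exhausted.
  void* Alloc(size_t num_bytes) {
    num_bytes = RoundUp(num_bytes, kAlignment);
    uintptr_t old_end = end_.load(std::memory_order_relaxed);
    do {
      if (old_end + num_bytes > limit_) {
        return nullptr;  // Out of memory in this region.
      }
      // On failure, compare_exchange_weak refreshes old_end with the current value, so the
      // loop simply re-checks the bound and retries.
    } while (!end_.compare_exchange_weak(old_end, old_end + num_bytes,
                                         std::memory_order_relaxed));
    return reinterpret_cast<void*>(old_end);
  }

 private:
  std::atomic<uintptr_t> end_;  // Current bump pointer (plays the role of end_ above).
  const uintptr_t limit_;       // Plays the role of growth_end_ above.
};

}  // namespace sketch
// ---------------------------------------------------------------------------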
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
new file mode 100644
index 0000000..06ba57e
--- /dev/null
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bump_pointer_space.h"
+#include "bump_pointer_space-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/class-inl.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+BumpPointerSpace* BumpPointerSpace::Create(const std::string& name, size_t capacity,
+                                           byte* requested_begin) {
+  capacity = RoundUp(capacity, kPageSize);
+  std::string error_msg;
+  UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
+                                                 PROT_READ | PROT_WRITE, &error_msg));
+  if (mem_map.get() == nullptr) {
+    LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
+        << PrettySize(capacity) << " with message " << error_msg;
+    return nullptr;
+  }
+  return new BumpPointerSpace(name, mem_map.release());
+}
+
+BumpPointerSpace::BumpPointerSpace(const std::string& name, byte* begin, byte* limit)
+    : ContinuousMemMapAllocSpace(name, nullptr, begin, begin, limit,
+                                 kGcRetentionPolicyAlwaysCollect),
+      num_objects_allocated_(0), total_bytes_allocated_(0), total_objects_allocated_(0),
+      growth_end_(limit) {
+}
+
+BumpPointerSpace::BumpPointerSpace(const std::string& name, MemMap* mem_map)
+    : ContinuousMemMapAllocSpace(name, mem_map, mem_map->Begin(), mem_map->Begin(), mem_map->End(),
+                                 kGcRetentionPolicyAlwaysCollect),
+      num_objects_allocated_(0), total_bytes_allocated_(0), total_objects_allocated_(0),
+      growth_end_(mem_map->End()) {
+}
+
+mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t* bytes_allocated) {
+  mirror::Object* ret = AllocNonvirtual(num_bytes);
+  if (LIKELY(ret != nullptr)) {
+    *bytes_allocated = num_bytes;
+  }
+  return ret;
+}
+
+size_t BumpPointerSpace::AllocationSize(const mirror::Object* obj) {
+  return AllocationSizeNonvirtual(obj);
+}
+
+void BumpPointerSpace::Clear() {
+  // Release the pages back to the operating system.
+  CHECK_NE(madvise(Begin(), Limit() - Begin(), MADV_DONTNEED), -1) << "madvise failed";
+  // Reset the end of the space back to the beginning, we move the end forward as we allocate
+  // objects.
+  SetEnd(Begin());
+  growth_end_ = Limit();
+  num_objects_allocated_ = 0;
+}
+
+void BumpPointerSpace::Dump(std::ostream& os) const {
+  os << reinterpret_cast<void*>(Begin()) << "-" << reinterpret_cast<void*>(End()) << " - "
+     << reinterpret_cast<void*>(Limit());
+}
+
+mirror::Object* BumpPointerSpace::GetNextObject(mirror::Object* obj) {
+  const uintptr_t position = reinterpret_cast<uintptr_t>(obj) + obj->SizeOf();
+  return reinterpret_cast<mirror::Object*>(RoundUp(position, kAlignment));
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
new file mode 100644
index 0000000..2edd3e2
--- /dev/null
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
+
+#include "space.h"
+
+namespace art {
+namespace gc {
+
+namespace collector {
+  class MarkSweep;
+}  // namespace collector
+
+namespace space {
+
+// A bump pointer space is a space where objects may be allocated and garbage collected.
+class BumpPointerSpace : public ContinuousMemMapAllocSpace {
+ public:
+  typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
+
+  SpaceType GetType() const {
+    return kSpaceTypeBumpPointerSpace;
+  }
+
+  // Create a bump pointer space with the requested sizes. The requested base address is not
+  // guaranteed to be granted; if it is required, the caller should call Begin on the returned
+  // space to confirm the request was granted.
+  static BumpPointerSpace* Create(const std::string& name, size_t capacity, byte* requested_begin);
+
+  // Allocate num_bytes, returns nullptr if the space is full.
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+  mirror::Object* AllocNonvirtual(size_t num_bytes);
+
+  // Return the storage space required by obj.
+  virtual size_t AllocationSize(const mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // No-ops unless we support free lists.
+  virtual size_t Free(Thread*, mirror::Object*) {
+    return 0;
+  }
+  virtual size_t FreeList(Thread*, size_t, mirror::Object**) {
+    return 0;
+  }
+
+  size_t AllocationSizeNonvirtual(const mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return obj->SizeOf();
+  }
+
+  // Removes the fork time growth limit on capacity, allowing the application to allocate up to the
+  // maximum reserved size of the heap.
+  void ClearGrowthLimit() {
+    growth_end_ = Limit();
+  }
+
+  // Override capacity so that we only return the possibly limited capacity
+  size_t Capacity() const {
+    return growth_end_ - begin_;
+  }
+
+  // The total amount of memory reserved for the space.
+  size_t NonGrowthLimitCapacity() const {
+    return GetMemMap()->Size();
+  }
+
+  accounting::SpaceBitmap* GetLiveBitmap() const {
+    return nullptr;
+  }
+
+  accounting::SpaceBitmap* GetMarkBitmap() const {
+    return nullptr;
+  }
+
+  // Clear the memory and reset the pointer to the start of the space.
+  void Clear();
+
+  void Dump(std::ostream& os) const;
+
+  uint64_t GetBytesAllocated() {
+    return Size();
+  }
+
+  uint64_t GetObjectsAllocated() {
+    return num_objects_allocated_;
+  }
+
+  uint64_t GetTotalBytesAllocated() {
+    return total_bytes_allocated_;
+  }
+
+  uint64_t GetTotalObjectsAllocated() {
+    return total_objects_allocated_;
+  }
+
+  bool Contains(const mirror::Object* obj) const {
+    const byte* byte_obj = reinterpret_cast<const byte*>(obj);
+    return byte_obj >= Begin() && byte_obj < End();
+  }
+
+  // TODO: Change this? Mainly used for compacting to a particular region of memory.
+  BumpPointerSpace(const std::string& name, byte* begin, byte* limit);
+
+  // Return the object which comes after obj, while ensuring alignment.
+  static mirror::Object* GetNextObject(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  virtual BumpPointerSpace* AsBumpPointerSpace() {
+    return this;
+  }
+
+  // Object alignment within the space.
+  static constexpr size_t kAlignment = 8;
+
+ protected:
+  BumpPointerSpace(const std::string& name, MemMap* mem_map);
+
+  size_t InternalAllocationSize(const mirror::Object* obj);
+  mirror::Object* AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated)
+      EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // Approximate counts of objects and bytes which have been allocated in the space.
+  AtomicInteger num_objects_allocated_;
+  AtomicInteger total_bytes_allocated_;
+  AtomicInteger total_objects_allocated_;
+
+  byte* growth_end_;
+
+ private:
+  friend class collector::MarkSweep;
+  DISALLOW_COPY_AND_ASSIGN(BumpPointerSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h
index fb2c66b..c14a4e1 100644
--- a/runtime/gc/space/dlmalloc_space-inl.h
+++ b/runtime/gc/space/dlmalloc_space-inl.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_INL_H_
 
 #include "dlmalloc_space.h"
+#include "thread.h"
 
 namespace art {
 namespace gc {
@@ -28,7 +29,7 @@
   mirror::Object* obj;
   {
     MutexLock mu(self, lock_);
-    obj = AllocWithoutGrowthLocked(num_bytes, bytes_allocated);
+    obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
   }
   if (LIKELY(obj != NULL)) {
     // Zero freshly allocated memory, done while not holding the space's lock.
@@ -37,8 +38,9 @@
   return obj;
 }
 
-inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated) {
-  mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_, num_bytes));
+inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/, size_t num_bytes,
+                                                               size_t* bytes_allocated) {
+  mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_for_alloc_, num_bytes));
   if (LIKELY(result != NULL)) {
     if (kDebugSpaces) {
       CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 9ebc16a..10e9ed8 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -13,13 +13,17 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "dlmalloc_space.h"
+
 #include "dlmalloc_space-inl.h"
 #include "gc/accounting/card_table.h"
 #include "gc/heap.h"
+#include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
 #include "thread.h"
+#include "thread_list.h"
 #include "utils.h"
 
 #include <valgrind.h>
@@ -29,170 +33,40 @@
 namespace gc {
 namespace space {
 
-// TODO: Remove define macro
-#define CHECK_MEMORY_CALL(call, args, what) \
-  do { \
-    int rc = call args; \
-    if (UNLIKELY(rc != 0)) { \
-      errno = rc; \
-      PLOG(FATAL) << # call << " failed for " << what; \
-    } \
-  } while (false)
-
 static const bool kPrefetchDuringDlMallocFreeList = true;
 
-// Number of bytes to use as a red zone (rdz). A red zone of this size will be placed before and
-// after each allocation. 8 bytes provides long/double alignment.
-const size_t kValgrindRedZoneBytes = 8;
-
-// A specialization of DlMallocSpace that provides information to valgrind wrt allocations.
-class ValgrindDlMallocSpace : public DlMallocSpace {
- public:
-  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
-    void* obj_with_rdz = DlMallocSpace::AllocWithGrowth(self, num_bytes + 2 * kValgrindRedZoneBytes,
-                                                        bytes_allocated);
-    if (obj_with_rdz == NULL) {
-      return NULL;
-    }
-    mirror::Object* result = reinterpret_cast<mirror::Object*>(
-        reinterpret_cast<byte*>(obj_with_rdz) + kValgrindRedZoneBytes);
-    // Make redzones as no access.
-    VALGRIND_MAKE_MEM_NOACCESS(obj_with_rdz, kValgrindRedZoneBytes);
-    VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<byte*>(result) + num_bytes, kValgrindRedZoneBytes);
-    return result;
-  }
-
-  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
-    void* obj_with_rdz = DlMallocSpace::Alloc(self, num_bytes + 2 * kValgrindRedZoneBytes,
-                                              bytes_allocated);
-    if (obj_with_rdz == NULL) {
-     return NULL;
-    }
-    mirror::Object* result = reinterpret_cast<mirror::Object*>(
-        reinterpret_cast<byte*>(obj_with_rdz) + kValgrindRedZoneBytes);
-    // Make redzones as no access.
-    VALGRIND_MAKE_MEM_NOACCESS(obj_with_rdz, kValgrindRedZoneBytes);
-    VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<byte*>(result) + num_bytes, kValgrindRedZoneBytes);
-    return result;
-  }
-
-  virtual size_t AllocationSize(const mirror::Object* obj) {
-    size_t result = DlMallocSpace::AllocationSize(reinterpret_cast<const mirror::Object*>(
-        reinterpret_cast<const byte*>(obj) - kValgrindRedZoneBytes));
-    return result - 2 * kValgrindRedZoneBytes;
-  }
-
-  virtual size_t Free(Thread* self, mirror::Object* ptr) {
-    void* obj_after_rdz = reinterpret_cast<void*>(ptr);
-    void* obj_with_rdz = reinterpret_cast<byte*>(obj_after_rdz) - kValgrindRedZoneBytes;
-    // Make redzones undefined.
-    size_t allocation_size = DlMallocSpace::AllocationSize(
-        reinterpret_cast<mirror::Object*>(obj_with_rdz));
-    VALGRIND_MAKE_MEM_UNDEFINED(obj_with_rdz, allocation_size);
-    size_t freed = DlMallocSpace::Free(self, reinterpret_cast<mirror::Object*>(obj_with_rdz));
-    return freed - 2 * kValgrindRedZoneBytes;
-  }
-
-  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
-    size_t freed = 0;
-    for (size_t i = 0; i < num_ptrs; i++) {
-      freed += Free(self, ptrs[i]);
-    }
-    return freed;
-  }
-
-  ValgrindDlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin,
-                        byte* end, size_t growth_limit, size_t initial_size) :
-      DlMallocSpace(name, mem_map, mspace, begin, end, growth_limit) {
-    VALGRIND_MAKE_MEM_UNDEFINED(mem_map->Begin() + initial_size, mem_map->Size() - initial_size);
-  }
-
-  virtual ~ValgrindDlMallocSpace() {
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(ValgrindDlMallocSpace);
-};
-
-size_t DlMallocSpace::bitmap_index_ = 0;
-
 DlMallocSpace::DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin,
-                       byte* end, size_t growth_limit)
-    : MemMapSpace(name, mem_map, end - begin, kGcRetentionPolicyAlwaysCollect),
-      recent_free_pos_(0), total_bytes_freed_(0), total_objects_freed_(0),
-      lock_("allocation space lock", kAllocSpaceLock), mspace_(mspace),
-      growth_limit_(growth_limit) {
+                             byte* end, byte* limit, size_t growth_limit)
+    : MallocSpace(name, mem_map, begin, end, limit, growth_limit),
+      total_bytes_freed_(0), total_objects_freed_(0), mspace_(mspace), mspace_for_alloc_(mspace) {
   CHECK(mspace != NULL);
-
-  size_t bitmap_index = bitmap_index_++;
-
-  static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
-  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
-  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
-  live_bitmap_.reset(accounting::SpaceBitmap::Create(
-      StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
-      Begin(), Capacity()));
-  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #" << bitmap_index;
-
-  mark_bitmap_.reset(accounting::SpaceBitmap::Create(
-      StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
-      Begin(), Capacity()));
-  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #" << bitmap_index;
-
-  for (auto& freed : recent_freed_objects_) {
-    freed.first = nullptr;
-    freed.second = nullptr;
-  }
 }
 
-DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size, size_t
-                                     growth_limit, size_t capacity, byte* requested_begin) {
-  // Memory we promise to dlmalloc before it asks for morecore.
-  // Note: making this value large means that large allocations are unlikely to succeed as dlmalloc
-  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
-  // size of the large allocation) will be greater than the footprint limit.
-  size_t starting_size = kPageSize;
+DlMallocSpace* DlMallocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
+                                     size_t capacity, byte* requested_begin) {
   uint64_t start_time = 0;
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     start_time = NanoTime();
-    VLOG(startup) << "Space::CreateAllocSpace entering " << name
+    VLOG(startup) << "DlMallocSpace::Create entering " << name
                   << " initial_size=" << PrettySize(initial_size)
                   << " growth_limit=" << PrettySize(growth_limit)
                   << " capacity=" << PrettySize(capacity)
                   << " requested_begin=" << reinterpret_cast<void*>(requested_begin);
   }
 
-  // Sanity check arguments
-  if (starting_size > initial_size) {
-    initial_size = starting_size;
-  }
-  if (initial_size > growth_limit) {
-    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the initial size ("
-        << PrettySize(initial_size) << ") is larger than its capacity ("
-        << PrettySize(growth_limit) << ")";
+  // Memory we promise to dlmalloc before it asks for morecore.
+  // Note: making this value large means that large allocations are unlikely to succeed as dlmalloc
+  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
+  // size of the large allocation) will be greater than the footprint limit.
+  size_t starting_size = kPageSize;
+  MemMap* mem_map = CreateMemMap(name, starting_size, &initial_size, &growth_limit, &capacity,
+                                 requested_begin);
+  if (mem_map == NULL) {
+    LOG(ERROR) << "Failed to create mem map for alloc space (" << name << ") of size "
+               << PrettySize(capacity);
     return NULL;
   }
-  if (growth_limit > capacity) {
-    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the growth limit capacity ("
-        << PrettySize(growth_limit) << ") is larger than the capacity ("
-        << PrettySize(capacity) << ")";
-    return NULL;
-  }
-
-  // Page align growth limit and capacity which will be used to manage mmapped storage
-  growth_limit = RoundUp(growth_limit, kPageSize);
-  capacity = RoundUp(capacity, kPageSize);
-
-  std::string error_msg;
-  UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
-                                                 PROT_READ | PROT_WRITE, &error_msg));
-  if (mem_map.get() == NULL) {
-    LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
-        << PrettySize(capacity) << ": " << error_msg;
-    return NULL;
-  }
-
-  void* mspace = CreateMallocSpace(mem_map->Begin(), starting_size, initial_size);
+  void* mspace = CreateMspace(mem_map->Begin(), starting_size, initial_size);
   if (mspace == NULL) {
     LOG(ERROR) << "Failed to initialize mspace for alloc space (" << name << ")";
     return NULL;
@@ -205,22 +79,23 @@
   }
 
   // Everything is set so record in immutable structure and leave
-  MemMap* mem_map_ptr = mem_map.release();
   DlMallocSpace* space;
+  byte* begin = mem_map->Begin();
   if (RUNNING_ON_VALGRIND > 0) {
-    space = new ValgrindDlMallocSpace(name, mem_map_ptr, mspace, mem_map_ptr->Begin(), end,
-                                      growth_limit, initial_size);
+    space = new ValgrindMallocSpace<DlMallocSpace, void*>(
+        name, mem_map, mspace, begin, end, begin + capacity, growth_limit, initial_size);
   } else {
-    space = new DlMallocSpace(name, mem_map_ptr, mspace, mem_map_ptr->Begin(), end, growth_limit);
+    space = new DlMallocSpace(name, mem_map, mspace, begin, end, begin + capacity, growth_limit);
   }
+  // We start out with only the initial size possibly containing objects.
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
-    LOG(INFO) << "Space::CreateAllocSpace exiting (" << PrettyDuration(NanoTime() - start_time)
+    LOG(INFO) << "DlMallocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
         << " ) " << *space;
   }
   return space;
 }
 
-void* DlMallocSpace::CreateMallocSpace(void* begin, size_t morecore_start, size_t initial_size) {
+void* DlMallocSpace::CreateMspace(void* begin, size_t morecore_start, size_t initial_size) {
   // clear errno to allow PLOG on error
   errno = 0;
   // create mspace using our backing storage starting at begin and with a footprint of
@@ -236,14 +111,6 @@
   return msp;
 }
 
-void DlMallocSpace::SwapBitmaps() {
-  live_bitmap_.swap(mark_bitmap_);
-  // Swap names to get more descriptive diagnostics.
-  std::string temp_name(live_bitmap_->GetName());
-  live_bitmap_->SetName(mark_bitmap_->GetName());
-  mark_bitmap_->SetName(temp_name);
-}
-
 mirror::Object* DlMallocSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
   return AllocNonvirtual(self, num_bytes, bytes_allocated);
 }
@@ -252,11 +119,11 @@
   mirror::Object* result;
   {
     MutexLock mu(self, lock_);
-    // Grow as much as possible within the mspace.
+    // Grow as much as possible within the space.
     size_t max_allowed = Capacity();
     mspace_set_footprint_limit(mspace_, max_allowed);
     // Try the allocation.
-    result = AllocWithoutGrowthLocked(num_bytes, bytes_allocated);
+    result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
     // Shrink back down as small as possible.
     size_t footprint = mspace_footprint(mspace_);
     mspace_set_footprint_limit(mspace_, footprint);
@@ -270,82 +137,9 @@
   return result;
 }
 
-void DlMallocSpace::SetGrowthLimit(size_t growth_limit) {
-  growth_limit = RoundUp(growth_limit, kPageSize);
-  growth_limit_ = growth_limit;
-  if (Size() > growth_limit_) {
-    end_ = begin_ + growth_limit;
-  }
-}
-
-DlMallocSpace* DlMallocSpace::CreateZygoteSpace(const char* alloc_space_name) {
-  end_ = reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(end_), kPageSize));
-  DCHECK(IsAligned<accounting::CardTable::kCardSize>(begin_));
-  DCHECK(IsAligned<accounting::CardTable::kCardSize>(end_));
-  DCHECK(IsAligned<kPageSize>(begin_));
-  DCHECK(IsAligned<kPageSize>(end_));
-  size_t size = RoundUp(Size(), kPageSize);
-  // Trim the heap so that we minimize the size of the Zygote space.
-  Trim();
-  // TODO: Not hardcode these in?
-  const size_t starting_size = kPageSize;
-  const size_t initial_size = 2 * MB;
-  // Remaining size is for the new alloc space.
-  const size_t growth_limit = growth_limit_ - size;
-  const size_t capacity = Capacity() - size;
-  VLOG(heap) << "Begin " << reinterpret_cast<const void*>(begin_) << "\n"
-             << "End " << reinterpret_cast<const void*>(end_) << "\n"
-             << "Size " << size << "\n"
-             << "GrowthLimit " << growth_limit_ << "\n"
-             << "Capacity " << Capacity();
-  SetGrowthLimit(RoundUp(size, kPageSize));
-  SetFootprintLimit(RoundUp(size, kPageSize));
-  // FIXME: Do we need reference counted pointers here?
-  // Make the two spaces share the same mark bitmaps since the bitmaps span both of the spaces.
-  VLOG(heap) << "Creating new AllocSpace: ";
-  VLOG(heap) << "Size " << GetMemMap()->Size();
-  VLOG(heap) << "GrowthLimit " << PrettySize(growth_limit);
-  VLOG(heap) << "Capacity " << PrettySize(capacity);
-  // Remap the tail.
-  std::string error_msg;
-  UniquePtr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
-                                                    PROT_READ | PROT_WRITE, &error_msg));
-  CHECK(mem_map.get() != nullptr) << error_msg;
-  void* mspace = CreateMallocSpace(end_, starting_size, initial_size);
-  // Protect memory beyond the initial size.
-  byte* end = mem_map->Begin() + starting_size;
-  if (capacity - initial_size > 0) {
-    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), alloc_space_name);
-  }
-  DlMallocSpace* alloc_space =
-      new DlMallocSpace(alloc_space_name, mem_map.release(), mspace, end_, end, growth_limit);
-  live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
-  CHECK_EQ(live_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
-  mark_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
-  CHECK_EQ(mark_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
-  VLOG(heap) << "zygote space creation done";
-  return alloc_space;
-}
-
-mirror::Class* DlMallocSpace::FindRecentFreedObject(const mirror::Object* obj) {
-  size_t pos = recent_free_pos_;
-  // Start at the most recently freed object and work our way back since there may be duplicates
-  // caused by dlmalloc reusing memory.
-  if (kRecentFreeCount > 0) {
-    for (size_t i = 0; i + 1 < kRecentFreeCount + 1; ++i) {
-      pos = pos != 0 ? pos - 1 : kRecentFreeMask;
-      if (recent_freed_objects_[pos].first == obj) {
-        return recent_freed_objects_[pos].second;
-      }
-    }
-  }
-  return nullptr;
-}
-
-void DlMallocSpace::RegisterRecentFree(mirror::Object* ptr) {
-  recent_freed_objects_[recent_free_pos_].first = ptr;
-  recent_freed_objects_[recent_free_pos_].second = ptr->GetClass();
-  recent_free_pos_ = (recent_free_pos_ + 1) & kRecentFreeMask;
+MallocSpace* DlMallocSpace::CreateInstance(const std::string& name, MemMap* mem_map, void* allocator, byte* begin, byte* end,
+                                           byte* limit, size_t growth_limit) {
+  return new DlMallocSpace(name, mem_map, allocator, begin, end, limit, growth_limit);
 }
 
 size_t DlMallocSpace::Free(Thread* self, mirror::Object* ptr) {
@@ -412,38 +206,9 @@
 // Callback from dlmalloc when it needs to increase the footprint
 extern "C" void* art_heap_morecore(void* mspace, intptr_t increment) {
   Heap* heap = Runtime::Current()->GetHeap();
-  DCHECK_EQ(heap->GetAllocSpace()->GetMspace(), mspace);
-  return heap->GetAllocSpace()->MoreCore(increment);
-}
-
-void* DlMallocSpace::MoreCore(intptr_t increment) {
-  lock_.AssertHeld(Thread::Current());
-  byte* original_end = end_;
-  if (increment != 0) {
-    VLOG(heap) << "DlMallocSpace::MoreCore " << PrettySize(increment);
-    byte* new_end = original_end + increment;
-    if (increment > 0) {
-      // Should never be asked to increase the allocation beyond the capacity of the space. Enforced
-      // by mspace_set_footprint_limit.
-      CHECK_LE(new_end, Begin() + Capacity());
-      CHECK_MEMORY_CALL(mprotect, (original_end, increment, PROT_READ | PROT_WRITE), GetName());
-    } else {
-      // Should never be asked for negative footprint (ie before begin)
-      CHECK_GT(original_end + increment, Begin());
-      // Advise we don't need the pages and protect them
-      // TODO: by removing permissions to the pages we may be causing TLB shoot-down which can be
-      // expensive (note the same isn't true for giving permissions to a page as the protected
-      // page shouldn't be in a TLB). We should investigate performance impact of just
-      // removing ignoring the memory protection change here and in Space::CreateAllocSpace. It's
-      // likely just a useful debug feature.
-      size_t size = -increment;
-      CHECK_MEMORY_CALL(madvise, (new_end, size, MADV_DONTNEED), GetName());
-      CHECK_MEMORY_CALL(mprotect, (new_end, size, PROT_NONE), GetName());
-    }
-    // Update end_
-    end_ = new_end;
-  }
-  return original_end;
+  DCHECK(heap->GetNonMovingSpace()->IsDlMallocSpace());
+  DCHECK_EQ(heap->GetNonMovingSpace()->AsDlMallocSpace()->GetMspace(), mspace);
+  return heap->GetNonMovingSpace()->MoreCore(increment);
 }
 
 // Virtual functions can't get inlined.
@@ -484,7 +249,7 @@
 
 void DlMallocSpace::SetFootprintLimit(size_t new_size) {
   MutexLock mu(Thread::Current(), lock_);
-  VLOG(heap) << "DLMallocSpace::SetFootprintLimit " << PrettySize(new_size);
+  VLOG(heap) << "DlMallocSpace::SetFootprintLimit " << PrettySize(new_size);
   // Compare against the actual footprint, rather than the Size(), because the heap may not have
   // grown all the way to the allowed size yet.
   size_t current_space_size = mspace_footprint(mspace_);
@@ -495,14 +260,6 @@
   mspace_set_footprint_limit(mspace_, new_size);
 }
 
-void DlMallocSpace::Dump(std::ostream& os) const {
-  os << GetType()
-      << " begin=" << reinterpret_cast<void*>(Begin())
-      << ",end=" << reinterpret_cast<void*>(End())
-      << ",size=" << PrettySize(Size()) << ",capacity=" << PrettySize(Capacity())
-      << ",name=\"" << GetName() << "\"]";
-}
-
 uint64_t DlMallocSpace::GetBytesAllocated() {
   MutexLock mu(Thread::Current(), lock_);
   size_t bytes_allocated = 0;
@@ -517,6 +274,12 @@
   return objects_allocated;
 }
 
+#ifndef NDEBUG
+void DlMallocSpace::CheckMoreCoreForPrecondition() {
+  lock_.AssertHeld(Thread::Current());
+}
+#endif
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
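// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of this patch. The valgrind support that used
// to live in ValgrindDlMallocSpace (removed above, now provided by the ValgrindMallocSpace
// template) boils down to wrapping each allocation in red zones: allocate extra bytes on both
// sides, hand out the interior pointer, and mark the padding inaccessible. A minimal
// standalone version of that pattern (using std::malloc rather than an mspace) might look
// like this; AllocWithRedZones/FreeWithRedZones are made-up names.
#include <cstdint>
#include <cstdlib>
#include <valgrind.h>
#include <memcheck/memcheck.h>

namespace sketch {

// 8 bytes on each side keeps long/double alignment, as in the removed code.
constexpr size_t kRedZoneBytes = 8;

inline void* AllocWithRedZones(size_t num_bytes) {
  uint8_t* raw = static_cast<uint8_t*>(std::malloc(num_bytes + 2 * kRedZoneBytes));
  if (raw == nullptr) {
    return nullptr;
  }
  uint8_t* usable = raw + kRedZoneBytes;
  // Teach memcheck that touching the padding on either side is an error.
  VALGRIND_MAKE_MEM_NOACCESS(raw, kRedZoneBytes);
  VALGRIND_MAKE_MEM_NOACCESS(usable + num_bytes, kRedZoneBytes);
  return usable;
}

inline void FreeWithRedZones(void* usable, size_t num_bytes) {
  uint8_t* raw = static_cast<uint8_t*>(usable) - kRedZoneBytes;
  // Make the whole chunk addressable again before handing it back to the allocator.
  VALGRIND_MAKE_MEM_UNDEFINED(raw, num_bytes + 2 * kRedZoneBytes);
  std::free(raw);
}

}  // namespace sketch
// ---------------------------------------------------------------------------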
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index 522535e..d18d4ad 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_H_
 
 #include "gc/allocator/dlmalloc.h"
+#include "malloc_space.h"
 #include "space.h"
 
 namespace art {
@@ -30,33 +31,18 @@
 namespace space {
 
 // An alloc space is a space where objects may be allocated and garbage collected.
-class DlMallocSpace : public MemMapSpace, public AllocSpace {
+class DlMallocSpace : public MallocSpace {
  public:
-  typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
-
-  SpaceType GetType() const {
-    if (GetGcRetentionPolicy() == kGcRetentionPolicyFullCollect) {
-      return kSpaceTypeZygoteSpace;
-    } else {
-      return kSpaceTypeAllocSpace;
-    }
-  }
-
-  // Create a AllocSpace with the requested sizes. The requested
+  // Create a DlMallocSpace with the requested sizes. The requested
   // base address is not guaranteed to be granted, if it is required,
-  // the caller should call Begin on the returned space to confirm
-  // the request was granted.
+  // the caller should call Begin on the returned space to confirm the
+  // request was granted.
   static DlMallocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
                                size_t capacity, byte* requested_begin);
 
-  // Allocate num_bytes without allowing the underlying mspace to grow.
   virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
                                           size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
-
-  // Allocate num_bytes allowing the underlying mspace to grow.
   virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
-
-  // Return the storage space required by obj.
   virtual size_t AllocationSize(const mirror::Object* obj);
   virtual size_t Free(Thread* self, mirror::Object* ptr);
   virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
@@ -64,17 +50,19 @@
   mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
 
   size_t AllocationSizeNonvirtual(const mirror::Object* obj) {
-    return mspace_usable_size(const_cast<void*>(reinterpret_cast<const void*>(obj))) +
-        kChunkOverhead;
+    void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
+    return mspace_usable_size(obj_ptr) + kChunkOverhead;
   }
 
-  void* MoreCore(intptr_t increment);
+#ifndef NDEBUG
+  // Override only in the debug build.
+  void CheckMoreCoreForPrecondition();
+#endif
 
   void* GetMspace() const {
     return mspace_;
   }
 
-  // Hands unused pages back to the system.
   size_t Trim();
 
   // Perform a mspace_inspect_all which calls back for each allocation chunk. The chunk may not be
@@ -93,39 +81,8 @@
   // allocations fail we GC before increasing the footprint limit and allowing the mspace to grow.
   void SetFootprintLimit(size_t limit);
 
-  // Removes the fork time growth limit on capacity, allowing the application to allocate up to the
-  // maximum reserved size of the heap.
-  void ClearGrowthLimit() {
-    growth_limit_ = NonGrowthLimitCapacity();
-  }
-
-  // Override capacity so that we only return the possibly limited capacity
-  size_t Capacity() const {
-    return growth_limit_;
-  }
-
-  // The total amount of memory reserved for the alloc space.
-  size_t NonGrowthLimitCapacity() const {
-    return GetMemMap()->Size();
-  }
-
-  accounting::SpaceBitmap* GetLiveBitmap() const {
-    return live_bitmap_.get();
-  }
-
-  accounting::SpaceBitmap* GetMarkBitmap() const {
-    return mark_bitmap_.get();
-  }
-
-  void Dump(std::ostream& os) const;
-
-  void SetGrowthLimit(size_t growth_limit);
-
-  // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
-  void SwapBitmaps();
-
-  // Turn ourself into a zygote space and return a new alloc space which has our unused memory.
-  DlMallocSpace* CreateZygoteSpace(const char* alloc_space_name);
+  MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                              byte* begin, byte* end, byte* limit, size_t growth_limit);
 
   uint64_t GetBytesAllocated();
   uint64_t GetObjectsAllocated();
@@ -139,51 +96,45 @@
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
+  virtual void InvalidateAllocator() {
+    mspace_for_alloc_ = nullptr;
+  }
+
+  virtual bool IsDlMallocSpace() const {
+    return true;
+  }
+  virtual DlMallocSpace* AsDlMallocSpace() {
+    return this;
+  }
+
  protected:
   DlMallocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin, byte* end,
-                size_t growth_limit);
+                byte* limit, size_t growth_limit);
 
  private:
   size_t InternalAllocationSize(const mirror::Object* obj);
-  mirror::Object* AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated)
+
+  mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  bool Init(size_t initial_size, size_t maximum_size, size_t growth_size, byte* requested_base);
-  void RegisterRecentFree(mirror::Object* ptr);
-  static void* CreateMallocSpace(void* base, size_t morecore_start, size_t initial_size);
 
-  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
-
-  // Recent allocation buffer.
-  static constexpr size_t kRecentFreeCount = kDebugSpaces ? (1 << 16) : 0;
-  static constexpr size_t kRecentFreeMask = kRecentFreeCount - 1;
-  std::pair<const mirror::Object*, mirror::Class*> recent_freed_objects_[kRecentFreeCount];
-  size_t recent_free_pos_;
+  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) {
+    return CreateMspace(base, morecore_start, initial_size);
+  }
+  static void* CreateMspace(void* base, size_t morecore_start, size_t initial_size);
 
   // Approximate number of bytes and objects which have been deallocated in the space.
   size_t total_bytes_freed_;
   size_t total_objects_freed_;
 
-  static size_t bitmap_index_;
-
   // The boundary tag overhead.
   static const size_t kChunkOverhead = kWordSize;
 
-  // Used to ensure mutual exclusion when the allocation spaces data structures are being modified.
-  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-
   // Underlying malloc space
   void* const mspace_;
 
-  // The capacity of the alloc space until such time that ClearGrowthLimit is called.
-  // The underlying mem_map_ controls the maximum size we allow the heap to grow to. The growth
-  // limit is a value <= to the mem_map_ capacity used for ergonomic reasons because of the zygote.
-  // Prior to forking the zygote the heap will have a maximally sized mem_map_ but the growth_limit_
-  // will be set to a lower value. The growth_limit_ is used as the capacity of the alloc_space_,
-  // however, capacity normally can't vary. In the case of the growth_limit_ it can be cleared
-  // one time by a call to ClearGrowthLimit.
-  size_t growth_limit_;
+  // An mspace pointer used for allocation. Equal to mspace_, or
+  // nullptr after InvalidateAllocator() is called.
+  void* mspace_for_alloc_;
 
   friend class collector::MarkSweep;
 
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index e12ee06..c6177bd 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -39,8 +39,9 @@
 
 ImageSpace::ImageSpace(const std::string& name, MemMap* mem_map,
                        accounting::SpaceBitmap* live_bitmap)
-    : MemMapSpace(name, mem_map, mem_map->Size(), kGcRetentionPolicyNeverCollect) {
-  DCHECK(live_bitmap != NULL);
+    : MemMapSpace(name, mem_map, mem_map->Begin(), mem_map->End(), mem_map->End(),
+                  kGcRetentionPolicyNeverCollect) {
+  DCHECK(live_bitmap != nullptr);
   live_bitmap_.reset(live_bitmap);
 }
 
@@ -332,7 +333,7 @@
 
 void ImageSpace::Dump(std::ostream& os) const {
   os << GetType()
-      << "begin=" << reinterpret_cast<void*>(Begin())
+      << " begin=" << reinterpret_cast<void*>(Begin())
       << ",end=" << reinterpret_cast<void*>(End())
       << ",size=" << PrettySize(Size())
       << ",name=\"" << GetName() << "\"]";
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index ef889d4..d374ad3 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -59,6 +59,14 @@
 
   size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
 
+  virtual bool IsAllocSpace() const {
+    return true;
+  }
+
+  virtual AllocSpace* AsAllocSpace() {
+    return this;
+  }
+
  protected:
   explicit LargeObjectSpace(const std::string& name);
 
@@ -108,7 +116,8 @@
   virtual ~FreeListSpace();
   static FreeListSpace* Create(const std::string& name, byte* requested_begin, size_t capacity);
 
-  size_t AllocationSize(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  size_t AllocationSize(const mirror::Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(lock_);
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
   size_t Free(Thread* self, mirror::Object* obj);
   bool Contains(const mirror::Object* obj) const;
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
new file mode 100644
index 0000000..785b5ed
--- /dev/null
+++ b/runtime/gc/space/malloc_space.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "malloc_space.h"
+
+#include "gc/accounting/card_table.h"
+#include "gc/heap.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "runtime.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "utils.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+size_t MallocSpace::bitmap_index_ = 0;
+
+MallocSpace::MallocSpace(const std::string& name, MemMap* mem_map,
+                         byte* begin, byte* end, byte* limit, size_t growth_limit)
+    : ContinuousMemMapAllocSpace(name, mem_map, begin, end, limit, kGcRetentionPolicyAlwaysCollect),
+      recent_free_pos_(0), lock_("allocation space lock", kAllocSpaceLock),
+      growth_limit_(growth_limit) {
+  size_t bitmap_index = bitmap_index_++;
+  static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
+  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
+  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
+  live_bitmap_.reset(accounting::SpaceBitmap::Create(
+      StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
+      Begin(), Capacity()));
+  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #" << bitmap_index;
+  mark_bitmap_.reset(accounting::SpaceBitmap::Create(
+      StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
+      Begin(), Capacity()));
+  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #" << bitmap_index;
+  for (auto& freed : recent_freed_objects_) {
+    freed.first = nullptr;
+    freed.second = nullptr;
+  }
+}
+
+MemMap* MallocSpace::CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
+                                  size_t* growth_limit, size_t* capacity, byte* requested_begin) {
+  // Sanity check arguments
+  if (starting_size > *initial_size) {
+    *initial_size = starting_size;
+  }
+  if (*initial_size > *growth_limit) {
+    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the initial size ("
+        << PrettySize(*initial_size) << ") is larger than its capacity ("
+        << PrettySize(*growth_limit) << ")";
+    return NULL;
+  }
+  if (*growth_limit > *capacity) {
+    LOG(ERROR) << "Failed to create alloc space (" << name << ") where the growth limit capacity ("
+        << PrettySize(*growth_limit) << ") is larger than the capacity ("
+        << PrettySize(*capacity) << ")";
+    return NULL;
+  }
+
+  // Page align growth limit and capacity which will be used to manage mmapped storage
+  *growth_limit = RoundUp(*growth_limit, kPageSize);
+  *capacity = RoundUp(*capacity, kPageSize);
+
+  std::string error_msg;
+  MemMap* mem_map = MemMap::MapAnonymous(name.c_str(), requested_begin, *capacity,
+                                         PROT_READ | PROT_WRITE, &error_msg);
+  if (mem_map == NULL) {
+    LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
+               << PrettySize(*capacity) << ": " << error_msg;
+    return NULL;
+  }
+  return mem_map;
+}
+
+void MallocSpace::SwapBitmaps() {
+  live_bitmap_.swap(mark_bitmap_);
+  // Swap names to get more descriptive diagnostics.
+  std::string temp_name(live_bitmap_->GetName());
+  live_bitmap_->SetName(mark_bitmap_->GetName());
+  mark_bitmap_->SetName(temp_name);
+}
+
+mirror::Class* MallocSpace::FindRecentFreedObject(const mirror::Object* obj) {
+  size_t pos = recent_free_pos_;
+  // Start at the most recently freed object and work our way back since there may be duplicates
+  // caused by dlmalloc reusing memory.
+  if (kRecentFreeCount > 0) {
+    for (size_t i = 0; i + 1 < kRecentFreeCount + 1; ++i) {
+      pos = pos != 0 ? pos - 1 : kRecentFreeMask;
+      if (recent_freed_objects_[pos].first == obj) {
+        return recent_freed_objects_[pos].second;
+      }
+    }
+  }
+  return nullptr;
+}
+
+void MallocSpace::RegisterRecentFree(mirror::Object* ptr) {
+  recent_freed_objects_[recent_free_pos_] = std::make_pair(ptr, ptr->GetClass());
+  recent_free_pos_ = (recent_free_pos_ + 1) & kRecentFreeMask;
+}
+
+void MallocSpace::SetGrowthLimit(size_t growth_limit) {
+  growth_limit = RoundUp(growth_limit, kPageSize);
+  growth_limit_ = growth_limit;
+  if (Size() > growth_limit_) {
+    end_ = begin_ + growth_limit;
+  }
+}
+
+void* MallocSpace::MoreCore(intptr_t increment) {
+  CheckMoreCoreForPrecondition();
+  byte* original_end = end_;
+  if (increment != 0) {
+    VLOG(heap) << "MallocSpace::MoreCore " << PrettySize(increment);
+    byte* new_end = original_end + increment;
+    if (increment > 0) {
+      // Should never be asked to increase the allocation beyond the capacity of the space. Enforced
+      // by mspace_set_footprint_limit.
+      CHECK_LE(new_end, Begin() + Capacity());
+      CHECK_MEMORY_CALL(mprotect, (original_end, increment, PROT_READ | PROT_WRITE), GetName());
+    } else {
+      // Should never be asked for negative footprint (ie before begin). Zero footprint is ok.
+      CHECK_GE(original_end + increment, Begin());
+      // Advise we don't need the pages and protect them
+      // TODO: by removing permissions to the pages we may be causing TLB shoot-down which can be
+      // expensive (note the same isn't true for giving permissions to a page as the protected
+      // page shouldn't be in a TLB). We should investigate performance impact of just
+      // ignoring the memory protection change here and in Space::CreateAllocSpace. It's
+      // likely just a useful debug feature.
+      size_t size = -increment;
+      CHECK_MEMORY_CALL(madvise, (new_end, size, MADV_DONTNEED), GetName());
+      CHECK_MEMORY_CALL(mprotect, (new_end, size, PROT_NONE), GetName());
+    }
+    // Update end_
+    end_ = new_end;
+  }
+  return original_end;
+}
+
+// Returns the old mark bitmap.
+accounting::SpaceBitmap* MallocSpace::BindLiveToMarkBitmap() {
+  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
+  accounting::SpaceBitmap* mark_bitmap = mark_bitmap_.release();
+  temp_bitmap_.reset(mark_bitmap);
+  mark_bitmap_.reset(live_bitmap);
+  return mark_bitmap;
+}
+
+bool MallocSpace::HasBoundBitmaps() const {
+  return temp_bitmap_.get() != nullptr;
+}
+
+void MallocSpace::UnBindBitmaps() {
+  CHECK(HasBoundBitmaps());
+  // At this point, the temp_bitmap holds our old mark bitmap.
+  accounting::SpaceBitmap* new_bitmap = temp_bitmap_.release();
+  CHECK_EQ(mark_bitmap_.release(), live_bitmap_.get());
+  mark_bitmap_.reset(new_bitmap);
+  DCHECK(temp_bitmap_.get() == NULL);
+}
+
+MallocSpace* MallocSpace::CreateZygoteSpace(const char* alloc_space_name) {
+  // For RosAlloc, revoke thread local runs before creating a new
+  // alloc space so that we won't mix thread local runs from different
+  // alloc spaces.
+  RevokeAllThreadLocalBuffers();
+  end_ = reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(end_), kPageSize));
+  DCHECK(IsAligned<accounting::CardTable::kCardSize>(begin_));
+  DCHECK(IsAligned<accounting::CardTable::kCardSize>(end_));
+  DCHECK(IsAligned<kPageSize>(begin_));
+  DCHECK(IsAligned<kPageSize>(end_));
+  size_t size = RoundUp(Size(), kPageSize);
+  // Trim the heap so that we minimize the size of the Zygote space.
+  Trim();
+  // TODO: Not hardcode these in?
+  const size_t starting_size = kPageSize;
+  const size_t initial_size = 2 * MB;
+  // Remaining size is for the new alloc space.
+  const size_t growth_limit = growth_limit_ - size;
+  const size_t capacity = Capacity() - size;
+  VLOG(heap) << "Begin " << reinterpret_cast<const void*>(begin_) << "\n"
+             << "End " << reinterpret_cast<const void*>(end_) << "\n"
+             << "Size " << size << "\n"
+             << "GrowthLimit " << growth_limit_ << "\n"
+             << "Capacity " << Capacity();
+  SetGrowthLimit(RoundUp(size, kPageSize));
+  SetFootprintLimit(RoundUp(size, kPageSize));
+  // FIXME: Do we need reference counted pointers here?
+  // Make the two spaces share the same mark bitmaps since the bitmaps span both of the spaces.
+  VLOG(heap) << "Creating new AllocSpace: ";
+  VLOG(heap) << "Size " << GetMemMap()->Size();
+  VLOG(heap) << "GrowthLimit " << PrettySize(growth_limit);
+  VLOG(heap) << "Capacity " << PrettySize(capacity);
+  // Remap the tail.
+  std::string error_msg;
+  UniquePtr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
+                                                    PROT_READ | PROT_WRITE, &error_msg));
+  CHECK(mem_map.get() != nullptr) << error_msg;
+  void* allocator = CreateAllocator(end_, starting_size, initial_size);
+  // Protect memory beyond the initial size.
+  byte* end = mem_map->Begin() + starting_size;
+  if (capacity - initial_size > 0) {
+    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), alloc_space_name);
+  }
+  MallocSpace* alloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator,
+                                            end_, end, limit_, growth_limit);
+  SetLimit(End());
+  live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
+  CHECK_EQ(live_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
+  mark_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
+  CHECK_EQ(mark_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
+  VLOG(heap) << "zygote space creation done";
+  return alloc_space;
+}
+
+void MallocSpace::Dump(std::ostream& os) const {
+  os << GetType()
+      << " begin=" << reinterpret_cast<void*>(Begin())
+      << ",end=" << reinterpret_cast<void*>(End())
+      << ",size=" << PrettySize(Size()) << ",capacity=" << PrettySize(Capacity())
+      << ",name=\"" << GetName() << "\"]";
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
new file mode 100644
index 0000000..0f882d3
--- /dev/null
+++ b/runtime/gc/space/malloc_space.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_MALLOC_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_MALLOC_SPACE_H_
+
+#include "space.h"
+
+#include <valgrind.h>
+#include <memcheck/memcheck.h>
+
+namespace art {
+namespace gc {
+
+namespace collector {
+  class MarkSweep;
+}  // namespace collector
+
+namespace space {
+
+// TODO: Remove this #define macro.
+#define CHECK_MEMORY_CALL(call, args, what) \
+  do { \
+    int rc = call args; \
+    if (UNLIKELY(rc != 0)) { \
+      errno = rc; \
+      PLOG(FATAL) << # call << " failed for " << what; \
+    } \
+  } while (false)
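+// For example, CHECK_MEMORY_CALL(mprotect, (addr, length, PROT_NONE), name) invokes
+// mprotect(addr, length, PROT_NONE) and aborts via PLOG(FATAL) if the call returns non-zero.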
+
+// const bool kUseRosAlloc = true;
+
+// A common parent of DlMallocSpace and RosAllocSpace.
+class MallocSpace : public ContinuousMemMapAllocSpace {
+ public:
+  typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
+
+  SpaceType GetType() const {
+    if (GetGcRetentionPolicy() == kGcRetentionPolicyFullCollect) {
+      return kSpaceTypeZygoteSpace;
+    } else {
+      return kSpaceTypeAllocSpace;
+    }
+  }
+
+  // Allocate num_bytes, allowing the underlying space to grow if needed.
+  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
+                                          size_t* bytes_allocated) = 0;
+  // Allocate num_bytes without allowing the underlying space to grow.
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) = 0;
+  // Return the storage space required by obj.
+  virtual size_t AllocationSize(const mirror::Object* obj) = 0;
+  virtual size_t Free(Thread* self, mirror::Object* ptr) = 0;
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) = 0;
+
+#ifndef NDEBUG
+  virtual void CheckMoreCoreForPrecondition() {}  // to be overridden in the debug build.
+#else
+  void CheckMoreCoreForPrecondition() {}  // no-op in the non-debug build.
+#endif
+
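+  // Called back by the underlying allocator when it needs to adjust the space's footprint by
+  // |increment| bytes (see, e.g., art_heap_rosalloc_morecore in rosalloc_space.cc).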
+  void* MoreCore(intptr_t increment);
+
+  // Hands unused pages back to the system.
+  virtual size_t Trim() = 0;
+
+  // Perform an mspace_inspect_all which calls back for each allocation chunk. A chunk may not be
+  // in use, which is indicated by num_bytes equaling zero.
+  virtual void Walk(WalkCallback callback, void* arg) = 0;
+
+  // Returns the number of bytes that the space has currently obtained from the system. This is
+  // greater than or equal to the amount of live data in the space.
+  virtual size_t GetFootprint() = 0;
+
+  // Returns the number of bytes that the heap is allowed to obtain from the system via MoreCore.
+  virtual size_t GetFootprintLimit() = 0;
+
+  // Set the maximum number of bytes that the heap is allowed to obtain from the system via
+  // MoreCore. Note this is used to stop the underlying space from growing past the limit all the
+  // way to Capacity. When allocations fail, we GC before increasing the footprint limit and
+  // allowing the space to grow.
+  virtual void SetFootprintLimit(size_t limit) = 0;
+
+  // Removes the fork-time growth limit on capacity, allowing the application to allocate up to the
+  // maximum reserved size of the heap.
+  void ClearGrowthLimit() {
+    growth_limit_ = NonGrowthLimitCapacity();
+  }
+
+  // Override capacity so that we only return the possibly limited capacity.
+  size_t Capacity() const {
+    return growth_limit_;
+  }
+
+  // The total amount of memory reserved for the alloc space.
+  size_t NonGrowthLimitCapacity() const {
+    return GetMemMap()->Size();
+  }
+
+  accounting::SpaceBitmap* GetLiveBitmap() const {
+    return live_bitmap_.get();
+  }
+
+  accounting::SpaceBitmap* GetMarkBitmap() const {
+    return mark_bitmap_.get();
+  }
+
+  void Dump(std::ostream& os) const;
+
+  void SetGrowthLimit(size_t growth_limit);
+
+  // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
+  void SwapBitmaps();
+
+  virtual MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                                      byte* begin, byte* end, byte* limit, size_t growth_limit) = 0;
+
+  // Turn this space into a zygote space and return a new alloc space that holds our unused memory.
+  MallocSpace* CreateZygoteSpace(const char* alloc_space_name);
+
+  virtual uint64_t GetBytesAllocated() = 0;
+  virtual uint64_t GetObjectsAllocated() = 0;
+  virtual uint64_t GetTotalBytesAllocated() = 0;
+  virtual uint64_t GetTotalObjectsAllocated() = 0;
+
+  // Returns the old mark bitmap.
+  accounting::SpaceBitmap* BindLiveToMarkBitmap();
+  bool HasBoundBitmaps() const;
+  void UnBindBitmaps();
+
+  // Returns the class of a recently freed object.
+  mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
+
+  // Used to ensure that a failure happens when you free or allocate into an invalidated space. If
+  // we don't do this, we may get heap corruption instead of a segfault at null.
+  virtual void InvalidateAllocator() = 0;
+
+ protected:
+  MallocSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end,
+              byte* limit, size_t growth_limit);
+
+  static MemMap* CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
+                              size_t* growth_limit, size_t* capacity, byte* requested_begin);
+
+  virtual void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) = 0;
+
+  void RegisterRecentFree(mirror::Object* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
+  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
+  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
+
+  // Recent allocation buffer.
+  static constexpr size_t kRecentFreeCount = kDebugSpaces ? (1 << 16) : 0;
+  static constexpr size_t kRecentFreeMask = kRecentFreeCount - 1;
+  std::pair<const mirror::Object*, mirror::Class*> recent_freed_objects_[kRecentFreeCount];
+  size_t recent_free_pos_;
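+  // Note: recent_freed_objects_ is presumably indexed as a ring buffer by RegisterRecentFree;
+  // kRecentFreeMask only works as a wrap-around mask because kRecentFreeCount is a power of two.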
+
+  static size_t bitmap_index_;
+
+  // Used to ensure mutual exclusion when the allocation space's data structures are being modified.
+  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+  // The capacity of the alloc space until such time as ClearGrowthLimit is called.
+  // The underlying mem_map_ controls the maximum size we allow the heap to grow to. The growth
+  // limit is a value <= the mem_map_ capacity, used for ergonomic reasons because of the zygote.
+  // Prior to forking the zygote, the heap will have a maximally sized mem_map_, but the
+  // growth_limit_ will be set to a lower value. The growth_limit_ is used as the capacity of the
+  // alloc space; unlike a normal capacity, which cannot vary, the growth_limit_ can be cleared
+  // once by a call to ClearGrowthLimit.
+  size_t growth_limit_;
+
+  friend class collector::MarkSweep;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MallocSpace);
+};
+
+// Number of bytes to use as a red zone (rdz). A red zone of this size will be placed before and
+// after each allocation. 8 bytes provides long/double alignment.
+static constexpr size_t kValgrindRedZoneBytes = 8;
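+// Layout of a red-zoned allocation handed out by ValgrindMallocSpace (sketch):
+//   [red zone | num_bytes of object data | red zone]
+// The pointer returned to callers points just past the leading red zone.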
+
+// A specialization of DlMallocSpace/RosAllocSpace that provides allocation information to Valgrind.
+template <typename BaseMallocSpaceType, typename AllocatorType>
+class ValgrindMallocSpace : public BaseMallocSpaceType {
+ public:
+  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+    void* obj_with_rdz = BaseMallocSpaceType::AllocWithGrowth(self, num_bytes + 2 * kValgrindRedZoneBytes,
+                                                              bytes_allocated);
+    if (obj_with_rdz == NULL) {
+      return NULL;
+    }
+    mirror::Object* result = reinterpret_cast<mirror::Object*>(
+        reinterpret_cast<byte*>(obj_with_rdz) + kValgrindRedZoneBytes);
+    // Mark the red zones as inaccessible.
+    VALGRIND_MAKE_MEM_NOACCESS(obj_with_rdz, kValgrindRedZoneBytes);
+    VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<byte*>(result) + num_bytes, kValgrindRedZoneBytes);
+    return result;
+  }
+
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+    void* obj_with_rdz = BaseMallocSpaceType::Alloc(self, num_bytes + 2 * kValgrindRedZoneBytes,
+                                                    bytes_allocated);
+    if (obj_with_rdz == NULL) {
+      return NULL;
+    }
+    mirror::Object* result = reinterpret_cast<mirror::Object*>(
+        reinterpret_cast<byte*>(obj_with_rdz) + kValgrindRedZoneBytes);
+    // Mark the red zones as inaccessible.
+    VALGRIND_MAKE_MEM_NOACCESS(obj_with_rdz, kValgrindRedZoneBytes);
+    VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<byte*>(result) + num_bytes, kValgrindRedZoneBytes);
+    return result;
+  }
+
+  virtual size_t AllocationSize(const mirror::Object* obj) {
+    size_t result = BaseMallocSpaceType::AllocationSize(reinterpret_cast<const mirror::Object*>(
+        reinterpret_cast<const byte*>(obj) - kValgrindRedZoneBytes));
+    return result - 2 * kValgrindRedZoneBytes;
+  }
+
+  virtual size_t Free(Thread* self, mirror::Object* ptr) {
+    void* obj_after_rdz = reinterpret_cast<void*>(ptr);
+    void* obj_with_rdz = reinterpret_cast<byte*>(obj_after_rdz) - kValgrindRedZoneBytes;
+    // Mark the whole allocation, object and red zones, as undefined before freeing it.
+    size_t allocation_size = BaseMallocSpaceType::AllocationSize(
+        reinterpret_cast<mirror::Object*>(obj_with_rdz));
+    VALGRIND_MAKE_MEM_UNDEFINED(obj_with_rdz, allocation_size);
+    size_t freed = BaseMallocSpaceType::Free(self, reinterpret_cast<mirror::Object*>(obj_with_rdz));
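+    // Subtract the two red zones so the size reported here matches AllocationSize().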
+    return freed - 2 * kValgrindRedZoneBytes;
+  }
+
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
+    size_t freed = 0;
+    for (size_t i = 0; i < num_ptrs; i++) {
+      freed += Free(self, ptrs[i]);
+    }
+    return freed;
+  }
+
+  ValgrindMallocSpace(const std::string& name, MemMap* mem_map, AllocatorType allocator, byte* begin,
+                      byte* end, byte* limit, size_t growth_limit, size_t initial_size) :
+      BaseMallocSpaceType(name, mem_map, allocator, begin, end, limit, growth_limit) {
+    VALGRIND_MAKE_MEM_UNDEFINED(mem_map->Begin() + initial_size, mem_map->Size() - initial_size);
+  }
+
+  virtual ~ValgrindMallocSpace() {
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ValgrindMallocSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_MALLOC_SPACE_H_
diff --git a/runtime/gc/space/rosalloc_space-inl.h b/runtime/gc/space/rosalloc_space-inl.h
new file mode 100644
index 0000000..5de4265
--- /dev/null
+++ b/runtime/gc/space/rosalloc_space-inl.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_INL_H_
+#define ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_INL_H_
+
+#include "gc/allocator/rosalloc-inl.h"
+#include "rosalloc_space.h"
+#include "thread.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+inline mirror::Object* RosAllocSpace::AllocNonvirtual(Thread* self, size_t num_bytes,
+                                                      size_t* bytes_allocated) {
+  // RosAlloc zeroes memory internally.
+  return AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
+}
+
+inline mirror::Object* RosAllocSpace::AllocWithoutGrowthLocked(Thread* self, size_t num_bytes,
+                                                               size_t* bytes_allocated) {
+  size_t rosalloc_size = 0;
+  mirror::Object* result = reinterpret_cast<mirror::Object*>(
+      rosalloc_for_alloc_->Alloc(self, num_bytes,
+                                 &rosalloc_size));
+  if (LIKELY(result != NULL)) {
+    if (kDebugSpaces) {
+      CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
+            << ") not in bounds of allocation space " << *this;
+    }
+    DCHECK(bytes_allocated != NULL);
+    *bytes_allocated = rosalloc_size;
+  }
+  return result;
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_INL_H_
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
new file mode 100644
index 0000000..1f8e324
--- /dev/null
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rosalloc_space.h"
+
+#include "rosalloc_space-inl.h"
+#include "gc/accounting/card_table.h"
+#include "gc/heap.h"
+#include "mirror/class-inl.h"
+#include "mirror/object-inl.h"
+#include "runtime.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "utils.h"
+
+#include <valgrind.h>
+#include <memcheck/memcheck.h>
+
+namespace art {
+namespace gc {
+namespace space {
+
+static const bool kPrefetchDuringRosAllocFreeList = true;
+
+RosAllocSpace::RosAllocSpace(const std::string& name, MemMap* mem_map,
+                             art::gc::allocator::RosAlloc* rosalloc, byte* begin, byte* end,
+                             byte* limit, size_t growth_limit)
+    : MallocSpace(name, mem_map, begin, end, limit, growth_limit), rosalloc_(rosalloc),
+      rosalloc_for_alloc_(rosalloc) {
+  CHECK(rosalloc != NULL);
+}
+
+RosAllocSpace* RosAllocSpace::Create(const std::string& name, size_t initial_size, size_t growth_limit,
+                                     size_t capacity, byte* requested_begin) {
+  uint64_t start_time = 0;
+  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
+    start_time = NanoTime();
+    VLOG(startup) << "RosAllocSpace::Create entering " << name
+                  << " initial_size=" << PrettySize(initial_size)
+                  << " growth_limit=" << PrettySize(growth_limit)
+                  << " capacity=" << PrettySize(capacity)
+                  << " requested_begin=" << reinterpret_cast<void*>(requested_begin);
+  }
+
+  // Memory we promise to rosalloc before it asks for morecore.
+  // Note: making this value large means that large allocations are unlikely to succeed as rosalloc
+  // will ask for this memory from sys_alloc which will fail as the footprint (this value plus the
+  // size of the large allocation) will be greater than the footprint limit.
+  size_t starting_size = kPageSize;
+  MemMap* mem_map = CreateMemMap(name, starting_size, &initial_size, &growth_limit, &capacity,
+                                 requested_begin);
+  if (mem_map == NULL) {
+    LOG(ERROR) << "Failed to create mem map for alloc space (" << name << ") of size "
+               << PrettySize(capacity);
+    return NULL;
+  }
+  allocator::RosAlloc* rosalloc = CreateRosAlloc(mem_map->Begin(), starting_size, initial_size);
+  if (rosalloc == NULL) {
+    LOG(ERROR) << "Failed to initialize rosalloc for alloc space (" << name << ")";
+    return NULL;
+  }
+
+  // Protect memory beyond the initial size.
+  byte* end = mem_map->Begin() + starting_size;
+  if (capacity - initial_size > 0) {
+    CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), name);
+  }
+
+  // Everything is set, so record it in an immutable structure and return.
+  RosAllocSpace* space;
+  byte* begin = mem_map->Begin();
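+  // When running under Valgrind, wrap the space so that allocations carry red zones and are
+  // reported to memcheck (see ValgrindMallocSpace in malloc_space.h).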
+  if (RUNNING_ON_VALGRIND > 0) {
+    space = new ValgrindMallocSpace<RosAllocSpace, art::gc::allocator::RosAlloc*>(
+        name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit, initial_size);
+  } else {
+    space = new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit);
+  }
+  // We start out with only the initial size possibly containing objects.
+  if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
+    LOG(INFO) << "RosAllocSpace::Create exiting (" << PrettyDuration(NanoTime() - start_time)
+        << " ) " << *space;
+  }
+  return space;
+}
+
+allocator::RosAlloc* RosAllocSpace::CreateRosAlloc(void* begin, size_t morecore_start, size_t initial_size) {
+  // Clear errno to allow PLOG on error.
+  errno = 0;
+  // Create rosalloc using our backing storage starting at begin and
+  // with a footprint of morecore_start. When morecore_start bytes of
+  // memory are exhausted, morecore will be called.
+  allocator::RosAlloc* rosalloc = new art::gc::allocator::RosAlloc(begin, morecore_start);
+  if (rosalloc != NULL) {
+    rosalloc->SetFootprintLimit(initial_size);
+  } else {
+    PLOG(ERROR) << "RosAlloc::Create failed";
+  }
+  return rosalloc;
+}
+
+mirror::Object* RosAllocSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+  return AllocNonvirtual(self, num_bytes, bytes_allocated);
+}
+
+mirror::Object* RosAllocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+  mirror::Object* result;
+  {
+    MutexLock mu(self, lock_);
+    // Grow as much as possible within the space.
+    size_t max_allowed = Capacity();
+    rosalloc_->SetFootprintLimit(max_allowed);
+    // Try the allocation.
+    result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated);
+    // Shrink back down as small as possible.
+    size_t footprint = rosalloc_->Footprint();
+    rosalloc_->SetFootprintLimit(footprint);
+  }
+  // Note RosAlloc zeroes memory internally.
+  // Return the new allocation or NULL.
+  CHECK(!kDebugSpaces || result == NULL || Contains(result));
+  return result;
+}
+
+MallocSpace* RosAllocSpace::CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                                           byte* begin, byte* end, byte* limit, size_t growth_limit) {
+  return new RosAllocSpace(name, mem_map, reinterpret_cast<allocator::RosAlloc*>(allocator),
+                           begin, end, limit, growth_limit);
+}
+
+size_t RosAllocSpace::Free(Thread* self, mirror::Object* ptr) {
+  if (kDebugSpaces) {
+    CHECK(ptr != NULL);
+    CHECK(Contains(ptr)) << "Free (" << ptr << ") not in bounds of heap " << *this;
+  }
+  const size_t bytes_freed = InternalAllocationSize(ptr);
+  total_bytes_freed_atomic_.fetch_add(bytes_freed);
+  ++total_objects_freed_atomic_;
+  if (kRecentFreeCount > 0) {
+    MutexLock mu(self, lock_);
+    RegisterRecentFree(ptr);
+  }
+  rosalloc_->Free(self, ptr);
+  return bytes_freed;
+}
+
+size_t RosAllocSpace::FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
+  DCHECK(ptrs != NULL);
+
+  // Don't need the lock to calculate the size of the freed pointers.
+  size_t bytes_freed = 0;
+  for (size_t i = 0; i < num_ptrs; i++) {
+    mirror::Object* ptr = ptrs[i];
+    const size_t look_ahead = 8;
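+    // Prefetch a pointer several iterations ahead; its object header is read below by
+    // InternalAllocationSize().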
+    if (kPrefetchDuringRosAllocFreeList && i + look_ahead < num_ptrs) {
+      __builtin_prefetch(reinterpret_cast<char*>(ptrs[i + look_ahead]));
+    }
+    bytes_freed += InternalAllocationSize(ptr);
+  }
+
+  if (kRecentFreeCount > 0) {
+    MutexLock mu(self, lock_);
+    for (size_t i = 0; i < num_ptrs; i++) {
+      RegisterRecentFree(ptrs[i]);
+    }
+  }
+
+  if (kDebugSpaces) {
+    size_t num_broken_ptrs = 0;
+    for (size_t i = 0; i < num_ptrs; i++) {
+      if (!Contains(ptrs[i])) {
+        num_broken_ptrs++;
+        LOG(ERROR) << "FreeList[" << i << "] (" << ptrs[i] << ") not in bounds of heap " << *this;
+      } else {
+        size_t size = rosalloc_->UsableSize(ptrs[i]);
+        memset(ptrs[i], 0xEF, size);
+      }
+    }
+    CHECK_EQ(num_broken_ptrs, 0u);
+  }
+
+  rosalloc_->BulkFree(self, reinterpret_cast<void**>(ptrs), num_ptrs);
+  total_bytes_freed_atomic_.fetch_add(bytes_freed);
+  total_objects_freed_atomic_.fetch_add(num_ptrs);
+  return bytes_freed;
+}
+
+// Callback from rosalloc when it needs to increase the footprint
+extern "C" void* art_heap_rosalloc_morecore(allocator::RosAlloc* rosalloc, intptr_t increment) {
+  Heap* heap = Runtime::Current()->GetHeap();
+  DCHECK(heap->GetNonMovingSpace()->IsRosAllocSpace());
+  DCHECK_EQ(heap->GetNonMovingSpace()->AsRosAllocSpace()->GetRosAlloc(), rosalloc);
+  return heap->GetNonMovingSpace()->MoreCore(increment);
+}
+
+// Virtual functions can't be inlined, so use a non-virtual helper for the internal callers.
+inline size_t RosAllocSpace::InternalAllocationSize(const mirror::Object* obj) {
+  return AllocationSizeNonvirtual(obj);
+}
+
+size_t RosAllocSpace::AllocationSize(const mirror::Object* obj) {
+  return InternalAllocationSize(obj);
+}
+
+size_t RosAllocSpace::Trim() {
+  MutexLock mu(Thread::Current(), lock_);
+  // Trim to release memory at the end of the space.
+  rosalloc_->Trim();
+  // No inspect_all necessary here as trimming of pages is built-in.
+  return 0;
+}
+
+void RosAllocSpace::Walk(void(*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
+                         void* arg) {
+  InspectAllRosAlloc(callback, arg);
+  callback(NULL, NULL, 0, arg);  // Indicate end of a space.
+}
+
+size_t RosAllocSpace::GetFootprint() {
+  MutexLock mu(Thread::Current(), lock_);
+  return rosalloc_->Footprint();
+}
+
+size_t RosAllocSpace::GetFootprintLimit() {
+  MutexLock mu(Thread::Current(), lock_);
+  return rosalloc_->FootprintLimit();
+}
+
+void RosAllocSpace::SetFootprintLimit(size_t new_size) {
+  MutexLock mu(Thread::Current(), lock_);
+  VLOG(heap) << "RosAllocSpace::SetFootprintLimit " << PrettySize(new_size);
+  // Compare against the actual footprint, rather than the Size(), because the heap may not have
+  // grown all the way to the allowed size yet.
+  size_t current_space_size = rosalloc_->Footprint();
+  if (new_size < current_space_size) {
+    // Don't let the space grow any more.
+    new_size = current_space_size;
+  }
+  rosalloc_->SetFootprintLimit(new_size);
+}
+
+uint64_t RosAllocSpace::GetBytesAllocated() {
+  size_t bytes_allocated = 0;
+  InspectAllRosAlloc(art::gc::allocator::RosAlloc::BytesAllocatedCallback, &bytes_allocated);
+  return bytes_allocated;
+}
+
+uint64_t RosAllocSpace::GetObjectsAllocated() {
+  size_t objects_allocated = 0;
+  InspectAllRosAlloc(art::gc::allocator::RosAlloc::ObjectsAllocatedCallback, &objects_allocated);
+  return objects_allocated;
+}
+
+void RosAllocSpace::InspectAllRosAlloc(void (*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
+                                       void* arg) NO_THREAD_SAFETY_ANALYSIS {
+  // TODO: NO_THREAD_SAFETY_ANALYSIS because the locks below are acquired conditionally, which the
+  // analysis cannot model.
+  Thread* self = Thread::Current();
+  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+    // The mutators are already suspended. For example, a call path
+    // from SignalCatcher::HandleSigQuit().
+    rosalloc_->InspectAll(callback, arg);
+  } else {
+    // The mutators are not suspended yet.
+    DCHECK(!Locks::mutator_lock_->IsSharedHeld(self));
+    ThreadList* tl = Runtime::Current()->GetThreadList();
+    tl->SuspendAll();
+    {
+      MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+      MutexLock mu2(self, *Locks::thread_list_lock_);
+      rosalloc_->InspectAll(callback, arg);
+    }
+    tl->ResumeAll();
+  }
+}
+
+void RosAllocSpace::RevokeThreadLocalBuffers(Thread* thread) {
+  rosalloc_->RevokeThreadLocalRuns(thread);
+}
+
+void RosAllocSpace::RevokeAllThreadLocalBuffers() {
+  rosalloc_->RevokeAllThreadLocalRuns();
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
new file mode 100644
index 0000000..6311580
--- /dev/null
+++ b/runtime/gc/space/rosalloc_space.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_H_
+
+#include "gc/allocator/rosalloc.h"
+#include "malloc_space.h"
+#include "space.h"
+
+namespace art {
+namespace gc {
+
+namespace collector {
+  class MarkSweep;
+}  // namespace collector
+
+namespace space {
+
+// An alloc space is a space where objects may be allocated and garbage collected.
+class RosAllocSpace : public MallocSpace {
+ public:
+  // Create a RosAllocSpace with the requested sizes. The requested
+  // base address is not guaranteed to be granted; if it is required,
+  // the caller should call Begin on the returned space to confirm that
+  // the request was granted.
+  static RosAllocSpace* Create(const std::string& name, size_t initial_size, size_t growth_limit,
+                               size_t capacity, byte* requested_begin);
+
+  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
+                                          size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+  virtual size_t AllocationSize(const mirror::Object* obj);
+  virtual size_t Free(Thread* self, mirror::Object* ptr);
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
+
+  mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+
+  size_t AllocationSizeNonvirtual(const mirror::Object* obj)
+      NO_THREAD_SAFETY_ANALYSIS {
+    // TODO: NO_THREAD_SAFETY_ANALYSIS because SizeOf() requires that mutator_lock is held.
+    void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
+    // obj is a valid object. Use its class in the header to get the size.
+    size_t size = obj->SizeOf();
+    size_t size_by_size = rosalloc_->UsableSize(size);
+    if (kIsDebugBuild) {
+      size_t size_by_ptr = rosalloc_->UsableSize(obj_ptr);
+      if (size_by_size != size_by_ptr) {
+        LOG(INFO) << "Found a bad sized obj of size " << size
+                  << " at " << std::hex << reinterpret_cast<intptr_t>(obj_ptr) << std::dec
+                  << " size_by_size=" << size_by_size << " size_by_ptr=" << size_by_ptr;
+      }
+      DCHECK_EQ(size_by_size, size_by_ptr);
+    }
+    return size_by_size;
+  }
+
+  art::gc::allocator::RosAlloc* GetRosAlloc() {
+    return rosalloc_;
+  }
+
+  size_t Trim();
+  void Walk(WalkCallback callback, void* arg) LOCKS_EXCLUDED(lock_);
+  size_t GetFootprint();
+  size_t GetFootprintLimit();
+  void SetFootprintLimit(size_t limit);
+
+  MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
+                              byte* begin, byte* end, byte* limit, size_t growth_limit);
+
+  uint64_t GetBytesAllocated();
+  uint64_t GetObjectsAllocated();
+  uint64_t GetTotalBytesAllocated() {
+    return GetBytesAllocated() + total_bytes_freed_atomic_;
+  }
+  uint64_t GetTotalObjectsAllocated() {
+    return GetObjectsAllocated() + total_objects_freed_atomic_;
+  }
+
+  void RevokeThreadLocalBuffers(Thread* thread);
+  void RevokeAllThreadLocalBuffers();
+
+  // Returns the class of a recently freed object.
+  mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
+
+  virtual void InvalidateAllocator() {
+    rosalloc_for_alloc_ = NULL;
+  }
+
+  virtual bool IsRosAllocSpace() const {
+    return true;
+  }
+  virtual RosAllocSpace* AsRosAllocSpace() {
+    return this;
+  }
+
+ protected:
+  RosAllocSpace(const std::string& name, MemMap* mem_map, allocator::RosAlloc* rosalloc,
+                byte* begin, byte* end, byte* limit, size_t growth_limit);
+
+ private:
+  size_t InternalAllocationSize(const mirror::Object* obj);
+  mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+
+  void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size) {
+    return CreateRosAlloc(base, morecore_start, initial_size);
+  }
+  static allocator::RosAlloc* CreateRosAlloc(void* base, size_t morecore_start, size_t initial_size);
+
+  void InspectAllRosAlloc(void (*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
+                          void* arg)
+      LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_);
+
+  // Approximate number of bytes and objects which have been deallocated in the space.
+  AtomicInteger total_bytes_freed_atomic_;
+  AtomicInteger total_objects_freed_atomic_;
+
+  // Underlying rosalloc.
+  art::gc::allocator::RosAlloc* const rosalloc_;
+
+  // A rosalloc pointer used for allocation. Equal to what rosalloc_
+  // points to, or nullptr after InvalidateAllocator() is called.
+  art::gc::allocator::RosAlloc* rosalloc_for_alloc_;
+
+  friend class collector::MarkSweep;
+
+  DISALLOW_COPY_AND_ASSIGN(RosAllocSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_H_
diff --git a/runtime/gc/space/space-inl.h b/runtime/gc/space/space-inl.h
index 2c3b93c..0c1d7a2 100644
--- a/runtime/gc/space/space-inl.h
+++ b/runtime/gc/space/space-inl.h
@@ -27,18 +27,29 @@
 namespace space {
 
 inline ImageSpace* Space::AsImageSpace() {
-  DCHECK_EQ(GetType(), kSpaceTypeImageSpace);
+  DCHECK(IsImageSpace());
   return down_cast<ImageSpace*>(down_cast<MemMapSpace*>(this));
 }
 
-inline DlMallocSpace* Space::AsDlMallocSpace() {
+inline MallocSpace* Space::AsMallocSpace() {
   DCHECK(GetType() == kSpaceTypeAllocSpace || GetType() == kSpaceTypeZygoteSpace);
-  return down_cast<DlMallocSpace*>(down_cast<MemMapSpace*>(this));
+  DCHECK(IsDlMallocSpace() || IsRosAllocSpace());
+  return down_cast<MallocSpace*>(down_cast<MemMapSpace*>(this));
 }
 
 inline LargeObjectSpace* Space::AsLargeObjectSpace() {
-  DCHECK_EQ(GetType(), kSpaceTypeLargeObjectSpace);
-  return reinterpret_cast<LargeObjectSpace*>(this);
+  DCHECK(IsLargeObjectSpace());
+  return down_cast<LargeObjectSpace*>(this);
+}
+
+inline ContinuousSpace* Space::AsContinuousSpace() {
+  DCHECK(IsContinuousSpace());
+  return down_cast<ContinuousSpace*>(this);
+}
+
+inline DiscontinuousSpace* Space::AsDiscontinuousSpace() {
+  DCHECK(IsDiscontinuousSpace());
+  return down_cast<DiscontinuousSpace*>(this);
 }
 
 }  // namespace space
diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc
index de48b74..8eb17e0 100644
--- a/runtime/gc/space/space.cc
+++ b/runtime/gc/space/space.cc
@@ -34,7 +34,6 @@
   return os;
 }
 
-
 DiscontinuousSpace::DiscontinuousSpace(const std::string& name,
                                        GcRetentionPolicy gc_retention_policy) :
     Space(name, gc_retention_policy),
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 6dd7952..ca39175 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -42,7 +42,13 @@
 
 namespace space {
 
+class AllocSpace;
+class BumpPointerSpace;
+class ContinuousSpace;
+class DiscontinuousSpace;
+class MallocSpace;
 class DlMallocSpace;
+class RosAllocSpace;
 class ImageSpace;
 class LargeObjectSpace;
 
@@ -64,6 +70,7 @@
   kSpaceTypeImageSpace,
   kSpaceTypeAllocSpace,
   kSpaceTypeZygoteSpace,
+  kSpaceTypeBumpPointerSpace,
   kSpaceTypeLargeObjectSpace,
 };
 std::ostream& operator<<(std::ostream& os, const SpaceType& space_type);
@@ -102,23 +109,65 @@
   ImageSpace* AsImageSpace();
 
-  // Is this a dlmalloc backed allocation space?
+  // Is this a malloc-backed allocation space (dlmalloc- or rosalloc-backed)?
-  bool IsDlMallocSpace() const {
+  bool IsMallocSpace() const {
     SpaceType type = GetType();
     return type == kSpaceTypeAllocSpace || type == kSpaceTypeZygoteSpace;
   }
-  DlMallocSpace* AsDlMallocSpace();
+  MallocSpace* AsMallocSpace();
+
+  virtual bool IsDlMallocSpace() const {
+    return false;
+  }
+  virtual DlMallocSpace* AsDlMallocSpace() {
+    LOG(FATAL) << "Unreachable";
+    return NULL;
+  }
+  virtual bool IsRosAllocSpace() const {
+    return false;
+  }
+  virtual RosAllocSpace* AsRosAllocSpace() {
+    LOG(FATAL) << "Unreachable";
+    return NULL;
+  }
 
   // Is this the space allocated into by the Zygote and no-longer in use?
   bool IsZygoteSpace() const {
     return GetType() == kSpaceTypeZygoteSpace;
   }
 
+  // Is this space a bump pointer space?
+  bool IsBumpPointerSpace() const {
+    return GetType() == kSpaceTypeBumpPointerSpace;
+  }
+  virtual BumpPointerSpace* AsBumpPointerSpace() {
+    LOG(FATAL) << "Unreachable";
+    return NULL;
+  }
+
   // Does this space hold large objects and implement the large object space abstraction?
   bool IsLargeObjectSpace() const {
     return GetType() == kSpaceTypeLargeObjectSpace;
   }
   LargeObjectSpace* AsLargeObjectSpace();
 
+  virtual bool IsContinuousSpace() const {
+    return false;
+  }
+  ContinuousSpace* AsContinuousSpace();
+
+  virtual bool IsDiscontinuousSpace() const {
+    return false;
+  }
+  DiscontinuousSpace* AsDiscontinuousSpace();
+
+  virtual bool IsAllocSpace() const {
+    return false;
+  }
+  virtual AllocSpace* AsAllocSpace() {
+    LOG(FATAL) << "Unimplemented";
+    return nullptr;
+  }
+
   virtual ~Space() {}
 
  protected:
@@ -131,13 +180,13 @@
   // Name of the space that may vary due to the Zygote fork.
   std::string name_;
 
- private:
+ protected:
   // When should objects within this space be reclaimed? Not constant as we vary it in the case
   // of Zygote forking.
   GcRetentionPolicy gc_retention_policy_;
 
+ private:
   friend class art::gc::Heap;
-
   DISALLOW_COPY_AND_ASSIGN(Space);
 };
 std::ostream& operator<<(std::ostream& os, const Space& space);
@@ -168,6 +217,16 @@
   // Returns how many bytes were freed.
   virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) = 0;
 
+  // Revoke any sort of thread-local buffers that are used to speed up
+  // allocations for the given thread, if the alloc space
+  // implementation uses any. No-op by default.
+  virtual void RevokeThreadLocalBuffers(Thread* /*thread*/) {}
+
+  // Revoke any sort of thread-local buffers that are used to speed up
+  // allocations for all the threads, if the alloc space
+  // implementation uses any. No-op by default.
+  virtual void RevokeAllThreadLocalBuffers() {}
+
  protected:
   AllocSpace() {}
   virtual ~AllocSpace() {}
@@ -180,16 +239,31 @@
 // continuous spaces can be marked in the card table.
 class ContinuousSpace : public Space {
  public:
-  // Address at which the space begins
+  // Address at which the space begins.
   byte* Begin() const {
     return begin_;
   }
 
-  // Address at which the space ends, which may vary as the space is filled.
+  // Current address at which the space ends, which may vary as the space is filled.
   byte* End() const {
     return end_;
   }
 
+  // The end of the address range covered by the space.
+  byte* Limit() const {
+    return limit_;
+  }
+
+  // Change the end of the space. Be careful with use since changing the end of a space to an
+  // invalid value may break the GC.
+  void SetEnd(byte* end) {
+    end_ = end;
+  }
+
+  void SetLimit(byte* limit) {
+    limit_ = limit;
+  }
+
   // Current size of space
   size_t Size() const {
     return End() - Begin();
@@ -198,31 +272,42 @@
   virtual accounting::SpaceBitmap* GetLiveBitmap() const = 0;
   virtual accounting::SpaceBitmap* GetMarkBitmap() const = 0;
 
+  // Maximum which the mapped space can grow to.
+  virtual size_t Capacity() const {
+    return Limit() - Begin();
+  }
+
   // Is object within this space? We check to see if the pointer is beyond the end first as
   // continuous spaces are iterated over from low to high.
   bool HasAddress(const mirror::Object* obj) const {
     const byte* byte_ptr = reinterpret_cast<const byte*>(obj);
-    return byte_ptr < End() && byte_ptr >= Begin();
+    return byte_ptr >= Begin() && byte_ptr < Limit();
   }
 
   bool Contains(const mirror::Object* obj) const {
     return HasAddress(obj);
   }
 
+  virtual bool IsContinuousSpace() const {
+    return true;
+  }
+
   virtual ~ContinuousSpace() {}
 
  protected:
   ContinuousSpace(const std::string& name, GcRetentionPolicy gc_retention_policy,
-                  byte* begin, byte* end) :
-      Space(name, gc_retention_policy), begin_(begin), end_(end) {
+                  byte* begin, byte* end, byte* limit) :
+      Space(name, gc_retention_policy), begin_(begin), end_(end), limit_(limit) {
   }
 
-
   // The beginning of the storage for fast access.
-  byte* const begin_;
+  byte* begin_;
 
   // Current end of the space.
-  byte* end_;
+  byte* volatile end_;
+
+  // Limit of the space.
+  byte* limit_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(ContinuousSpace);
@@ -241,6 +326,10 @@
     return mark_objects_.get();
   }
 
+  virtual bool IsDiscontinuousSpace() const {
+    return true;
+  }
+
   virtual ~DiscontinuousSpace() {}
 
  protected:
@@ -255,25 +344,12 @@
 
 class MemMapSpace : public ContinuousSpace {
  public:
-  // Maximum which the mapped space can grow to.
-  virtual size_t Capacity() const {
-    return mem_map_->Size();
-  }
-
   // Size of the space without a limit on its growth. By default this is just the Capacity, but
   // for the allocation space we support starting with a small heap and then extending it.
   virtual size_t NonGrowthLimitCapacity() const {
     return Capacity();
   }
 
- protected:
-  MemMapSpace(const std::string& name, MemMap* mem_map, size_t initial_size,
-              GcRetentionPolicy gc_retention_policy)
-      : ContinuousSpace(name, gc_retention_policy,
-                        mem_map->Begin(), mem_map->Begin() + initial_size),
-        mem_map_(mem_map) {
-  }
-
   MemMap* GetMemMap() {
     return mem_map_.get();
   }
@@ -282,13 +358,45 @@
     return mem_map_.get();
   }
 
- private:
+ protected:
+  MemMapSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end, byte* limit,
+              GcRetentionPolicy gc_retention_policy)
+      : ContinuousSpace(name, gc_retention_policy, begin, end, limit),
+        mem_map_(mem_map) {
+  }
+
   // Underlying storage of the space
   UniquePtr<MemMap> mem_map_;
 
+ private:
   DISALLOW_COPY_AND_ASSIGN(MemMapSpace);
 };
 
+// Used by the heap compaction interface to enable copying from one type of alloc space to another.
+class ContinuousMemMapAllocSpace : public MemMapSpace, public AllocSpace {
+ public:
+  virtual bool IsAllocSpace() const {
+    return true;
+  }
+
+  virtual AllocSpace* AsAllocSpace() {
+    return this;
+  }
+
+  virtual void Clear() {
+    LOG(FATAL) << "Unimplemented";
+  }
+
+ protected:
+  ContinuousMemMapAllocSpace(const std::string& name, MemMap* mem_map, byte* begin,
+                             byte* end, byte* limit, GcRetentionPolicy gc_retention_policy)
+      : MemMapSpace(name, mem_map, begin, end, limit, gc_retention_policy) {
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ContinuousMemMapAllocSpace);
+};
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/space_test.cc b/runtime/gc/space/space_test.cc
index 455168c..6b597ae 100644
--- a/runtime/gc/space/space_test.cc
+++ b/runtime/gc/space/space_test.cc
@@ -20,6 +20,8 @@
 #include "common_test.h"
 #include "globals.h"
 #include "UniquePtr.h"
+#include "mirror/array-inl.h"
+#include "mirror/object-inl.h"
 
 #include <stdint.h>
 
@@ -33,8 +35,25 @@
                                            int round, size_t growth_limit);
   void SizeFootPrintGrowthLimitAndTrimDriver(size_t object_size);
 
-  void AddContinuousSpace(ContinuousSpace* space) {
-    Runtime::Current()->GetHeap()->AddContinuousSpace(space);
+  void AddSpace(ContinuousSpace* space) {
+    // For RosAlloc, revoke the thread local runs before moving onto a
+    // new alloc space.
+    Runtime::Current()->GetHeap()->RevokeAllThreadLocalBuffers();
+    Runtime::Current()->GetHeap()->AddSpace(space);
+  }
+  void InstallClass(mirror::Object* o, size_t size) NO_THREAD_SAFETY_ANALYSIS {
+    // Note the minimum size, which is the size of a zero-length byte array, is 12.
+    EXPECT_GE(size, static_cast<size_t>(12));
+    SirtRef<mirror::ClassLoader> null_loader(Thread::Current(), NULL);
+    mirror::Class* byte_array_class = Runtime::Current()->GetClassLinker()->FindClass("[B", null_loader);
+    EXPECT_TRUE(byte_array_class != NULL);
+    o->SetClass(byte_array_class);
+    mirror::Array* arr = o->AsArray();
+    // size_t header_size = sizeof(mirror::Object) + 4;
+    size_t header_size = arr->DataOffset(1).Uint32Value();
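+    // Pick the array length so that the installed object's SizeOf() equals the requested size.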
+    int32_t length = size - header_size;
+    arr->SetLength(length);
+    EXPECT_EQ(arr->SizeOf(), size);
   }
 };
 
@@ -87,16 +106,17 @@
 // the GC works with the ZygoteSpace.
 TEST_F(SpaceTest, ZygoteSpace) {
     size_t dummy = 0;
-    DlMallocSpace* space(DlMallocSpace::Create("test", 4 * MB, 16 * MB, 16 * MB, NULL));
+    MallocSpace* space(DlMallocSpace::Create("test", 4 * MB, 16 * MB, 16 * MB, NULL));
     ASSERT_TRUE(space != NULL);
 
     // Make space findable to the heap, will also delete space when runtime is cleaned up
-    AddContinuousSpace(space);
+    AddSpace(space);
     Thread* self = Thread::Current();
 
     // Succeeds, fits without adjusting the footprint limit.
     mirror::Object* ptr1 = space->Alloc(self, 1 * MB, &dummy);
     EXPECT_TRUE(ptr1 != NULL);
+    InstallClass(ptr1, 1 * MB);
 
     // Fails, requires a higher footprint limit.
     mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy);
@@ -107,6 +127,7 @@
     mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated);
     EXPECT_TRUE(ptr3 != NULL);
     EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+    InstallClass(ptr3, 8 * MB);
 
     // Fails, requires a higher footprint limit.
     mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy);
@@ -123,8 +144,9 @@
     EXPECT_LE(8U * MB, free3);
 
     // Succeeds, now that memory has been freed.
-    void* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
+    mirror::Object* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
     EXPECT_TRUE(ptr6 != NULL);
+    InstallClass(ptr6, 9 * MB);
 
     // Final clean up.
     size_t free1 = space->AllocationSize(ptr1);
@@ -136,11 +158,12 @@
     space = space->CreateZygoteSpace("alloc space");
 
     // Make space findable to the heap, will also delete space when runtime is cleaned up
-    AddContinuousSpace(space);
+    AddSpace(space);
 
     // Succeeds, fits without adjusting the footprint limit.
     ptr1 = space->Alloc(self, 1 * MB, &dummy);
     EXPECT_TRUE(ptr1 != NULL);
+    InstallClass(ptr1, 1 * MB);
 
     // Fails, requires a higher footprint limit.
     ptr2 = space->Alloc(self, 8 * MB, &dummy);
@@ -149,6 +172,7 @@
     // Succeeds, adjusts the footprint.
     ptr3 = space->AllocWithGrowth(self, 2 * MB, &dummy);
     EXPECT_TRUE(ptr3 != NULL);
+    InstallClass(ptr3, 2 * MB);
     space->Free(self, ptr3);
 
     // Final clean up.
@@ -164,11 +188,12 @@
   Thread* self = Thread::Current();
 
   // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddContinuousSpace(space);
+  AddSpace(space);
 
   // Succeeds, fits without adjusting the footprint limit.
   mirror::Object* ptr1 = space->Alloc(self, 1 * MB, &dummy);
   EXPECT_TRUE(ptr1 != NULL);
+  InstallClass(ptr1, 1 * MB);
 
   // Fails, requires a higher footprint limit.
   mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy);
@@ -179,6 +204,7 @@
   mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated);
   EXPECT_TRUE(ptr3 != NULL);
   EXPECT_LE(8U * MB, ptr3_bytes_allocated);
+  InstallClass(ptr3, 8 * MB);
 
   // Fails, requires a higher footprint limit.
   mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy);
@@ -195,8 +221,9 @@
   EXPECT_LE(8U * MB, free3);
 
   // Succeeds, now that memory has been freed.
-  void* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
+  mirror::Object* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
   EXPECT_TRUE(ptr6 != NULL);
+  InstallClass(ptr6, 9 * MB);
 
   // Final clean up.
   size_t free1 = space->AllocationSize(ptr1);
@@ -270,7 +297,7 @@
   ASSERT_TRUE(space != NULL);
 
   // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddContinuousSpace(space);
+  AddSpace(space);
   Thread* self = Thread::Current();
 
   // Succeeds, fits without adjusting the max allowed footprint.
@@ -278,8 +305,9 @@
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
     size_t allocation_size = 0;
     lots_of_objects[i] = space->Alloc(self, 16, &allocation_size);
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
     EXPECT_TRUE(lots_of_objects[i] != NULL);
+    InstallClass(lots_of_objects[i], 16);
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
   }
 
   // Release memory and check pointers are NULL
@@ -292,8 +320,9 @@
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
     size_t allocation_size = 0;
     lots_of_objects[i] = space->AllocWithGrowth(self, 1024, &allocation_size);
-    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
     EXPECT_TRUE(lots_of_objects[i] != NULL);
+    InstallClass(lots_of_objects[i], 1024);
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
   }
 
   // Release memory and check pointers are NULL
@@ -310,22 +339,20 @@
     // No allocation can succeed
     return;
   }
-  // Mspace for raw dlmalloc operations
-  void* mspace = space->GetMspace();
 
-  // mspace's footprint equals amount of resources requested from system
-  size_t footprint = mspace_footprint(mspace);
+  // The space's footprint equals the amount of resources requested from the system
+  size_t footprint = space->GetFootprint();
 
-  // mspace must at least have its book keeping allocated
+  // The space must at least have its bookkeeping allocated
   EXPECT_GT(footprint, 0u);
 
-  // mspace but it shouldn't exceed the initial size
+  // But it shouldn't exceed the initial size
   EXPECT_LE(footprint, growth_limit);
 
   // space's size shouldn't exceed the initial size
   EXPECT_LE(space->Size(), growth_limit);
 
-  // this invariant should always hold or else the mspace has grown to be larger than what the
+  // this invariant should always hold or else the space has grown to be larger than what the
   // space believes its size is (which will break invariants)
   EXPECT_GE(space->Size(), footprint);
 
@@ -345,8 +372,9 @@
         alloc_size = object_size;
       } else {
         alloc_size = test_rand(&rand_seed) % static_cast<size_t>(-object_size);
-        if (alloc_size < 8) {
-          alloc_size = 8;
+        // Note the minimum size, which is the size of a zero-length byte array, is 12.
+        if (alloc_size < 12) {
+          alloc_size = 12;
         }
       }
       mirror::Object* object;
@@ -356,9 +384,10 @@
       } else {
         object = space->AllocWithGrowth(self, alloc_size, &bytes_allocated);
       }
-      footprint = mspace_footprint(mspace);
+      footprint = space->GetFootprint();
       EXPECT_GE(space->Size(), footprint);  // invariant
       if (object != NULL) {  // allocation succeeded
+        InstallClass(object, alloc_size);
         lots_of_objects.get()[i] = object;
         size_t allocation_size = space->AllocationSize(object);
         EXPECT_EQ(bytes_allocated, allocation_size);
@@ -395,7 +424,7 @@
     space->Trim();
 
     // Bounds sanity
-    footprint = mspace_footprint(mspace);
+    footprint = space->GetFootprint();
     EXPECT_LE(amount_allocated, growth_limit);
     EXPECT_GE(footprint, amount_allocated);
     EXPECT_LE(footprint, growth_limit);
@@ -421,13 +450,21 @@
       space->Free(self, object);
       lots_of_objects.get()[i] = NULL;
       amount_allocated -= allocation_size;
-      footprint = mspace_footprint(mspace);
+      footprint = space->GetFootprint();
       EXPECT_GE(space->Size(), footprint);  // invariant
     }
 
     free_increment >>= 1;
   }
 
+  // The space has become empty here, before allocating a large object
+  // below. For RosAlloc, revoke the thread-local runs, which are kept
+  // even when empty for performance reasons, so that they won't
+  // cause the following large object allocation to fail due to
+  // potential fragmentation. Note that they are normally revoked at
+  // each GC (but no GC happens here).
+  space->RevokeAllThreadLocalBuffers();
+
   // All memory was released, try a large allocation to check freed memory is being coalesced
   mirror::Object* large_object;
   size_t three_quarters_space = (growth_limit / 2) + (growth_limit / 4);
@@ -438,9 +475,10 @@
     large_object = space->AllocWithGrowth(self, three_quarters_space, &bytes_allocated);
   }
   EXPECT_TRUE(large_object != NULL);
+  InstallClass(large_object, three_quarters_space);
 
   // Sanity check footprint
-  footprint = mspace_footprint(mspace);
+  footprint = space->GetFootprint();
   EXPECT_LE(footprint, growth_limit);
   EXPECT_GE(space->Size(), footprint);
   EXPECT_LE(space->Size(), growth_limit);
@@ -449,7 +487,7 @@
   space->Free(self, large_object);
 
   // Sanity check footprint
-  footprint = mspace_footprint(mspace);
+  footprint = space->GetFootprint();
   EXPECT_LE(footprint, growth_limit);
   EXPECT_GE(space->Size(), footprint);
   EXPECT_LE(space->Size(), growth_limit);
@@ -467,7 +505,7 @@
   EXPECT_EQ(space->NonGrowthLimitCapacity(), capacity);
 
   // Make space findable to the heap, will also delete space when runtime is cleaned up
-  AddContinuousSpace(space);
+  AddSpace(space);
 
   // In this round we don't allocate with growth and therefore can't grow past the initial size.
   // This effectively makes the growth_limit the initial_size, so assert this.
@@ -488,8 +526,8 @@
   }
 
 // Each size test is its own test so that we get a fresh heap each time
-TEST_F(SpaceTest, SizeFootPrintGrowthLimitAndTrim_AllocationsOf_8B) {
-  SizeFootPrintGrowthLimitAndTrimDriver(8);
+TEST_F(SpaceTest, SizeFootPrintGrowthLimitAndTrim_AllocationsOf_12B) {
+  SizeFootPrintGrowthLimitAndTrimDriver(12);
 }
 TEST_SizeFootPrintGrowthLimitAndTrim(16B, 16)
 TEST_SizeFootPrintGrowthLimitAndTrim(24B, 24)
diff --git a/runtime/globals.h b/runtime/globals.h
index 31574ff..c2fe67e 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -26,53 +26,68 @@
 typedef intptr_t word;
 typedef uintptr_t uword;
 
-const size_t KB = 1024;
-const size_t MB = KB * KB;
-const size_t GB = KB * KB * KB;
+static constexpr size_t KB = 1024;
+static constexpr size_t MB = KB * KB;
+static constexpr size_t GB = KB * KB * KB;
 
-const size_t kWordSize = sizeof(word);
-const size_t kPointerSize = sizeof(void*);
+static constexpr size_t kWordSize = sizeof(word);
+static constexpr size_t kPointerSize = sizeof(void*);
 
-const size_t kBitsPerByte = 8;
-const size_t kBitsPerByteLog2 = 3;
-const int kBitsPerWord = kWordSize * kBitsPerByte;
-const size_t kWordHighBitMask = 1 << (kBitsPerWord - 1);
+static constexpr size_t kBitsPerByte = 8;
+static constexpr size_t kBitsPerByteLog2 = 3;
+static constexpr int kBitsPerWord = kWordSize * kBitsPerByte;
+static constexpr size_t kWordHighBitMask = 1 << (kBitsPerWord - 1);
 
 // Required stack alignment
-const size_t kStackAlignment = 16;
+static constexpr size_t kStackAlignment = 16;
 
 // Required object alignment
-const size_t kObjectAlignment = 8;
+static constexpr size_t kObjectAlignment = 8;
 
 // ARM instruction alignment. ARM processors require code to be 4-byte aligned,
 // but ARM ELF requires 8..
-const size_t kArmAlignment = 8;
+static constexpr size_t kArmAlignment = 8;
 
 // MIPS instruction alignment.  MIPS processors require code to be 4-byte aligned.
 // TODO: Can this be 4?
-const size_t kMipsAlignment = 8;
+static constexpr size_t kMipsAlignment = 8;
 
 // X86 instruction alignment. This is the recommended alignment for maximum performance.
-const size_t kX86Alignment = 16;
+static constexpr size_t kX86Alignment = 16;
 
 // System page size. We check this against sysconf(_SC_PAGE_SIZE) at runtime, but use a simple
 // compile-time constant so the compiler can generate better code.
-const int kPageSize = 4096;
+static constexpr int kPageSize = 4096;
 
 // Whether or not this is a debug build. Useful in conditionals where NDEBUG isn't.
 #if defined(NDEBUG)
-const bool kIsDebugBuild = false;
+static constexpr bool kIsDebugBuild = false;
 #else
-const bool kIsDebugBuild = true;
+static constexpr bool kIsDebugBuild = true;
 #endif
 
 // Whether or not this is a target (vs host) build. Useful in conditionals where ART_TARGET isn't.
 #if defined(ART_TARGET)
-const bool kIsTargetBuild = true;
+static constexpr bool kIsTargetBuild = true;
 #else
-const bool kIsTargetBuild = false;
+static constexpr bool kIsTargetBuild = false;
 #endif
 
+#if defined(ART_USE_PORTABLE_COMPILER)
+static constexpr bool kUsePortableCompiler = true;
+#else
+static constexpr bool kUsePortableCompiler = false;
+#endif
+
+// Garbage collector constants.
+static constexpr bool kMovingCollector = true && !kUsePortableCompiler;
+// True if we allow moving classes.
+static constexpr bool kMovingClasses = false;
+// True if we allow moving fields.
+static constexpr bool kMovingFields = false;
+// True if we allow moving methods.
+static constexpr bool kMovingMethods = false;
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_GLOBALS_H_
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 67620a0..9f899e8 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -537,7 +537,7 @@
     HprofRecord* rec = &current_record_;
 
     for (StringMapIterator it = strings_.begin(); it != strings_.end(); ++it) {
-      std::string string((*it).first);
+      const std::string& string = (*it).first;
       size_t id = (*it).second;
 
       int err = current_record_.StartNewRecord(header_fp_, HPROF_TAG_STRING, HPROF_TIME);
diff --git a/runtime/indenter.h b/runtime/indenter.h
index c432e1b..d055d4e 100644
--- a/runtime/indenter.h
+++ b/runtime/indenter.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_INDENTER_H_
 #define ART_RUNTIME_INDENTER_H_
 
+#include "base/logging.h"
 #include "base/macros.h"
 #include <streambuf>
 
@@ -30,16 +31,28 @@
 
  private:
   int_type overflow(int_type c) {
-    if (c != std::char_traits<char>::eof()) {
-      if (indent_next_) {
-        for (size_t i = 0; i < count_; ++i) {
-          out_sbuf_->sputc(text_);
+    if (UNLIKELY(c == std::char_traits<char>::eof())) {
+      out_sbuf_->pubsync();
+      return c;
+    }
+    if (indent_next_) {
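+      // Emit |count_| copies of the indent character before the first character of a new line.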
+      for (size_t i = 0; i < count_; ++i) {
+        int_type r = out_sbuf_->sputc(text_);
+        if (UNLIKELY(r != text_)) {
+          out_sbuf_->pubsync();
+          r = out_sbuf_->sputc(text_);
+          CHECK_EQ(r, text_) << "Error writing to buffer. Disk full?";
         }
       }
-      out_sbuf_->sputc(c);
-      indent_next_ = (c == '\n');
     }
-    return std::char_traits<char>::not_eof(c);
+    indent_next_ = (c == '\n');
+    int_type r = out_sbuf_->sputc(c);
+    if (UNLIKELY(r != c)) {
+      out_sbuf_->pubsync();
+      r = out_sbuf_->sputc(c);
+      CHECK_EQ(r, c) << "Error writing to buffer. Disk full?";
+    }
+    return r;
   }
 
   int sync() {
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 8cf486f..4ad9c63 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -39,8 +39,13 @@
 #include "thread_list.h"
 
 namespace art {
+
+extern void SetQuickAllocEntryPointsInstrumented(bool instrumented);
+
 namespace instrumentation {
 
+const bool kVerboseInstrumentation = false;
+
 // Do we want to deoptimize for method entry and exit listeners or just try to intercept
 // invocations? Deoptimization forces all code to run in the interpreter and considerably hurts the
 // application's performance.
@@ -54,10 +59,7 @@
 
 bool Instrumentation::InstallStubsForClass(mirror::Class* klass) {
   bool uninstall = !entry_exit_stubs_installed_ && !interpreter_stubs_installed_;
-  ClassLinker* class_linker = NULL;
-  if (uninstall) {
-    class_linker = Runtime::Current()->GetClassLinker();
-  }
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   bool is_initialized = klass->IsInitialized();
   for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
     mirror::ArtMethod* method = klass->GetDirectMethod(i);
@@ -73,7 +75,14 @@
         }
       } else {  // !uninstall
         if (!interpreter_stubs_installed_ || method->IsNative()) {
-          new_code = GetQuickInstrumentationEntryPoint();
+          // Do not overwrite the resolution trampoline. When the trampoline initializes the
+          // method's class, all its static methods' code will be set to the instrumentation
+          // entry point. For more details, see ClassLinker::FixupStaticTrampolines.
+          if (is_initialized || !method->IsStatic() || method->IsConstructor()) {
+            new_code = GetQuickInstrumentationEntryPoint();
+          } else {
+            new_code = GetResolutionTrampoline(class_linker);
+          }
         } else {
           new_code = GetCompiledCodeToInterpreterBridge();
         }
@@ -391,12 +400,62 @@
   }
 }
 
+static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg) {
+  thread->ResetQuickAllocEntryPointsForThread();
+}
+
+void Instrumentation::InstrumentQuickAllocEntryPoints() {
+  // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racy and this code
+  //       should be guarded by a lock.
+  DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_.load(), 0);
+  const bool enable_instrumentation =
+      quick_alloc_entry_points_instrumentation_counter_.fetch_add(1) == 0;
+  if (enable_instrumentation) {
+    // Instrumentation wasn't enabled so enable it.
+    SetQuickAllocEntryPointsInstrumented(true);
+    ResetQuickAllocEntryPoints();
+  }
+}
+
+void Instrumentation::UninstrumentQuickAllocEntryPoints() {
+  // TODO: the read of quick_alloc_entry_points_instrumentation_counter_ is racy and this code
+  //       should be guarded by a lock.
+  DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_.load(), 0);
+  const bool disable_instrumentation =
+      quick_alloc_entry_points_instrumentation_counter_.fetch_sub(1) == 1;
+  if (disable_instrumentation) {
+    SetQuickAllocEntryPointsInstrumented(false);
+    ResetQuickAllocEntryPoints();
+  }
+}
+
+void Instrumentation::ResetQuickAllocEntryPoints() {
+  Runtime* runtime = Runtime::Current();
+  if (runtime->IsStarted()) {
+    ThreadList* tl = runtime->GetThreadList();
+    Thread* self = Thread::Current();
+    tl->SuspendAll();
+    {
+      MutexLock mu(self, *Locks::thread_list_lock_);
+      tl->ForEach(ResetQuickAllocEntryPointsForThread, NULL);
+    }
+    tl->ResumeAll();
+  }
+}
+
 void Instrumentation::UpdateMethodsCode(mirror::ArtMethod* method, const void* code) const {
   if (LIKELY(!instrumentation_stubs_installed_)) {
     method->SetEntryPointFromCompiledCode(code);
   } else {
     if (!interpreter_stubs_installed_ || method->IsNative()) {
-      method->SetEntryPointFromCompiledCode(GetQuickInstrumentationEntryPoint());
+      // Do not overwrite the resolution trampoline. When the trampoline initializes the
+      // method's class, all its static methods' code will be set to the instrumentation
+      // entry point. For more details, see ClassLinker::FixupStaticTrampolines.
+      if (code == GetResolutionTrampoline(Runtime::Current()->GetClassLinker())) {
+        method->SetEntryPointFromCompiledCode(code);
+      } else {
+        method->SetEntryPointFromCompiledCode(GetQuickInstrumentationEntryPoint());
+      }
     } else {
       method->SetEntryPointFromCompiledCode(GetCompiledCodeToInterpreterBridge());
     }
@@ -449,7 +508,7 @@
                                         uint32_t dex_pc) const {
   if (have_method_unwind_listeners_) {
     for (InstrumentationListener* listener : method_unwind_listeners_) {
-      listener->MethodUnwind(thread, method, dex_pc);
+      listener->MethodUnwind(thread, this_object, method, dex_pc);
     }
   }
 }
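
InstrumentQuickAllocEntryPoints() and UninstrumentQuickAllocEntryPoints() above implement a reference-counted toggle: only the 0 -> 1 and 1 -> 0 transitions of the atomic counter actually switch the allocation entry points and reset them on every thread. The sketch below shows just that counting idiom with illustrative names (AllocInstrumentation, SetInstrumented); it is not ART code, and, as the TODOs above note for the real implementation, the transition check and the entry-point switch are not atomic together, so concurrent callers still need external serialization.

#include <atomic>
#include <cstdio>

class AllocInstrumentation {
 public:
  void Enable() {
    // fetch_add returns the previous value, so only the first enabler flips the state.
    if (counter_.fetch_add(1) == 0) {
      SetInstrumented(true);
    }
  }
  void Disable() {
    // Only the last disabler flips the state back.
    if (counter_.fetch_sub(1) == 1) {
      SetInstrumented(false);
    }
  }
  bool instrumented() const { return instrumented_; }

 private:
  void SetInstrumented(bool instrumented) { instrumented_ = instrumented; }

  std::atomic<int> counter_{0};
  bool instrumented_ = false;
};

int main() {
  AllocInstrumentation alloc;
  alloc.Enable();   // 0 -> 1: entry points switched to the instrumented versions.
  alloc.Enable();   // 1 -> 2: no state change.
  alloc.Disable();  // 2 -> 1: still instrumented.
  alloc.Disable();  // 1 -> 0: entry points switched back.
  std::printf("instrumented=%d\n", alloc.instrumented());
  return 0;
}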
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 7a0aaf7..72a646e 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_INSTRUMENTATION_H_
 #define ART_RUNTIME_INSTRUMENTATION_H_
 
+#include "atomic_integer.h"
 #include "base/macros.h"
 #include "locks.h"
 
@@ -36,8 +37,6 @@
 
 namespace instrumentation {
 
-const bool kVerboseInstrumentation = false;
-
 // Interpreter handler tables.
 enum InterpreterHandlerTable {
   kMainHandlerTable = 0,          // Main handler table: no suspend check, no instrumentation.
@@ -68,8 +67,9 @@
 
   // Call-back for when a method is popped due to an exception throw. A method will either cause a
   // MethodExited call-back or a MethodUnwind call-back when its activation is removed.
-  virtual void MethodUnwind(Thread* thread, const mirror::ArtMethod* method,
-                            uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+  virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
+                            const mirror::ArtMethod* method, uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Call-back for when the dex pc moves in a method.
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
@@ -104,7 +104,8 @@
       have_method_entry_listeners_(false), have_method_exit_listeners_(false),
       have_method_unwind_listeners_(false), have_dex_pc_listeners_(false),
       have_exception_caught_listeners_(false),
-      interpreter_handler_table_(kMainHandlerTable) {}
+      interpreter_handler_table_(kMainHandlerTable),
+      quick_alloc_entry_points_instrumentation_counter_(0) {}
 
   // Add a listener to be notified of the masked together sent of instrumentation events. This
   // suspend the runtime to install stubs. You are expected to hold the mutator lock as a proxy
@@ -123,6 +124,10 @@
     return interpreter_handler_table_;
   }
 
+  void InstrumentQuickAllocEntryPoints() LOCKS_EXCLUDED(Locks::thread_list_lock_);
+  void UninstrumentQuickAllocEntryPoints() LOCKS_EXCLUDED(Locks::thread_list_lock_);
+  void ResetQuickAllocEntryPoints();
+
   // Update the code of a method respecting any installed stubs.
   void UpdateMethodsCode(mirror::ArtMethod* method, const void* code) const;
 
@@ -289,9 +294,14 @@
   std::list<InstrumentationListener*> dex_pc_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> exception_caught_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
-  // Current interpreter handler table. This is updated each time the thread state flags are modified.
+  // Current interpreter handler table. This is updated each time the thread state flags are
+  // modified.
   InterpreterHandlerTable interpreter_handler_table_;
 
+  // Greater than 0 if the quick alloc entry points are instrumented.
+  // TODO: The accesses and changes to this are racy and should be guarded by a lock.
+  AtomicInteger quick_alloc_entry_points_instrumentation_counter_;
+
   DISALLOW_COPY_AND_ASSIGN(Instrumentation);
 };
 
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 8f9e072..a829e97 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -48,7 +48,7 @@
   MutexLock mu(Thread::Current(), intern_table_lock_);
   if (!only_dirty || is_dirty_) {
     for (auto& strong_intern : strong_interns_) {
-      strong_intern.second = reinterpret_cast<mirror::String*>(visitor(strong_intern.second, arg));
+      strong_intern.second = down_cast<mirror::String*>(visitor(strong_intern.second, arg));
       DCHECK(strong_intern.second != nullptr);
     }
 
@@ -59,8 +59,7 @@
   // Note: we deliberately don't visit the weak_interns_ table and the immutable image roots.
 }
 
-mirror::String* InternTable::Lookup(Table& table, mirror::String* s,
-                                    uint32_t hash_code) {
+mirror::String* InternTable::Lookup(Table& table, mirror::String* s, uint32_t hash_code) {
   intern_table_lock_.AssertHeld(Thread::Current());
   for (auto it = table.find(hash_code), end = table.end(); it != end; ++it) {
     mirror::String* existing_string = it->second;
@@ -71,8 +70,7 @@
   return NULL;
 }
 
-mirror::String* InternTable::Insert(Table& table, mirror::String* s,
-                                    uint32_t hash_code) {
+mirror::String* InternTable::Insert(Table& table, mirror::String* s, uint32_t hash_code) {
   intern_table_lock_.AssertHeld(Thread::Current());
   table.insert(std::make_pair(hash_code, s));
   return s;
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index d7555dd..9938478 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -430,8 +430,8 @@
   if (method->IsStatic()) {
     Class* declaringClass = method->GetDeclaringClass();
     if (UNLIKELY(!declaringClass->IsInitializing())) {
-      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(declaringClass,
-                                                                            true, true))) {
+      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(declaringClass, true,
+                                                                            true))) {
         DCHECK(Thread::Current()->IsExceptionPending());
         self->PopShadowFrame();
         return;
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 19f55d2..c9756ac 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -29,7 +29,7 @@
                                   size_t dest_reg, size_t src_reg) {
   // If both register locations contains the same value, the register probably holds a reference.
   int32_t src_value = shadow_frame.GetVReg(src_reg);
-  mirror::Object* o = shadow_frame.GetVRegReference(src_reg);
+  mirror::Object* o = shadow_frame.GetVRegReference<false>(src_reg);
   if (src_value == reinterpret_cast<int32_t>(o)) {
     new_shadow_frame.SetVRegReference(dest_reg, o);
   } else {
@@ -193,7 +193,7 @@
     }
     return false;
   }
-  Object* newArray = Array::Alloc(self, arrayClass, length);
+  Object* newArray = Array::Alloc<true>(self, arrayClass, length);
   if (UNLIKELY(newArray == NULL)) {
     DCHECK(self->IsExceptionPending());
     return false;
@@ -233,7 +233,8 @@
   std::string name(PrettyMethod(shadow_frame->GetMethod()));
   if (name == "java.lang.Class java.lang.Class.forName(java.lang.String)") {
     std::string descriptor(DotToDescriptor(shadow_frame->GetVRegReference(arg_offset)->AsString()->ToModifiedUtf8().c_str()));
-    ClassLoader* class_loader = NULL;  // shadow_frame.GetMethod()->GetDeclaringClass()->GetClassLoader();
+
+    SirtRef<ClassLoader> class_loader(self, nullptr);  // shadow_frame.GetMethod()->GetDeclaringClass()->GetClassLoader();
     Class* found = Runtime::Current()->GetClassLinker()->FindClass(descriptor.c_str(),
                                                                    class_loader);
     CHECK(found != NULL) << "Class.forName failed in un-started runtime for class: "
@@ -278,7 +279,7 @@
     // TODO: getDeclaredField calls GetType once the field is found to ensure a
     //       NoClassDefFoundError is thrown if the field's type cannot be resolved.
     Class* jlr_Field = self->DecodeJObject(WellKnownClasses::java_lang_reflect_Field)->AsClass();
-    SirtRef<Object> field(self, jlr_Field->AllocObject(self));
+    SirtRef<Object> field(self, jlr_Field->AllocNonMovableObject(self));
     CHECK(field.get() != NULL);
     ArtMethod* c = jlr_Field->FindDeclaredDirectMethod("<init>", "(Ljava/lang/reflect/ArtField;)V");
     uint32_t args[1];
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index aa6bcd6..99c85bd 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -509,8 +509,9 @@
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(NEW_INSTANCE) {
-    Object* obj = AllocObjectFromCodeInstrumented(inst->VRegB_21c(), shadow_frame.GetMethod(),
-                                                  self, do_access_check);
+    Object* obj = AllocObjectFromCode<do_access_check, true>(
+        inst->VRegB_21c(), shadow_frame.GetMethod(), self,
+        Runtime::Current()->GetHeap()->GetCurrentAllocator());
     if (UNLIKELY(obj == NULL)) {
       HANDLE_PENDING_EXCEPTION();
     } else {
@@ -522,8 +523,9 @@
 
   HANDLE_INSTRUCTION_START(NEW_ARRAY) {
     int32_t length = shadow_frame.GetVReg(inst->VRegB_22c(inst_data));
-    Object* obj = AllocArrayFromCodeInstrumented(inst->VRegC_22c(), shadow_frame.GetMethod(),
-                                                 length, self, do_access_check);
+    Object* obj = AllocArrayFromCode<do_access_check, true>(
+        inst->VRegC_22c(), shadow_frame.GetMethod(), length, self,
+        Runtime::Current()->GetHeap()->GetCurrentAllocator());
     if (UNLIKELY(obj == NULL)) {
       HANDLE_PENDING_EXCEPTION();
     } else {
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index bd0d87e..675095f 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -422,8 +422,9 @@
       }
       case Instruction::NEW_INSTANCE: {
         PREAMBLE();
-        Object* obj = AllocObjectFromCodeInstrumented(inst->VRegB_21c(), shadow_frame.GetMethod(),
-                                                      self, do_access_check);
+        Object* obj = AllocObjectFromCode<do_access_check, true>(
+            inst->VRegB_21c(), shadow_frame.GetMethod(), self,
+            Runtime::Current()->GetHeap()->GetCurrentAllocator());
         if (UNLIKELY(obj == NULL)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
@@ -435,8 +436,9 @@
       case Instruction::NEW_ARRAY: {
         PREAMBLE();
         int32_t length = shadow_frame.GetVReg(inst->VRegB_22c(inst_data));
-        Object* obj = AllocArrayFromCodeInstrumented(inst->VRegC_22c(), shadow_frame.GetMethod(),
-                                                     length, self, do_access_check);
+        Object* obj = AllocArrayFromCode<do_access_check, true>(
+            inst->VRegC_22c(), shadow_frame.GetMethod(), length, self,
+            Runtime::Current()->GetHeap()->GetCurrentAllocator());
         if (UNLIKELY(obj == NULL)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index a1657d0..fd78bf2 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -31,6 +31,7 @@
 struct iovec;
 
 namespace art {
+  union JValue;
 namespace mirror {
   class ArtMethod;
 }  // namespace mirror
@@ -185,8 +186,11 @@
    * issuing a MethodEntry on a native method.
    *
    * "eventFlags" indicates the types of events that have occurred.
+   *
+   * "returnValue" is non-null for MethodExit events only.
    */
-  bool PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags)
+  bool PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags,
+                         const JValue* returnValue)
      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 345549d..b05b49d 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -521,7 +521,7 @@
      * The JDWP thread has told us (and possibly all other threads) to
      * resume.  See if it has left anything in our DebugInvokeReq mailbox.
      */
-    if (!pReq->invoke_needed_) {
+    if (!pReq->invoke_needed) {
       /*LOGD("SuspendByPolicy: no invoke needed");*/
       break;
     }
@@ -535,12 +535,12 @@
     pReq->error = ERR_NONE;
 
     /* clear this before signaling */
-    pReq->invoke_needed_ = false;
+    pReq->invoke_needed = false;
 
     VLOG(jdwp) << "invoke complete, signaling and self-suspending";
     Thread* self = Thread::Current();
-    MutexLock mu(self, pReq->lock_);
-    pReq->cond_.Signal(self);
+    MutexLock mu(self, pReq->lock);
+    pReq->cond.Signal(self);
   }
 }
 
@@ -570,7 +570,7 @@
  */
 bool JdwpState::InvokeInProgress() {
   DebugInvokeReq* pReq = Dbg::GetInvokeReq();
-  return pReq->invoke_needed_;
+  return pReq->invoke_needed;
 }
 
 /*
@@ -719,7 +719,8 @@
  *  - Single-step to a line with a breakpoint.  Should get a single
  *    event message with both events in it.
  */
-bool JdwpState::PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags) {
+bool JdwpState::PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags,
+                                  const JValue* returnValue) {
   ModBasket basket;
   basket.pLoc = pLoc;
   basket.classId = pLoc->class_id;
@@ -771,9 +772,7 @@
     }
     if ((eventFlags & Dbg::kMethodExit) != 0) {
       FindMatchingEvents(EK_METHOD_EXIT, &basket, match_list, &match_count);
-
-      // TODO: match EK_METHOD_EXIT_WITH_RETURN_VALUE too; we need to include the 'value', though.
-      // FindMatchingEvents(EK_METHOD_EXIT_WITH_RETURN_VALUE, &basket, match_list, &match_count);
+      FindMatchingEvents(EK_METHOD_EXIT_WITH_RETURN_VALUE, &basket, match_list, &match_count);
     }
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
@@ -792,6 +791,9 @@
         expandBufAdd4BE(pReq, match_list[i]->requestId);
         expandBufAdd8BE(pReq, basket.threadId);
         expandBufAddLocation(pReq, *pLoc);
+        if (match_list[i]->eventKind == EK_METHOD_EXIT_WITH_RETURN_VALUE) {
+          Dbg::OutputMethodReturnValue(pLoc->method_id, returnValue, pReq);
+        }
       }
     }
 
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index ec717c1..466edeb 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -22,6 +22,7 @@
 #include <utility>
 #include <vector>
 
+#include "atomic_integer.h"
 #include "base/logging.h"
 #include "base/mutex.h"
 #include "base/stl_util.h"
@@ -292,8 +293,8 @@
   Class* field_type;
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   if (sig[1] != '\0') {
-    ClassLoader* cl = GetClassLoader(soa);
-    field_type = class_linker->FindClass(sig, cl);
+    SirtRef<mirror::ClassLoader> class_loader(soa.Self(), GetClassLoader(soa));
+    field_type = class_linker->FindClass(sig, class_loader);
   } else {
     field_type = class_linker->FindPrimitiveClass(*sig);
   }
@@ -646,8 +647,8 @@
     ScopedObjectAccess soa(env);
     Class* c = NULL;
     if (runtime->IsStarted()) {
-      ClassLoader* cl = GetClassLoader(soa);
-      c = class_linker->FindClass(descriptor.c_str(), cl);
+      SirtRef<mirror::ClassLoader> class_loader(soa.Self(), GetClassLoader(soa));
+      c = class_linker->FindClass(descriptor.c_str(), class_loader);
     } else {
       c = class_linker->FindSystemClass(descriptor.c_str());
     }
@@ -2002,14 +2003,22 @@
     String* s = soa.Decode<String*>(java_string);
     CharArray* chars = s->GetCharArray();
     PinPrimitiveArray(soa, chars);
-    if (is_copy != NULL) {
-      *is_copy = JNI_FALSE;
+    if (is_copy != nullptr) {
+      *is_copy = JNI_TRUE;
     }
-    return chars->GetData() + s->GetOffset();
+    int32_t char_count = s->GetLength();
+    int32_t offset = s->GetOffset();
+    jchar* bytes = new jchar[char_count + 1];
+    for (int32_t i = 0; i < char_count; i++) {
+      bytes[i] = chars->Get(i + offset);
+    }
+    bytes[char_count] = '\0';
+    return bytes;
   }
 
-  static void ReleaseStringChars(JNIEnv* env, jstring java_string, const jchar*) {
+  static void ReleaseStringChars(JNIEnv* env, jstring java_string, const jchar* chars) {
     CHECK_NON_NULL_ARGUMENT(GetStringUTFRegion, java_string);
+    delete[] chars;
     ScopedObjectAccess soa(env);
     UnpinPrimitiveArray(soa, soa.Decode<String*>(java_string)->GetCharArray());
   }
@@ -2120,8 +2129,8 @@
 
     // Find the class.
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    Class* array_class = class_linker->FindClass(descriptor.c_str(),
-                                                 element_class->GetClassLoader());
+    SirtRef<mirror::ClassLoader> class_loader(soa.Self(), element_class->GetClassLoader());
+    Class* array_class = class_linker->FindClass(descriptor.c_str(), class_loader);
     if (array_class == NULL) {
       return NULL;
     }
@@ -2146,16 +2155,23 @@
     CHECK_NON_NULL_ARGUMENT(GetPrimitiveArrayCritical, java_array);
     ScopedObjectAccess soa(env);
     Array* array = soa.Decode<Array*>(java_array);
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    if (heap->IsMovableObject(array)) {
+      heap->IncrementDisableGC(soa.Self());
+      // Re-decode in case the object moved since IncrementDisableGC waits for GC to complete.
+      array = soa.Decode<Array*>(java_array);
+    }
     PinPrimitiveArray(soa, array);
-    if (is_copy != NULL) {
+    if (is_copy != nullptr) {
       *is_copy = JNI_FALSE;
     }
-    return array->GetRawData(array->GetClass()->GetComponentSize());
+    void* address = array->GetRawData(array->GetClass()->GetComponentSize());
+    return address;
   }
 
-  static void ReleasePrimitiveArrayCritical(JNIEnv* env, jarray array, void*, jint mode) {
+  static void ReleasePrimitiveArrayCritical(JNIEnv* env, jarray array, void* elements, jint mode) {
     CHECK_NON_NULL_ARGUMENT(ReleasePrimitiveArrayCritical, array);
-    ReleasePrimitiveArray(env, array, mode);
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
   static jboolean* GetBooleanArrayElements(JNIEnv* env, jbooleanArray array, jboolean* is_copy) {
@@ -2206,36 +2222,40 @@
     return GetPrimitiveArray<jshortArray, jshort*, ShortArray>(soa, array, is_copy);
   }
 
-  static void ReleaseBooleanArrayElements(JNIEnv* env, jbooleanArray array, jboolean*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseBooleanArrayElements(JNIEnv* env, jbooleanArray array, jboolean* elements,
+                                          jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
-  static void ReleaseByteArrayElements(JNIEnv* env, jbyteArray array, jbyte*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseByteArrayElements(JNIEnv* env, jbyteArray array, jbyte* elements, jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
-  static void ReleaseCharArrayElements(JNIEnv* env, jcharArray array, jchar*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseCharArrayElements(JNIEnv* env, jcharArray array, jchar* elements, jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
-  static void ReleaseDoubleArrayElements(JNIEnv* env, jdoubleArray array, jdouble*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseDoubleArrayElements(JNIEnv* env, jdoubleArray array, jdouble* elements,
+                                         jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
-  static void ReleaseFloatArrayElements(JNIEnv* env, jfloatArray array, jfloat*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseFloatArrayElements(JNIEnv* env, jfloatArray array, jfloat* elements,
+                                        jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
-  static void ReleaseIntArrayElements(JNIEnv* env, jintArray array, jint*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseIntArrayElements(JNIEnv* env, jintArray array, jint* elements, jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
-  static void ReleaseLongArrayElements(JNIEnv* env, jlongArray array, jlong*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseLongArrayElements(JNIEnv* env, jlongArray array, jlong* elements, jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
-  static void ReleaseShortArrayElements(JNIEnv* env, jshortArray array, jshort*, jint mode) {
-    ReleasePrimitiveArray(env, array, mode);
+  static void ReleaseShortArrayElements(JNIEnv* env, jshortArray array, jshort* elements,
+                                        jint mode) {
+    ReleasePrimitiveArray(env, array, elements, mode);
   }
 
   static void GetBooleanArrayRegion(JNIEnv* env, jbooleanArray array, jsize start, jsize length,
@@ -2551,19 +2571,49 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ArtArrayT* array = soa.Decode<ArtArrayT*>(java_array);
     PinPrimitiveArray(soa, array);
-    if (is_copy != NULL) {
-      *is_copy = JNI_FALSE;
+    // Only make a copy if necessary.
+    if (Runtime::Current()->GetHeap()->IsMovableObject(array)) {
+      if (is_copy != nullptr) {
+        *is_copy = JNI_TRUE;
+      }
+      static const size_t component_size = array->GetClass()->GetComponentSize();
+      size_t size = array->GetLength() * component_size;
+      void* data = new uint64_t[RoundUp(size, 8) / 8];
+      memcpy(data, array->GetData(), size);
+      return reinterpret_cast<CArrayT>(data);
+    } else {
+      if (is_copy != nullptr) {
+        *is_copy = JNI_FALSE;
+      }
+      return reinterpret_cast<CArrayT>(array->GetData());
     }
-    return array->GetData();
   }
 
-  template <typename ArrayT>
-  static void ReleasePrimitiveArray(JNIEnv* env, ArrayT java_array, jint mode) {
-    if (mode != JNI_COMMIT) {
-      ScopedObjectAccess soa(env);
-      Array* array = soa.Decode<Array*>(java_array);
-      UnpinPrimitiveArray(soa, array);
+  template <typename ArrayT, typename ElementT>
+  static void ReleasePrimitiveArray(JNIEnv* env, ArrayT java_array, ElementT* elements, jint mode) {
+    ScopedObjectAccess soa(env);
+    Array* array = soa.Decode<Array*>(java_array);
+    size_t component_size = array->GetClass()->GetComponentSize();
+    void* array_data = array->GetRawData(component_size);
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    bool is_copy = array_data != reinterpret_cast<void*>(elements);
+    size_t bytes = array->GetLength() * component_size;
+    VLOG(heap) << "Release primitive array " << env << " array_data " << array_data
+               << " elements " << reinterpret_cast<void*>(elements);
+    if (!is_copy && heap->IsMovableObject(array)) {
+      heap->DecrementDisableGC(soa.Self());
     }
+    // No need to copy the data back if the caller had a direct pointer.
+    if (mode != JNI_ABORT && is_copy) {
+      memcpy(array_data, elements, bytes);
+    }
+    if (mode != JNI_COMMIT) {
+      if (is_copy) {
+        delete[] reinterpret_cast<uint64_t*>(elements);
+      }
+    }
+    // TODO: Do we always unpin primitive array?
+    UnpinPrimitiveArray(soa, array);
   }
 
   template <typename JavaArrayT, typename JavaT, typename ArrayT>
@@ -2854,6 +2904,18 @@
 JNIEnvExt::~JNIEnvExt() {
 }
 
+jobject JNIEnvExt::NewLocalRef(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if (obj == nullptr) {
+    return nullptr;
+  }
+  return reinterpret_cast<jobject>(locals.Add(local_ref_cookie, obj));
+}
+
+void JNIEnvExt::DeleteLocalRef(jobject obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if (obj != nullptr) {
+    locals.Remove(local_ref_cookie, reinterpret_cast<IndirectRef>(obj));
+  }
+}
 void JNIEnvExt::SetCheckJniEnabled(bool enabled) {
   check_jni = enabled;
   functions = enabled ? GetCheckJniNativeInterface() : &gJniNativeInterface;
@@ -3199,7 +3261,7 @@
     // the comments in the JNI FindClass function.)
     typedef int (*JNI_OnLoadFn)(JavaVM*, void*);
     JNI_OnLoadFn jni_on_load = reinterpret_cast<JNI_OnLoadFn>(sym);
-    ClassLoader* old_class_loader = self->GetClassLoaderOverride();
+    SirtRef<ClassLoader> old_class_loader(self, self->GetClassLoaderOverride());
     self->SetClassLoaderOverride(class_loader);
 
     int version = 0;
@@ -3209,7 +3271,7 @@
       version = (*jni_on_load)(this, NULL);
     }
 
-    self->SetClassLoaderOverride(old_class_loader);
+    self->SetClassLoaderOverride(old_class_loader.get());
 
     if (version == JNI_ERR) {
       StringAppendF(detail, "JNI_ERR returned from JNI_OnLoad in \"%s\"", path.c_str());
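
With GetStringChars and the primitive-array getters now free to hand out a copy (is_copy set to JNI_TRUE) and the Release* functions taking the elements pointer so that copy can be written back and freed, native callers must pair every Get with the matching Release and pass back exactly the pointer they received. The caller-side sketch below illustrates that contract; SumIntArray is a made-up helper, not part of ART or the JNI specification.

#include <jni.h>

// Sums a Java int[] from native code while respecting the copy/no-copy contract.
static jlong SumIntArray(JNIEnv* env, jintArray array) {
  jboolean is_copy = JNI_FALSE;
  jint* elements = env->GetIntArrayElements(array, &is_copy);
  if (elements == nullptr) {
    return 0;  // An OutOfMemoryError is already pending.
  }
  jlong sum = 0;
  const jsize length = env->GetArrayLength(array);
  for (jsize i = 0; i < length; ++i) {
    sum += elements[i];
  }
  // JNI_ABORT: the elements were not modified, so no copy-back is needed. The
  // runtime still frees its copy (or unpins the array) when this is called.
  env->ReleaseIntArrayElements(array, elements, JNI_ABORT);
  return sum;
}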
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 888d5e5..96f7ae0 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -162,6 +162,9 @@
     return Offset(OFFSETOF_MEMBER(JNIEnvExt, self));
   }
 
+  jobject NewLocalRef(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void DeleteLocalRef(jobject obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   Thread* const self;
   JavaVMExt* vm;
 
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index c389580..26b1836 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -86,19 +86,19 @@
     const char* class_name = is_static ? "StaticLeafMethods" : "NonStaticLeafMethods";
     jobject jclass_loader(LoadDex(class_name));
     Thread* self = Thread::Current();
+    SirtRef<mirror::ClassLoader> null_class_loader(self, nullptr);
     SirtRef<mirror::ClassLoader>
         class_loader(self,
                      ScopedObjectAccessUnchecked(self).Decode<mirror::ClassLoader*>(jclass_loader));
     if (is_static) {
-      CompileDirectMethod(class_loader.get(), class_name, method_name, method_signature);
+      CompileDirectMethod(class_loader, class_name, method_name, method_signature);
     } else {
-      CompileVirtualMethod(NULL, "java.lang.Class", "isFinalizable", "()Z");
-      CompileDirectMethod(NULL, "java.lang.Object", "<init>", "()V");
-      CompileVirtualMethod(class_loader.get(), class_name, method_name, method_signature);
+      CompileVirtualMethod(null_class_loader, "java.lang.Class", "isFinalizable", "()Z");
+      CompileDirectMethod(null_class_loader, "java.lang.Object", "<init>", "()V");
+      CompileVirtualMethod(class_loader, class_name, method_name, method_signature);
     }
 
-    mirror::Class* c = class_linker_->FindClass(DotToDescriptor(class_name).c_str(),
-                                                class_loader.get());
+    mirror::Class* c = class_linker_->FindClass(DotToDescriptor(class_name).c_str(), class_loader);
     CHECK(c != NULL);
 
     method = is_static ? c->FindDirectMethod(method_name, method_signature)
@@ -1081,7 +1081,6 @@
   EXPECT_EQ(memcmp(&src_buf[0], xs, size * sizeof(scalar_type)), 0) \
     << # get_elements_fn " not equal"; \
   env_->release_elements_fn(a, xs, 0); \
-  EXPECT_EQ(reinterpret_cast<uintptr_t>(v), reinterpret_cast<uintptr_t>(xs))
 
 TEST_F(JniInternalTest, BooleanArrays) {
   EXPECT_PRIMITIVE_ARRAY(NewBooleanArray, GetBooleanArrayRegion, SetBooleanArrayRegion,
@@ -1337,7 +1336,7 @@
 
   jboolean is_copy = JNI_FALSE;
   chars = env_->GetStringChars(s, &is_copy);
-  EXPECT_EQ(JNI_FALSE, is_copy);
+  EXPECT_EQ(JNI_TRUE, is_copy);
   EXPECT_EQ(expected[0], chars[0]);
   EXPECT_EQ(expected[1], chars[1]);
   EXPECT_EQ(expected[2], chars[2]);
@@ -1361,7 +1360,8 @@
 
   jboolean is_copy = JNI_FALSE;
   chars = env_->GetStringCritical(s, &is_copy);
-  EXPECT_EQ(JNI_FALSE, is_copy);
+  // TODO: Fix GetStringCritical to use the same mechanism as GetPrimitiveArrayElementsCritical.
+  EXPECT_EQ(JNI_TRUE, is_copy);
   EXPECT_EQ(expected[0], chars[0]);
   EXPECT_EQ(expected[1], chars[1]);
   EXPECT_EQ(expected[2], chars[2]);
@@ -1669,9 +1669,9 @@
   jobject jclass_loader = LoadDex("Main");
   SirtRef<mirror::ClassLoader>
       class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(jclass_loader));
-  CompileDirectMethod(class_loader.get(), "Main", "main", "([Ljava/lang/String;)V");
+  CompileDirectMethod(class_loader, "Main", "main", "([Ljava/lang/String;)V");
 
-  mirror::Class* klass = class_linker_->FindClass("LMain;", class_loader.get());
+  mirror::Class* klass = class_linker_->FindClass("LMain;", class_loader);
   ASSERT_TRUE(klass != NULL);
 
   mirror::ArtMethod* method = klass->FindDirectMethod("main", "([Ljava/lang/String;)V");
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
index efd3d9d..aea10c2 100644
--- a/runtime/lock_word-inl.h
+++ b/runtime/lock_word-inl.h
@@ -36,6 +36,11 @@
   return reinterpret_cast<Monitor*>(value_ << kStateSize);
 }
 
+inline size_t LockWord::ForwardingAddress() const {
+  DCHECK_EQ(GetState(), kForwardingAddress);
+  return static_cast<size_t>(value_ << kStateSize);
+}
+
 inline LockWord::LockWord() : value_(0) {
   DCHECK_EQ(GetState(), kUnlocked);
 }
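
The new ForwardingAddress() accessor above, together with the FromForwardingAddress() factory added in lock_word.h below, packs a GC forwarding address into the lock word by shifting out the alignment bits and storing the state in the top bits; because the address is aligned to 1 << kStateSize bytes, the >>/<< pair round-trips it exactly. The worked sketch below restates that arithmetic locally, assuming a 32-bit lock word with kStateSize == 2 and kStateShift == 30 (those values are not shown in this hunk).

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  constexpr uint32_t kStateSize = 2;                 // Assumed number of state bits.
  constexpr uint32_t kStateShift = 32 - kStateSize;  // Assumed position of the state bits.
  constexpr uint32_t kStateForwardingAddress = 3;

  const uint32_t target = 0x12345670;  // Must be aligned to 1 << kStateSize bytes.
  assert(target % (1u << kStateSize) == 0);

  // FromForwardingAddress: drop the known-zero low bits, put the state on top.
  const uint32_t value = (target >> kStateSize) | (kStateForwardingAddress << kStateShift);

  // ForwardingAddress: shifting left by kStateSize pushes the state bits out of the
  // 32-bit word and restores the original, aligned address.
  const uint32_t decoded = value << kStateSize;
  std::printf("state=%u address=0x%x\n",
              static_cast<unsigned>((value >> kStateShift) & 0x3),
              static_cast<unsigned>(decoded));
  assert(decoded == target);
  return 0;
}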
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 1882ae6..d24a3bb 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -21,6 +21,7 @@
 #include <stdint.h>
 
 #include "base/logging.h"
+#include "utils.h"
 
 namespace art {
 namespace mirror {
@@ -73,6 +74,7 @@
     kStateThinOrUnlocked = 0,
     kStateFat = 1,
     kStateHash = 2,
+    kStateForwardingAddress = 3,
 
     // When the state is kHashCode, the non-state bits hold the hashcode.
     kHashShift = 0,
@@ -86,6 +88,11 @@
                      (kStateThinOrUnlocked << kStateShift));
   }
 
+  static LockWord FromForwardingAddress(size_t target) {
+    DCHECK(IsAligned<1 << kStateSize>(target));
+    return LockWord((target >> kStateSize) | (kStateForwardingAddress << kStateShift));
+  }
+
   static LockWord FromHashCode(uint32_t hash_code) {
     CHECK_LE(hash_code, static_cast<uint32_t>(kHashMask));
     return LockWord((hash_code << kHashShift) | (kStateHash << kStateShift));
@@ -96,19 +103,25 @@
     kThinLocked,  // Single uncontended owner.
     kFatLocked,   // See associated monitor.
     kHashCode,    // Lock word contains an identity hash.
+    kForwardingAddress,  // Lock word contains the forwarding address of an object.
   };
 
   LockState GetState() const {
-    uint32_t internal_state = (value_ >> kStateShift) & kStateMask;
-    if (value_ == 0) {
+    if (UNLIKELY(value_ == 0)) {
       return kUnlocked;
-    } else if (internal_state == kStateThinOrUnlocked) {
-      return kThinLocked;
-    } else if (internal_state == kStateHash) {
-      return kHashCode;
     } else {
-      DCHECK_EQ(internal_state, static_cast<uint32_t>(kStateFat));
-      return kFatLocked;
+      uint32_t internal_state = (value_ >> kStateShift) & kStateMask;
+      switch (internal_state) {
+        case kStateThinOrUnlocked:
+          return kThinLocked;
+        case kStateHash:
+          return kHashCode;
+        case kStateForwardingAddress:
+          return kForwardingAddress;
+        default:
+          DCHECK_EQ(internal_state, static_cast<uint32_t>(kStateFat));
+          return kFatLocked;
+      }
     }
   }
 
@@ -121,6 +134,9 @@
   // Return the Monitor encoded in a fat lock.
   Monitor* FatLockMonitor() const;
 
+  // Return the forwarding address stored in the monitor.
+  size_t ForwardingAddress() const;
+
   // Default constructor with no lock ownership.
   LockWord();
 
diff --git a/runtime/locks.h b/runtime/locks.h
index 2262218..2308e95 100644
--- a/runtime/locks.h
+++ b/runtime/locks.h
@@ -37,6 +37,9 @@
   kThreadSuspendCountLock,
   kAbortLock,
   kJdwpSocketLock,
+  kRosAllocGlobalLock,
+  kRosAllocBracketLock,
+  kRosAllocBulkFreeLock,
   kAllocSpaceLock,
   kMarkSweepMarkStackLock,
   kDefaultMutexLevel,
diff --git a/runtime/mapping_table.h b/runtime/mapping_table.h
index 2162008..c468c1e 100644
--- a/runtime/mapping_table.h
+++ b/runtime/mapping_table.h
@@ -30,7 +30,7 @@
 
   uint32_t TotalSize() const PURE {
     const uint8_t* table = encoded_table_;
-    if (table == NULL) {
+    if (table == nullptr) {
       return 0;
     } else {
       return DecodeUnsignedLeb128(&table);
@@ -39,7 +39,7 @@
 
   uint32_t DexToPcSize() const PURE {
     const uint8_t* table = encoded_table_;
-    if (table == NULL) {
+    if (table == nullptr) {
       return 0;
     } else {
       uint32_t total_size = DecodeUnsignedLeb128(&table);
@@ -50,9 +50,11 @@
 
   const uint8_t* FirstDexToPcPtr() const {
     const uint8_t* table = encoded_table_;
-    if (table != NULL) {
-      DecodeUnsignedLeb128(&table);  // Total_size, unused.
+    if (table != nullptr) {
+      uint32_t total_size = DecodeUnsignedLeb128(&table);
       uint32_t pc_to_dex_size = DecodeUnsignedLeb128(&table);
+      // We must have dex to pc entries or else the loop will go beyond the end of the table.
+      DCHECK_GT(total_size, pc_to_dex_size);
       for (uint32_t i = 0; i < pc_to_dex_size; ++i) {
         DecodeUnsignedLeb128(&table);  // Move ptr past native PC.
         DecodeUnsignedLeb128(&table);  // Move ptr past dex PC.
@@ -64,13 +66,15 @@
   class DexToPcIterator {
    public:
     DexToPcIterator(const MappingTable* table, uint32_t element) :
-        table_(table), element_(element), end_(table_->DexToPcSize()), encoded_table_ptr_(NULL),
+        table_(table), element_(element), end_(table_->DexToPcSize()), encoded_table_ptr_(nullptr),
         native_pc_offset_(0), dex_pc_(0) {
-      if (element == 0) {
-        encoded_table_ptr_ = table_->FirstDexToPcPtr();
-        native_pc_offset_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
-        dex_pc_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
-      } else {
+      if (element == 0) {  // An iterator wanted from the start.
+        if (end_ > 0) {
+          encoded_table_ptr_ = table_->FirstDexToPcPtr();
+          native_pc_offset_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
+          dex_pc_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
+        }
+      } else {  // An iterator wanted from the end.
         DCHECK_EQ(table_->DexToPcSize(), element);
       }
     }
@@ -100,7 +104,7 @@
     const MappingTable* const table_;  // The original table.
     uint32_t element_;  // A value in the range 0 to end_.
     const uint32_t end_;  // Equal to table_->DexToPcSize().
-    const uint8_t* encoded_table_ptr_;  // Either NULL or points to encoded data after this entry.
+    const uint8_t* encoded_table_ptr_;  // Either nullptr or points to encoded data after this entry.
     uint32_t native_pc_offset_;  // The current value of native pc offset.
     uint32_t dex_pc_;  // The current value of dex pc.
   };
@@ -116,7 +120,7 @@
 
   uint32_t PcToDexSize() const PURE {
     const uint8_t* table = encoded_table_;
-    if (table == NULL) {
+    if (table == nullptr) {
       return 0;
     } else {
       DecodeUnsignedLeb128(&table);  // Total_size, unused.
@@ -127,7 +131,7 @@
 
   const uint8_t* FirstPcToDexPtr() const {
     const uint8_t* table = encoded_table_;
-    if (table != NULL) {
+    if (table != nullptr) {
       DecodeUnsignedLeb128(&table);  // Total_size, unused.
       DecodeUnsignedLeb128(&table);  // PC to Dex size, unused.
     }
@@ -137,13 +141,15 @@
   class PcToDexIterator {
    public:
     PcToDexIterator(const MappingTable* table, uint32_t element) :
-        table_(table), element_(element), end_(table_->PcToDexSize()), encoded_table_ptr_(NULL),
+        table_(table), element_(element), end_(table_->PcToDexSize()), encoded_table_ptr_(nullptr),
         native_pc_offset_(0), dex_pc_(0) {
-      if (element == 0) {
-        encoded_table_ptr_ = table_->FirstPcToDexPtr();
-        native_pc_offset_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
-        dex_pc_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
-      } else {
+      if (element == 0) {  // An iterator wanted from the start.
+        if (end_ > 0) {
+          encoded_table_ptr_ = table_->FirstPcToDexPtr();
+          native_pc_offset_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
+          dex_pc_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
+        }
+      } else {  // An iterator wanted from the end.
         DCHECK_EQ(table_->PcToDexSize(), element);
       }
     }
@@ -173,7 +179,7 @@
     const MappingTable* const table_;  // The original table.
     uint32_t element_;  // A value in the range 0 to PcToDexSize.
     const uint32_t end_;  // Equal to table_->PcToDexSize().
-    const uint8_t* encoded_table_ptr_;  // Either NULL or points to encoded data after this entry.
+    const uint8_t* encoded_table_ptr_;  // Either null or points to encoded data after this entry.
     uint32_t native_pc_offset_;  // The current value of native pc offset.
     uint32_t dex_pc_;  // The current value of dex pc.
   };
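
The iterators above walk a ULEB128-encoded table whose layout, as FirstDexToPcPtr() shows, is: a total entry count, a pc-to-dex entry count, then one (native PC offset, dex PC) pair per entry, with the pc-to-dex entries first and the dex-to-pc entries after them; the new end_ > 0 guard keeps empty sub-tables from reading past the header. The decoding sketch below assumes exactly that layout; DecodeUleb128 is a local stand-in for DecodeUnsignedLeb128 and the byte values are invented for illustration.

#include <cstdint>
#include <cstdio>

// Standard unsigned LEB128 decode: 7 payload bits per byte, high bit means "more".
static uint32_t DecodeUleb128(const uint8_t** data) {
  uint32_t result = 0;
  int shift = 0;
  uint8_t byte;
  do {
    byte = *(*data)++;
    result |= static_cast<uint32_t>(byte & 0x7f) << shift;
    shift += 7;
  } while ((byte & 0x80) != 0);
  return result;
}

int main() {
  // total_size = 2, pc_to_dex_size = 1, then two (native PC offset, dex PC) pairs.
  const uint8_t table[] = {2, 1, 0x10, 0x00, 0x20, 0x05};
  const uint8_t* p = table;
  const uint32_t total_size = DecodeUleb128(&p);
  const uint32_t pc_to_dex_size = DecodeUleb128(&p);
  for (uint32_t i = 0; i < total_size; ++i) {
    const uint32_t native_pc_offset = DecodeUleb128(&p);
    const uint32_t dex_pc = DecodeUleb128(&p);
    std::printf("%s: native_pc_offset=0x%x dex_pc=0x%x\n",
                i < pc_to_dex_size ? "pc->dex" : "dex->pc", native_pc_offset, dex_pc);
  }
  return 0;
}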
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 3afb606..39e838f 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -133,12 +133,13 @@
                                               fd,
                                               page_aligned_offset));
   if (actual == MAP_FAILED) {
+    std::string strerr(strerror(errno));
     std::string maps;
     ReadFileToString("/proc/self/maps", &maps);
-    *error_msg = StringPrintf("mmap(%p, %zd, %x, %x, %d, %lld) of file '%s' failed\n%s",
+    *error_msg = StringPrintf("mmap(%p, %zd, %x, %x, %d, %lld) of file '%s' failed: %s\n%s",
                               page_aligned_addr, page_aligned_byte_count, prot, flags, fd,
-                              static_cast<int64_t>(page_aligned_offset),
-                              filename, maps.c_str());
+                              static_cast<int64_t>(page_aligned_offset), filename, strerr.c_str(),
+                              maps.c_str());
     return NULL;
   }
   return new MemMap("file", actual + page_offset, byte_count, actual, page_aligned_byte_count,
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index c60e714..a754b69 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -58,44 +58,54 @@
   return size;
 }
 
-static inline Array* SetArrayLength(Array* array, size_t length) {
-  if (LIKELY(array != NULL)) {
+// Used for setting the array length in the allocation code path to ensure it is guarded by a CAS.
+class SetLengthVisitor {
+ public:
+  explicit SetLengthVisitor(int32_t length) : length_(length) {
+  }
+
+  void operator()(mirror::Object* obj) const {
+    mirror::Array* array = obj->AsArray();
     DCHECK(array->IsArrayInstance());
-    array->SetLength(length);
+    array->SetLength(length_);
   }
-  return array;
-}
 
-inline Array* Array::AllocInstrumented(Thread* self, Class* array_class, int32_t component_count,
-                                       size_t component_size) {
+ private:
+  const int32_t length_;
+};
+
+template <bool kIsInstrumented>
+inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
+                           size_t component_size, gc::AllocatorType allocator_type) {
   size_t size = ComputeArraySize(self, array_class, component_count, component_size);
   if (UNLIKELY(size == 0)) {
-    return NULL;
+    return nullptr;
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  Array* array = down_cast<Array*>(heap->AllocObjectInstrumented(self, array_class, size));
-  return SetArrayLength(array, component_count);
+  SetLengthVisitor visitor(component_count);
+  return down_cast<Array*>(
+      heap->AllocObjectWithAllocator<kIsInstrumented>(self, array_class, size, allocator_type,
+                                                      visitor));
 }
 
-inline Array* Array::AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count,
-                                         size_t component_size) {
-  size_t size = ComputeArraySize(self, array_class, component_count, component_size);
-  if (UNLIKELY(size == 0)) {
-    return NULL;
-  }
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  Array* array = down_cast<Array*>(heap->AllocObjectUninstrumented(self, array_class, size));
-  return SetArrayLength(array, component_count);
-}
-
-inline Array* Array::AllocInstrumented(Thread* self, Class* array_class, int32_t component_count) {
+template <bool kIsInstrumented>
+inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
+                           gc::AllocatorType allocator_type) {
   DCHECK(array_class->IsArrayClass());
-  return AllocInstrumented(self, array_class, component_count, array_class->GetComponentSize());
+  return Alloc<kIsInstrumented>(self, array_class, component_count, array_class->GetComponentSize(),
+                                allocator_type);
+}
+template <bool kIsInstrumented>
+inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count) {
+  return Alloc<kIsInstrumented>(self, array_class, component_count,
+               Runtime::Current()->GetHeap()->GetCurrentAllocator());
 }
 
-inline Array* Array::AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count) {
-  DCHECK(array_class->IsArrayClass());
-  return AllocUninstrumented(self, array_class, component_count, array_class->GetComponentSize());
+template <bool kIsInstrumented>
+inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
+                           size_t component_size) {
+  return Alloc<kIsInstrumented>(self, array_class, component_count, component_size,
+               Runtime::Current()->GetHeap()->GetCurrentAllocator());
 }
 
 }  // namespace mirror
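
SetLengthVisitor exists so that the allocator itself writes the array length before the new object is published, instead of the caller patching it in afterwards as the removed SetArrayLength() helper did. The sketch below shows that pre-publication visitor pattern in isolation; Object and AllocObjectWithVisitor are illustrative stand-ins, not ART's heap API.

#include <cstdint>
#include <cstdio>
#include <cstdlib>

struct Object {
  int32_t length;
};

// Allocates zero-initialized storage and lets the visitor initialize it while the
// object is still private to this thread; only then is the pointer handed out.
template <typename Visitor>
Object* AllocObjectWithVisitor(size_t byte_count, const Visitor& visitor) {
  Object* obj = static_cast<Object*>(std::calloc(1, byte_count));
  if (obj == nullptr) {
    return nullptr;
  }
  visitor(obj);  // Runs before any other thread (or the GC) can see the object.
  return obj;
}

class SetLengthVisitor {
 public:
  explicit SetLengthVisitor(int32_t length) : length_(length) {}
  void operator()(Object* obj) const { obj->length = length_; }

 private:
  const int32_t length_;
};

int main() {
  Object* array = AllocObjectWithVisitor(sizeof(Object) + 4 * sizeof(int32_t),
                                         SetLengthVisitor(4));
  if (array != nullptr) {
    std::printf("length=%d\n", array->length);
    std::free(array);
  }
  return 0;
}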
diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc
index 020085d..00b88db 100644
--- a/runtime/mirror/array.cc
+++ b/runtime/mirror/array.cc
@@ -41,15 +41,15 @@
 // Recursively create an array with multiple dimensions.  Elements may be
 // Objects or primitive types.
 static Array* RecursiveCreateMultiArray(Thread* self, Class* array_class, int current_dimension,
-                                        IntArray* dimensions)
+                                        SirtRef<mirror::IntArray>& dimensions)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   int32_t array_length = dimensions->Get(current_dimension);
-  SirtRef<Array> new_array(self, Array::Alloc(self, array_class, array_length));
+  SirtRef<Array> new_array(self, Array::Alloc<true>(self, array_class, array_length));
   if (UNLIKELY(new_array.get() == NULL)) {
     CHECK(self->IsExceptionPending());
     return NULL;
   }
-  if ((current_dimension + 1) < dimensions->GetLength()) {
+  if (current_dimension + 1 < dimensions->GetLength()) {
     // Create a new sub-array in every element of the array.
     for (int32_t i = 0; i < array_length; i++) {
       Array* sub_array = RecursiveCreateMultiArray(self, array_class->GetComponentType(),
@@ -87,13 +87,15 @@
 
   // Find/generate the array class.
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  Class* array_class = class_linker->FindClass(descriptor.c_str(), element_class->GetClassLoader());
+  SirtRef<mirror::ClassLoader> class_loader(self, element_class->GetClassLoader());
+  Class* array_class = class_linker->FindClass(descriptor.c_str(), class_loader);
   if (UNLIKELY(array_class == NULL)) {
     CHECK(self->IsExceptionPending());
     return NULL;
   }
   // create the array
-  Array* new_array = RecursiveCreateMultiArray(self, array_class, 0, dimensions);
+  SirtRef<mirror::IntArray> sirt_dimensions(self, dimensions);
+  Array* new_array = RecursiveCreateMultiArray(self, array_class, 0, sirt_dimensions);
   if (UNLIKELY(new_array == NULL)) {
     CHECK(self->IsExceptionPending());
     return NULL;
@@ -112,7 +114,7 @@
 template<typename T>
 PrimitiveArray<T>* PrimitiveArray<T>::Alloc(Thread* self, size_t length) {
   DCHECK(array_class_ != NULL);
-  Array* raw_array = Array::Alloc(self, array_class_, length, sizeof(T));
+  Array* raw_array = Array::Alloc<true>(self, array_class_, length, sizeof(T));
   return down_cast<PrimitiveArray<T>*>(raw_array);
 }
 
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 570dcaa..a332f97 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -18,33 +18,32 @@
 #define ART_RUNTIME_MIRROR_ARRAY_H_
 
 #include "object.h"
+#include "gc/heap.h"
 
 namespace art {
 namespace mirror {
 
 class MANAGED Array : public Object {
  public:
-  // A convenience for code that doesn't know the component size,
-  // and doesn't want to have to work it out itself.
-  static Array* Alloc(Thread* self, Class* array_class, int32_t component_count)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return AllocInstrumented(self, array_class, component_count);
-  }
-  static Array* AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static Array* AllocInstrumented(Thread* self, Class* array_class, int32_t component_count)
+  // A convenience for code that doesn't know the component size, and doesn't want to have to work
+  // it out itself.
+  template <bool kIsInstrumented>
+  static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
+                      gc::AllocatorType allocator_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  template <bool kIsInstrumented>
+  static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
+                      size_t component_size, gc::AllocatorType allocator_type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <bool kIsInstrumented>
+  static Array* Alloc(Thread* self, Class* array_class, int32_t component_count)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <bool kIsInstrumented>
   static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
                       size_t component_size)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return AllocInstrumented(self, array_class, component_count, component_size);
-  }
-  static Array* AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count,
-                                    size_t component_size)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static Array* AllocInstrumented(Thread* self, Class* array_class, int32_t component_count,
-                                  size_t component_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Array* CreateMultiArray(Thread* self, Class* element_class, IntArray* dimensions)
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 7f3a302..3a28974 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -248,7 +248,7 @@
   if (method->IsDirect()) {
     return method;
   }
-  if (method->GetDeclaringClass()->IsInterface()) {
+  if (method->GetDeclaringClass()->IsInterface() && !method->IsMiranda()) {
     return FindVirtualMethodForInterface(method);
   }
   return FindVirtualMethodForVirtual(method);
@@ -357,14 +357,20 @@
   DCHECK_GE(this->object_size_, sizeof(Object));
 }
 
-inline Object* Class::AllocObjectInstrumented(Thread* self) {
+template <bool kIsInstrumented>
+inline Object* Class::Alloc(Thread* self, gc::AllocatorType allocator_type) {
   CheckObjectAlloc();
-  return Runtime::Current()->GetHeap()->AllocObjectInstrumented(self, this, this->object_size_);
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  return heap->AllocObjectWithAllocator<kIsInstrumented>(self, this, this->object_size_,
+                                                         allocator_type);
 }
 
-inline Object* Class::AllocObjectUninstrumented(Thread* self) {
-  CheckObjectAlloc();
-  return Runtime::Current()->GetHeap()->AllocObjectUninstrumented(self, this, this->object_size_);
+inline Object* Class::AllocObject(Thread* self) {
+  return Alloc<true>(self, Runtime::Current()->GetHeap()->GetCurrentAllocator());
+}
+
+inline Object* Class::AllocNonMovableObject(Thread* self) {
+  return Alloc<true>(self, Runtime::Current()->GetHeap()->GetCurrentNonMovingAllocator());
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index f3cb54a..cdc5ab2 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -52,7 +52,8 @@
 
 void Class::SetStatus(Status new_status, Thread* self) {
   Status old_status = GetStatus();
-  bool class_linker_initialized = Runtime::Current()->GetClassLinker() != nullptr;
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  bool class_linker_initialized = class_linker != nullptr && class_linker->IsInitialized();
   if (LIKELY(class_linker_initialized)) {
     if (UNLIKELY(new_status <= old_status && new_status != kStatusError)) {
       LOG(FATAL) << "Unexpected change back of class status for " << PrettyClass(this) << " "
@@ -588,7 +589,6 @@
 ArtField* Class::FindStaticField(const StringPiece& name, const StringPiece& type) {
   // Is the field in this class (or its interfaces), or any of its
   // superclasses (or their interfaces)?
-  ClassHelper kh;
   for (Class* k = this; k != NULL; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredStaticField(name, type);
@@ -596,7 +596,7 @@
       return f;
     }
     // Is this field in any of this class' interfaces?
-    kh.ChangeClass(k);
+    ClassHelper kh(k);
     for (uint32_t i = 0; i < kh.NumDirectInterfaces(); ++i) {
       Class* interface = kh.GetDirectInterface(i);
       f = interface->FindStaticField(name, type);
@@ -609,7 +609,6 @@
 }
 
 ArtField* Class::FindStaticField(const DexCache* dex_cache, uint32_t dex_field_idx) {
-  ClassHelper kh;
   for (Class* k = this; k != NULL; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredStaticField(dex_cache, dex_field_idx);
@@ -617,7 +616,7 @@
       return f;
     }
     // Is this field in any of this class' interfaces?
-    kh.ChangeClass(k);
+    ClassHelper kh(k);
     for (uint32_t i = 0; i < kh.NumDirectInterfaces(); ++i) {
       Class* interface = kh.GetDirectInterface(i);
       f = interface->FindStaticField(dex_cache, dex_field_idx);
@@ -631,7 +630,6 @@
 
 ArtField* Class::FindField(const StringPiece& name, const StringPiece& type) {
   // Find a field using the JLS field resolution order
-  ClassHelper kh;
   for (Class* k = this; k != NULL; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredInstanceField(name, type);
@@ -643,7 +641,7 @@
       return f;
     }
     // Is this field in any of this class' interfaces?
-    kh.ChangeClass(k);
+    ClassHelper kh(k);
     for (uint32_t i = 0; i < kh.NumDirectInterfaces(); ++i) {
       Class* interface = kh.GetDirectInterface(i);
       f = interface->FindStaticField(name, type);
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index ed1aad3..5f64bb4 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_MIRROR_CLASS_H_
 #define ART_RUNTIME_MIRROR_CLASS_H_
 
+#include "gc/heap.h"
 #include "modifiers.h"
 #include "object.h"
 #include "primitive.h"
@@ -377,12 +378,14 @@
   }
 
   // Creates a raw object instance but does not invoke the default constructor.
-  Object* AllocObject(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return AllocObjectInstrumented(self);
-  }
+  template <bool kIsInstrumented>
+  ALWAYS_INLINE Object* Alloc(Thread* self, gc::AllocatorType allocator_type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  Object* AllocObjectUninstrumented(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  Object* AllocObjectInstrumented(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Object* AllocObject(Thread* self)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Object* AllocNonMovableObject(Thread* self)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsVariableSize() const {
     // Classes and arrays vary in size, and so the object_size_ field cannot
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index bd187c1..008a173 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -39,49 +39,50 @@
 namespace art {
 namespace mirror {
 
-Object* Object::Clone(Thread* self) {
-  Class* c = GetClass();
-  DCHECK(!c->IsClassClass());
-
-  // Object::SizeOf gets the right size even if we're an array.
-  // Using c->AllocObject() here would be wrong.
-  size_t num_bytes = SizeOf();
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  SirtRef<Object> copy(self, heap->AllocObject(self, c, num_bytes));
-  if (copy.get() == NULL) {
-    return NULL;
-  }
-
+static Object* CopyObject(Thread* self, mirror::Object* dest, mirror::Object* src, size_t num_bytes)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Copy instance data.  We assume memcpy copies by words.
   // TODO: expose and use move32.
-  byte* src_bytes = reinterpret_cast<byte*>(this);
-  byte* dst_bytes = reinterpret_cast<byte*>(copy.get());
+  byte* src_bytes = reinterpret_cast<byte*>(src);
+  byte* dst_bytes = reinterpret_cast<byte*>(dest);
   size_t offset = sizeof(Object);
   memcpy(dst_bytes + offset, src_bytes + offset, num_bytes - offset);
-
+  gc::Heap* heap = Runtime::Current()->GetHeap();
   // Perform write barriers on copied object references.
+  Class* c = src->GetClass();
   if (c->IsArrayClass()) {
     if (!c->GetComponentType()->IsPrimitive()) {
-      const ObjectArray<Object>* array = copy->AsObjectArray<Object>();
-      heap->WriteBarrierArray(copy.get(), 0, array->GetLength());
+      const ObjectArray<Object>* array = dest->AsObjectArray<Object>();
+      heap->WriteBarrierArray(dest, 0, array->GetLength());
     }
   } else {
-    for (const Class* klass = c; klass != NULL; klass = klass->GetSuperClass()) {
-      size_t num_reference_fields = klass->NumReferenceInstanceFields();
-      for (size_t i = 0; i < num_reference_fields; ++i) {
-        ArtField* field = klass->GetInstanceField(i);
-        MemberOffset field_offset = field->GetOffset();
-        const Object* ref = copy->GetFieldObject<const Object*>(field_offset, false);
-        heap->WriteBarrierField(copy.get(), field_offset, ref);
-      }
-    }
+    heap->WriteBarrierEveryFieldOf(dest);
   }
-
   if (c->IsFinalizable()) {
-    heap->AddFinalizerReference(Thread::Current(), copy.get());
+    SirtRef<Object> sirt_dest(self, dest);
+    heap->AddFinalizerReference(self, dest);
+    return sirt_dest.get();
   }
+  return dest;
+}
 
-  return copy.get();
+Object* Object::Clone(Thread* self) {
+  CHECK(!IsClass()) << "Can't clone classes.";
+  // Object::SizeOf gets the right size even if we're an array. Using c->AllocObject() here would
+  // be wrong.
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  size_t num_bytes = SizeOf();
+  SirtRef<Object> this_object(self, this);
+  Object* copy;
+  if (heap->IsMovableObject(this)) {
+    copy = heap->AllocObject<true>(self, GetClass(), num_bytes);
+  } else {
+    copy = heap->AllocNonMovableObject<true>(self, GetClass(), num_bytes);
+  }
+  if (LIKELY(copy != nullptr)) {
+    return CopyObject(self, copy, this_object.get(), num_bytes);
+  }
+  return copy;
 }
 
 int32_t Object::GenerateIdentityHashCode() {
@@ -96,8 +97,9 @@
 }
 
 int32_t Object::IdentityHashCode() const {
+  mirror::Object* current_this = const_cast<mirror::Object*>(this);
   while (true) {
-    LockWord lw = GetLockWord();
+    LockWord lw = current_this->GetLockWord();
     switch (lw.GetState()) {
       case LockWord::kUnlocked: {
         // Try to compare and swap in a new hash, if we succeed we will return the hash on the next
@@ -112,7 +114,10 @@
       case LockWord::kThinLocked: {
         // Inflate the thin lock to a monitor and stick the hash code inside of the monitor.
         Thread* self = Thread::Current();
-        Monitor::InflateThinLocked(self, const_cast<Object*>(this), lw, GenerateIdentityHashCode());
+        SirtRef<mirror::Object> sirt_this(self, current_this);
+        Monitor::InflateThinLocked(self, sirt_this, lw, GenerateIdentityHashCode());
+        // A GC may have occurred when we switched to kBlocked.
+        current_this = sirt_this.get();
         break;
       }
       case LockWord::kFatLocked: {
@@ -124,6 +129,10 @@
       case LockWord::kHashCode: {
         return lw.GetHashCode();
       }
+      default: {
+        LOG(FATAL) << "Invalid state during hashcode " << lw.GetState();
+        break;
+      }
     }
   }
   LOG(FATAL) << "Unreachable";
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index e8ea3f2..0fb2039 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -30,6 +30,7 @@
 class Monitor;
 struct ObjectOffsets;
 class Thread;
+template <typename T> class SirtRef;
 
 namespace mirror {
 
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index abc88a3..be49b42 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -23,22 +23,32 @@
 #include "mirror/art_field.h"
 #include "mirror/class.h"
 #include "runtime.h"
+#include "sirt_ref.h"
 #include "thread.h"
 
 namespace art {
 namespace mirror {
 
 template<class T>
-inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self, Class* object_array_class, int32_t length) {
-  Array* array = Array::Alloc(self, object_array_class, length, sizeof(Object*));
-  if (UNLIKELY(array == NULL)) {
-    return NULL;
+inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self, Class* object_array_class,
+                                             int32_t length, gc::AllocatorType allocator_type) {
+  Array* array = Array::Alloc<true>(self, object_array_class, length, sizeof(Object*),
+                                    allocator_type);
+  if (UNLIKELY(array == nullptr)) {
+    return nullptr;
   } else {
     return array->AsObjectArray<T>();
   }
 }
 
 template<class T>
+inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self, Class* object_array_class,
+                                             int32_t length) {
+  return Alloc(self, object_array_class, length,
+               Runtime::Current()->GetHeap()->GetCurrentAllocator());
+}
+
+template<class T>
 inline T* ObjectArray<T>::Get(int32_t i) const {
   if (UNLIKELY(!IsValidIndex(i))) {
     return NULL;
@@ -134,9 +144,14 @@
 
 template<class T>
 inline ObjectArray<T>* ObjectArray<T>::CopyOf(Thread* self, int32_t new_length) {
-  ObjectArray<T>* new_array = Alloc(self, GetClass(), new_length);
-  if (LIKELY(new_array != NULL)) {
-    Copy(this, 0, new_array, 0, std::min(GetLength(), new_length));
+  // This array may be moved by a compacting GC during the allocation below.
+  SirtRef<ObjectArray<T> > sirt_this(self, this);
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  gc::AllocatorType allocator_type = heap->IsMovableObject(this) ? heap->GetCurrentAllocator() :
+      heap->GetCurrentNonMovingAllocator();
+  ObjectArray<T>* new_array = Alloc(self, GetClass(), new_length, allocator_type);
+  if (LIKELY(new_array != nullptr)) {
+    Copy(sirt_this.get(), 0, new_array, 0, std::min(sirt_this->GetLength(), new_length));
   }
   return new_array;
 }
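
ObjectArray::CopyOf() above picks the allocator for the new array from the movability of the source: a non-movable array is copied through the non-moving allocator, everything else goes through the heap's current allocator. Below is a reduced sketch of that dispatch with stand-in types (FakeHeap and AllocatorType are invented for illustration, not ART's gc::Heap API).

    #include <cstdint>
    #include <cstdio>

    enum class AllocatorType { kMovable, kNonMovable };

    // Stand-in heap: non-movable objects live in one fixed address range.
    struct FakeHeap {
      uintptr_t non_moving_begin;
      uintptr_t non_moving_end;

      bool IsMovableObject(const void* obj) const {
        uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
        return addr < non_moving_begin || addr >= non_moving_end;
      }
    };

    // In this sketch the copy simply inherits the source's allocator kind, the
    // idea being that users of a non-movable array may rely on a stable address.
    AllocatorType ChooseAllocatorForCopy(const FakeHeap& heap, const void* src) {
      return heap.IsMovableObject(src) ? AllocatorType::kMovable
                                       : AllocatorType::kNonMovable;
    }

    int main() {
      alignas(8) static char non_moving_space[64];
      FakeHeap heap{reinterpret_cast<uintptr_t>(non_moving_space),
                    reinterpret_cast<uintptr_t>(non_moving_space) + sizeof(non_moving_space)};
      int movable_object = 0;
      std::printf("%d %d\n",
                  static_cast<int>(ChooseAllocatorForCopy(heap, non_moving_space)),
                  static_cast<int>(ChooseAllocatorForCopy(heap, &movable_object)));
      return 0;
    }
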
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index 09ff519..5da8845 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_MIRROR_OBJECT_ARRAY_H_
 
 #include "array.h"
+#include "gc/heap.h"
 
 namespace art {
 namespace mirror {
@@ -25,6 +26,10 @@
 template<class T>
 class MANAGED ObjectArray : public Array {
  public:
+  static ObjectArray<T>* Alloc(Thread* self, Class* object_array_class, int32_t length,
+                               gc::AllocatorType allocator_type)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static ObjectArray<T>* Alloc(Thread* self, Class* object_array_class, int32_t length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index d0d1ee4..8272ff8 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -144,15 +144,15 @@
 TEST_F(ObjectTest, AllocArray) {
   ScopedObjectAccess soa(Thread::Current());
   Class* c = class_linker_->FindSystemClass("[I");
-  SirtRef<Array> a(soa.Self(), Array::Alloc(soa.Self(), c, 1));
+  SirtRef<Array> a(soa.Self(), Array::Alloc<true>(soa.Self(), c, 1));
   ASSERT_TRUE(c == a->GetClass());
 
   c = class_linker_->FindSystemClass("[Ljava/lang/Object;");
-  a.reset(Array::Alloc(soa.Self(), c, 1));
+  a.reset(Array::Alloc<true>(soa.Self(), c, 1));
   ASSERT_TRUE(c == a->GetClass());
 
   c = class_linker_->FindSystemClass("[[Ljava/lang/Object;");
-  a.reset(Array::Alloc(soa.Self(), c, 1));
+  a.reset(Array::Alloc<true>(soa.Self(), c, 1));
   ASSERT_TRUE(c == a->GetClass());
 }
 
@@ -221,7 +221,8 @@
       java_lang_dex_file_->GetIndexForStringId(*string_id));
   ASSERT_TRUE(type_id != NULL);
   uint32_t type_idx = java_lang_dex_file_->GetIndexForTypeId(*type_id);
-  Object* array = CheckAndAllocArrayFromCode(type_idx, sort, 3, Thread::Current(), false);
+  Object* array = CheckAndAllocArrayFromCode(type_idx, sort, 3, Thread::Current(), false,
+                                             Runtime::Current()->GetHeap()->GetCurrentAllocator());
   EXPECT_TRUE(array->IsArrayInstance());
   EXPECT_EQ(3, array->AsArray()->GetLength());
   EXPECT_TRUE(array->GetClass()->IsArrayClass());
@@ -269,8 +270,9 @@
   const DexFile* dex_file = Runtime::Current()->GetCompileTimeClassPath(class_loader)[0];
   CHECK(dex_file != NULL);
 
+  SirtRef<mirror::ClassLoader> loader(soa.Self(), soa.Decode<ClassLoader*>(class_loader));
   Class* klass =
-      class_linker_->FindClass("LStaticsFromCode;", soa.Decode<ClassLoader*>(class_loader));
+      class_linker_->FindClass("LStaticsFromCode;", loader);
   ArtMethod* clinit = klass->FindClassInitializer();
   const DexFile::StringId* klass_string_id = dex_file->FindStringId("LStaticsFromCode;");
   ASSERT_TRUE(klass_string_id != NULL);
@@ -392,6 +394,7 @@
 }
 
 TEST_F(ObjectTest, DescriptorCompare) {
+  // Two class loaders conflict in compile_time_class_paths_.
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* linker = class_linker_;
 
@@ -400,9 +403,9 @@
   SirtRef<ClassLoader> class_loader_1(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader_1));
   SirtRef<ClassLoader> class_loader_2(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader_2));
 
-  Class* klass1 = linker->FindClass("LProtoCompare;", class_loader_1.get());
+  Class* klass1 = linker->FindClass("LProtoCompare;", class_loader_1);
   ASSERT_TRUE(klass1 != NULL);
-  Class* klass2 = linker->FindClass("LProtoCompare2;", class_loader_2.get());
+  Class* klass2 = linker->FindClass("LProtoCompare2;", class_loader_2);
   ASSERT_TRUE(klass2 != NULL);
 
   ArtMethod* m1_1 = klass1->GetVirtualMethod(0);
@@ -468,8 +471,8 @@
   jobject jclass_loader = LoadDex("XandY");
   SirtRef<ClassLoader> class_loader(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader));
 
-  Class* X = class_linker_->FindClass("LX;", class_loader.get());
-  Class* Y = class_linker_->FindClass("LY;", class_loader.get());
+  Class* X = class_linker_->FindClass("LX;", class_loader);
+  Class* Y = class_linker_->FindClass("LY;", class_loader);
   ASSERT_TRUE(X != NULL);
   ASSERT_TRUE(Y != NULL);
 
@@ -501,8 +504,8 @@
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("XandY");
   SirtRef<ClassLoader> class_loader(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader));
-  Class* X = class_linker_->FindClass("LX;", class_loader.get());
-  Class* Y = class_linker_->FindClass("LY;", class_loader.get());
+  Class* X = class_linker_->FindClass("LX;", class_loader);
+  Class* Y = class_linker_->FindClass("LY;", class_loader);
 
   EXPECT_TRUE(X->IsAssignableFrom(X));
   EXPECT_TRUE(X->IsAssignableFrom(Y));
@@ -538,17 +541,17 @@
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("XandY");
   SirtRef<ClassLoader> class_loader(soa.Self(), soa.Decode<ClassLoader*>(jclass_loader));
-  Class* X = class_linker_->FindClass("LX;", class_loader.get());
-  Class* Y = class_linker_->FindClass("LY;", class_loader.get());
+  Class* X = class_linker_->FindClass("LX;", class_loader);
+  Class* Y = class_linker_->FindClass("LY;", class_loader);
   ASSERT_TRUE(X != NULL);
   ASSERT_TRUE(Y != NULL);
 
-  Class* YA = class_linker_->FindClass("[LY;", class_loader.get());
-  Class* YAA = class_linker_->FindClass("[[LY;", class_loader.get());
+  Class* YA = class_linker_->FindClass("[LY;", class_loader);
+  Class* YAA = class_linker_->FindClass("[[LY;", class_loader);
   ASSERT_TRUE(YA != NULL);
   ASSERT_TRUE(YAA != NULL);
 
-  Class* XAA = class_linker_->FindClass("[[LX;", class_loader.get());
+  Class* XAA = class_linker_->FindClass("[[LX;", class_loader);
   ASSERT_TRUE(XAA != NULL);
 
   Class* O = class_linker_->FindSystemClass("Ljava/lang/Object;");
diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc
index 9d76c6b..32a50fe 100644
--- a/runtime/mirror/stack_trace_element.cc
+++ b/runtime/mirror/stack_trace_element.cc
@@ -39,19 +39,19 @@
 }
 
 StackTraceElement* StackTraceElement::Alloc(Thread* self,
-                                            String* declaring_class,
-                                            String* method_name,
-                                            String* file_name,
+                                            SirtRef<String>& declaring_class,
+                                            SirtRef<String>& method_name,
+                                            SirtRef<String>& file_name,
                                             int32_t line_number) {
   StackTraceElement* trace =
       down_cast<StackTraceElement*>(GetStackTraceElement()->AllocObject(self));
   if (LIKELY(trace != NULL)) {
     trace->SetFieldObject(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, declaring_class_),
-                          const_cast<String*>(declaring_class), false);
+                          declaring_class.get(), false);
     trace->SetFieldObject(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, method_name_),
-                          const_cast<String*>(method_name), false);
+                          method_name.get(), false);
     trace->SetFieldObject(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, file_name_),
-                          const_cast<String*>(file_name), false);
+                          file_name.get(), false);
     trace->SetField32(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, line_number_),
                       line_number, false);
   }
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index a9751f9..2af5128 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_MIRROR_STACK_TRACE_ELEMENT_H_
 
 #include "object.h"
+#include "sirt_ref.h"
 
 namespace art {
 
@@ -49,9 +50,9 @@
   }
 
   static StackTraceElement* Alloc(Thread* self,
-                                  String* declaring_class,
-                                  String* method_name,
-                                  String* file_name,
+                                  SirtRef<String>& declaring_class,
+                                  SirtRef<String>& method_name,
+                                  SirtRef<String>& file_name,
                                   int32_t line_number)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 9c93f17..b372fe7 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -123,8 +123,8 @@
                                int32_t hash_code) {
   CHECK(utf16_data_in != NULL || utf16_length == 0);
   String* string = Alloc(self, GetJavaLangString(), utf16_length);
-  if (string == NULL) {
-    return NULL;
+  if (UNLIKELY(string == nullptr)) {
+    return nullptr;
   }
   // TODO: use 16-bit wide memset variant
   CharArray* array = const_cast<CharArray*>(string->GetCharArray());
@@ -143,8 +143,8 @@
 }
 
 String* String::AllocFromModifiedUtf8(Thread* self, const char* utf) {
-  if (utf == NULL) {
-    return NULL;
+  if (UNLIKELY(utf == nullptr)) {
+    return nullptr;
   }
   size_t char_count = CountModifiedUtf8Chars(utf);
   return AllocFromModifiedUtf8(self, char_count, utf);
@@ -153,8 +153,8 @@
 String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
                                       const char* utf8_data_in) {
   String* string = Alloc(self, GetJavaLangString(), utf16_length);
-  if (string == NULL) {
-    return NULL;
+  if (UNLIKELY(string == nullptr)) {
+    return nullptr;
   }
   uint16_t* utf16_data_out =
       const_cast<uint16_t*>(string->GetCharArray()->GetData());
@@ -164,22 +164,21 @@
 }
 
 String* String::Alloc(Thread* self, Class* java_lang_String, int32_t utf16_length) {
-  SirtRef<CharArray> array(self, CharArray::Alloc(self, utf16_length));
-  if (array.get() == NULL) {
-    return NULL;
+  CharArray* array = CharArray::Alloc(self, utf16_length);
+  if (UNLIKELY(array == nullptr)) {
+    return nullptr;
   }
-  return Alloc(self, java_lang_String, array.get());
+  return Alloc(self, java_lang_String, array);
 }
 
 String* String::Alloc(Thread* self, Class* java_lang_String, CharArray* array) {
   // Hold reference in case AllocObject causes GC.
   SirtRef<CharArray> array_ref(self, array);
   String* string = down_cast<String*>(java_lang_String->AllocObject(self));
-  if (string == NULL) {
-    return NULL;
+  if (LIKELY(string != nullptr)) {
+    string->SetArray(array_ref.get());
+    string->SetCount(array_ref->GetLength());
   }
-  string->SetArray(array);
-  string->SetCount(array->GetLength());
   return string;
 }
 
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 01d8f31..7520c4d 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -117,10 +117,8 @@
 
  private:
   void SetHashCode(int32_t new_hash_code) {
-    DCHECK_EQ(0u,
-              GetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), false));
-    SetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_),
-               new_hash_code, false);
+    DCHECK_EQ(0u, GetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), false));
+    SetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), new_hash_code, false);
   }
 
   void SetCount(int32_t new_count) {
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 2abfd3d..af93a56 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -82,6 +82,7 @@
 Monitor::Monitor(Thread* owner, mirror::Object* obj, int32_t hash_code)
     : monitor_lock_("a monitor lock", kMonitorLock),
       monitor_contenders_("monitor contenders", monitor_lock_),
+      num_waiters_(0),
       owner_(owner),
       lock_count_(0),
       obj_(obj),
@@ -128,6 +129,10 @@
       LOG(FATAL) << "Inflating unlocked lock word";
       break;
     }
+    default: {
+      LOG(FATAL) << "Invalid monitor state " << lw.GetState();
+      return false;
+    }
   }
   LockWord fat(this);
   // Publish the updated lock word, which may race with other threads.
@@ -140,8 +145,7 @@
 }
 
 Monitor::~Monitor() {
-  CHECK(obj_ != NULL);
-  CHECK_EQ(obj_->GetLockWord().GetState(), LockWord::kFatLocked);
+  // Deflated monitors have a null object.
 }
 
 /*
@@ -222,7 +226,9 @@
       ScopedThreadStateChange tsc(self, kBlocked);  // Change to blocked and give up mutator_lock_.
       MutexLock mu2(self, monitor_lock_);  // Reacquire monitor_lock_ without mutator_lock_ for Wait.
       if (owner_ != NULL) {  // Did the owner_ give the lock up?
+        ++num_waiters_;
         monitor_contenders_.Wait(self);  // Still contended so wait.
+        --num_waiters_;
         // Woken from contention.
         if (log_contention) {
           uint64_t wait_ms = MilliTime() - wait_start_ms;
@@ -559,6 +565,43 @@
   }
 }
 
+bool Monitor::Deflate(Thread* self, mirror::Object* obj) {
+  DCHECK(obj != nullptr);
+  LockWord lw(obj->GetLockWord());
+  // If the lock isn't an inflated monitor, then we don't need to deflate anything.
+  if (lw.GetState() == LockWord::kFatLocked) {
+    Monitor* monitor = lw.FatLockMonitor();
+    CHECK(monitor != nullptr);
+    MutexLock mu(self, monitor->monitor_lock_);
+    Thread* owner = monitor->owner_;
+    if (owner != nullptr) {
+      // Can't deflate if we are locked and have a hash code.
+      if (monitor->HasHashCode()) {
+        return false;
+      }
+      // Can't deflate if our lock count is too high.
+      if (monitor->lock_count_ > LockWord::kThinLockMaxCount) {
+        return false;
+      }
+      // Can't deflate if we have anybody waiting on the CV.
+      if (monitor->num_waiters_ > 0) {
+        return false;
+      }
+      // Deflate to a thin lock.
+      obj->SetLockWord(LockWord::FromThinLockId(owner->GetTid(), monitor->lock_count_));
+    } else if (monitor->HasHashCode()) {
+      obj->SetLockWord(LockWord::FromHashCode(monitor->GetHashCode()));
+    } else {
+      // No lock and no hash, just put an empty lock word inside the object.
+      obj->SetLockWord(LockWord());
+    }
+    // The monitor is deflated; mark the object as nullptr so that we know to delete it during
+    // the next GC.
+    monitor->obj_ = nullptr;
+  }
+  return true;
+}
+
 /*
  * Changes the shape of a monitor from thin to fat, preserving the internal lock state. The calling
  * thread must own the lock or the owner must be suspended. There's a race with other threads
@@ -577,13 +620,13 @@
   }
 }
 
-void Monitor::InflateThinLocked(Thread* self, mirror::Object* obj, LockWord lock_word,
+void Monitor::InflateThinLocked(Thread* self, SirtRef<mirror::Object>& obj, LockWord lock_word,
                                 uint32_t hash_code) {
   DCHECK_EQ(lock_word.GetState(), LockWord::kThinLocked);
   uint32_t owner_thread_id = lock_word.ThinLockOwner();
   if (owner_thread_id == self->GetThreadId()) {
     // We own the monitor, we can easily inflate it.
-    Inflate(self, self, obj, hash_code);
+    Inflate(self, self, obj.get(), hash_code);
   } else {
     ThreadList* thread_list = Runtime::Current()->GetThreadList();
     // Suspend the owner, inflate. First change to blocked and give up mutator_lock_.
@@ -598,7 +641,7 @@
         if (lock_word.GetState() == LockWord::kThinLocked &&
             lock_word.ThinLockOwner() == owner_thread_id) {
           // Go ahead and inflate the lock.
-          Inflate(self, owner, obj, hash_code);
+          Inflate(self, owner, obj.get(), hash_code);
         }
         thread_list->Resume(owner, false);
       }
@@ -611,12 +654,13 @@
   DCHECK(obj != NULL);
   uint32_t thread_id = self->GetThreadId();
   size_t contention_count = 0;
+  SirtRef<mirror::Object> sirt_obj(self, obj);
   while (true) {
-    LockWord lock_word = obj->GetLockWord();
+    LockWord lock_word = sirt_obj->GetLockWord();
     switch (lock_word.GetState()) {
       case LockWord::kUnlocked: {
         LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0));
-        if (obj->CasLockWord(lock_word, thin_locked)) {
+        if (sirt_obj->CasLockWord(lock_word, thin_locked)) {
           return;  // Success!
         }
         continue;  // Go again.
@@ -628,11 +672,11 @@
           uint32_t new_count = lock_word.ThinLockCount() + 1;
           if (LIKELY(new_count <= LockWord::kThinLockMaxCount)) {
             LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count));
-            obj->SetLockWord(thin_locked);
+            sirt_obj->SetLockWord(thin_locked);
             return;  // Success!
           } else {
             // We'd overflow the recursion count, so inflate the monitor.
-            InflateThinLocked(self, obj, lock_word, 0);
+            InflateThinLocked(self, sirt_obj, lock_word, 0);
           }
         } else {
           // Contention.
@@ -642,7 +686,7 @@
             NanoSleep(1000);  // Sleep for 1us and re-attempt.
           } else {
             contention_count = 0;
-            InflateThinLocked(self, obj, lock_word, 0);
+            InflateThinLocked(self, sirt_obj, lock_word, 0);
           }
         }
         continue;  // Start from the beginning.
@@ -654,9 +698,13 @@
       }
       case LockWord::kHashCode: {
         // Inflate with the existing hashcode.
-        Inflate(self, nullptr, obj, lock_word.GetHashCode());
+        Inflate(self, nullptr, sirt_obj.get(), lock_word.GetHashCode());
         break;
       }
+      default: {
+        LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
+        return;
+      }
     }
   }
 }
@@ -666,11 +714,12 @@
   DCHECK(obj != NULL);
 
   LockWord lock_word = obj->GetLockWord();
+  SirtRef<mirror::Object> sirt_obj(self, obj);
   switch (lock_word.GetState()) {
     case LockWord::kHashCode:
       // Fall-through.
     case LockWord::kUnlocked:
-      FailedUnlock(obj, self, NULL, NULL);
+      FailedUnlock(sirt_obj.get(), self, NULL, NULL);
       return false;  // Failure.
     case LockWord::kThinLocked: {
       uint32_t thread_id = self->GetThreadId();
@@ -679,16 +728,16 @@
         // TODO: there's a race here with the owner dying while we unlock.
         Thread* owner =
             Runtime::Current()->GetThreadList()->FindThreadByThreadId(lock_word.ThinLockOwner());
-        FailedUnlock(obj, self, owner, NULL);
+        FailedUnlock(sirt_obj.get(), self, owner, NULL);
         return false;  // Failure.
       } else {
         // We own the lock, decrease the recursion count.
         if (lock_word.ThinLockCount() != 0) {
           uint32_t new_count = lock_word.ThinLockCount() - 1;
           LockWord thin_locked(LockWord::FromThinLockId(thread_id, new_count));
-          obj->SetLockWord(thin_locked);
+          sirt_obj->SetLockWord(thin_locked);
         } else {
-          obj->SetLockWord(LockWord());
+          sirt_obj->SetLockWord(LockWord());
         }
         return true;  // Success!
       }
@@ -697,9 +746,10 @@
       Monitor* mon = lock_word.FatLockMonitor();
       return mon->Unlock(self);
     }
-    default:
-      LOG(FATAL) << "Unreachable";
+    default: {
+      LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
       return false;
+    }
   }
 }
 
@@ -733,6 +783,10 @@
     }
     case LockWord::kFatLocked:
       break;  // Already set for a wait.
+    default: {
+      LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
+      return;
+    }
   }
   Monitor* mon = lock_word.FatLockMonitor();
   mon->Wait(self, ms, ns, interruptShouldThrow, why);
@@ -769,6 +823,10 @@
       }
       return;  // Success.
     }
+    default: {
+      LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
+      return;
+    }
   }
 }
 
@@ -787,9 +845,10 @@
       Monitor* mon = lock_word.FatLockMonitor();
       return mon->GetOwnerThreadId();
     }
-    default:
+    default: {
       LOG(FATAL) << "Unreachable";
       return ThreadList::kInvalidThreadId;
+    }
   }
 }
 
@@ -1011,7 +1070,8 @@
   for (auto it = list_.begin(); it != list_.end(); ) {
     Monitor* m = *it;
     mirror::Object* obj = m->GetObject();
-    mirror::Object* new_obj = visitor(obj, arg);
+    // The object of a monitor can be null if we have deflated it.
+    mirror::Object* new_obj = obj != nullptr ? visitor(obj, arg) : nullptr;
     if (new_obj == nullptr) {
       VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object "
                     << m->GetObject();
@@ -1031,6 +1091,8 @@
   switch (lock_word.GetState()) {
     case LockWord::kUnlocked:
       // Fall-through.
+    case LockWord::kForwardingAddress:
+      // Fall-through.
     case LockWord::kHashCode:
       break;
     case LockWord::kThinLocked:
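
Monitor::Deflate() above only shrinks a locked fat monitor back to a thin lock when nothing encoded in the monitor would be lost: no stored hash code, a recursion count that still fits in a thin lock word, and no threads waiting on the condition variable; an unowned monitor collapses to a hash-code word or an empty lock word. A compact sketch of those rules follows, with placeholder types and an assumed kThinLockMaxCount rather than ART's real LockWord encoding.

    #include <cstdio>

    constexpr unsigned kThinLockMaxCount = 4095;  // Assumed recursion limit for the sketch.

    struct FatMonitor {
      bool has_owner;
      bool has_hash_code;
      unsigned lock_count;
      unsigned num_waiters;
    };

    // Returns true if the fat monitor can be shrunk back to a thin lock,
    // a hash-code word, or an empty lock word without losing information.
    bool CanDeflate(const FatMonitor& m) {
      if (!m.has_owner) {
        return true;  // Unowned: collapse to a hash word or an empty word.
      }
      // Owned: only deflate to a thin lock if nothing would be lost.
      return !m.has_hash_code &&
             m.lock_count <= kThinLockMaxCount &&
             m.num_waiters == 0;
    }

    int main() {
      FatMonitor contended{true, false, 1, 2};  // A thread is waiting: keep it fat.
      FatMonitor idle{false, true, 0, 0};       // Unowned with a hash: deflatable.
      std::printf("%d %d\n", CanDeflate(contended), CanDeflate(idle));
      return 0;
    }
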
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 09cfafa..bfd8545 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -27,6 +27,7 @@
 #include "atomic_integer.h"
 #include "base/mutex.h"
 #include "root_visitor.h"
+#include "sirt_ref.h"
 #include "thread_state.h"
 
 namespace art {
@@ -107,9 +108,12 @@
     return hash_code_.load() != 0;
   }
 
-  static void InflateThinLocked(Thread* self, mirror::Object* obj, LockWord lock_word,
+  static void InflateThinLocked(Thread* self, SirtRef<mirror::Object>& obj, LockWord lock_word,
                                 uint32_t hash_code) NO_THREAD_SAFETY_ANALYSIS;
 
+  static bool Deflate(Thread* self, mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  private:
   explicit Monitor(Thread* owner, mirror::Object* obj, int32_t hash_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -170,6 +174,9 @@
   Mutex monitor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ConditionVariable monitor_contenders_ GUARDED_BY(monitor_lock_);
 
+  // Number of threads waiting on the condition variable.
+  size_t num_waiters_ GUARDED_BY(monitor_lock_);
+
   // Which thread currently owns the lock?
   Thread* volatile owner_ GUARDED_BY(monitor_lock_);
 
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index ab5eab3..c9e0e83 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -161,7 +161,7 @@
   ScopedObjectAccess soa(env);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   class_linker->RegisterDexFile(*dex_file);
-  mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(javaLoader);
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(javaLoader));
   mirror::Class* result = class_linker->DefineClass(descriptor.c_str(), class_loader, *dex_file,
                                                     *dex_class_def);
   VLOG(class_linker) << "DexFile_defineClassNative returning " << result;
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 96c3e78..67c4505 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -20,6 +20,7 @@
 #include "class_linker.h"
 #include "common_throws.h"
 #include "debugger.h"
+#include "gc/space/bump_pointer_space.h"
 #include "gc/space/dlmalloc_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
@@ -247,7 +248,7 @@
 // /proc/<pid>/smaps.
 static void VMDebug_getHeapSpaceStats(JNIEnv* env, jclass, jlongArray data) {
   jlong* arr = reinterpret_cast<jlong*>(env->GetPrimitiveArrayCritical(data, 0));
-  if (arr == NULL || env->GetArrayLength(data) < 9) {
+  if (arr == nullptr || env->GetArrayLength(data) < 9) {
     return;
   }
 
@@ -257,29 +258,26 @@
   size_t zygoteUsed = 0;
   size_t largeObjectsSize = 0;
   size_t largeObjectsUsed = 0;
-
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  const std::vector<gc::space::ContinuousSpace*>& continuous_spaces = heap->GetContinuousSpaces();
-  const std::vector<gc::space::DiscontinuousSpace*>& discontinuous_spaces = heap->GetDiscontinuousSpaces();
-  typedef std::vector<gc::space::ContinuousSpace*>::const_iterator It;
-  for (It it = continuous_spaces.begin(), end = continuous_spaces.end(); it != end; ++it) {
-    gc::space::ContinuousSpace* space = *it;
+  for (gc::space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
     if (space->IsImageSpace()) {
       // Currently don't include the image space.
     } else if (space->IsZygoteSpace()) {
-      gc::space::DlMallocSpace* dlmalloc_space = space->AsDlMallocSpace();
-      zygoteSize += dlmalloc_space->GetFootprint();
-      zygoteUsed += dlmalloc_space->GetBytesAllocated();
-    } else {
-      // This is the alloc space.
-      gc::space::DlMallocSpace* dlmalloc_space = space->AsDlMallocSpace();
-      allocSize += dlmalloc_space->GetFootprint();
-      allocUsed += dlmalloc_space->GetBytesAllocated();
+      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+      zygoteSize += malloc_space->GetFootprint();
+      zygoteUsed += malloc_space->GetBytesAllocated();
+    } else if (space->IsMallocSpace()) {
+      // This is a malloc space.
+      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+      allocSize += malloc_space->GetFootprint();
+      allocUsed += malloc_space->GetBytesAllocated();
+    } else if (space->IsBumpPointerSpace()) {
+      gc::space::BumpPointerSpace* bump_pointer_space = space->AsBumpPointerSpace();
+      allocSize += bump_pointer_space->Size();
+      allocUsed += bump_pointer_space->GetBytesAllocated();
     }
   }
-  typedef std::vector<gc::space::DiscontinuousSpace*>::const_iterator It2;
-  for (It2 it = discontinuous_spaces.begin(), end = discontinuous_spaces.end(); it != end; ++it) {
-    gc::space::DiscontinuousSpace* space = *it;
+  for (gc::space::DiscontinuousSpace* space : heap->GetDiscontinuousSpaces()) {
     if (space->IsLargeObjectSpace()) {
       largeObjectsSize += space->AsLargeObjectSpace()->GetBytesAllocated();
       largeObjectsUsed += largeObjectsSize;
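
VMDebug_getHeapSpaceStats() above now walks the continuous spaces with a range-for and attributes each space's footprint and allocated bytes to either the zygote bucket or the allocation bucket, handling malloc-backed and bump-pointer spaces separately while skipping the image space. The loop below is a stripped-down sketch of that accounting, using a hypothetical tagged Space struct instead of ART's gc::space hierarchy.

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Hypothetical space descriptor; a tagged struct is enough to show the accounting.
    struct Space {
      enum Kind { kImage, kZygote, kMalloc, kBumpPointer } kind;
      std::size_t footprint;
      std::size_t bytes_allocated;
    };

    struct HeapStats {
      std::size_t alloc_size = 0, alloc_used = 0;
      std::size_t zygote_size = 0, zygote_used = 0;
    };

    HeapStats CollectHeapSpaceStats(const std::vector<Space>& spaces) {
      HeapStats stats;
      for (const Space& space : spaces) {
        switch (space.kind) {
          case Space::kImage:
            break;  // The image space is deliberately not counted.
          case Space::kZygote:
            stats.zygote_size += space.footprint;
            stats.zygote_used += space.bytes_allocated;
            break;
          case Space::kMalloc:
          case Space::kBumpPointer:
            stats.alloc_size += space.footprint;
            stats.alloc_used += space.bytes_allocated;
            break;
        }
      }
      return stats;
    }

    int main() {
      std::vector<Space> spaces = {{Space::kImage, 8 << 20, 8 << 20},
                                   {Space::kZygote, 4 << 20, 3 << 20},
                                   {Space::kMalloc, 16 << 20, 9 << 20},
                                   {Space::kBumpPointer, 2 << 20, 1 << 20}};
      HeapStats stats = CollectHeapSpaceStats(spaces);
      std::printf("alloc %zu/%zu zygote %zu/%zu\n",
                  stats.alloc_used, stats.alloc_size, stats.zygote_used, stats.zygote_size);
      return 0;
    }
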
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index aef000c..fd3d91e 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -53,18 +53,9 @@
 static void VMRuntime_disableJitCompilation(JNIEnv*, jobject) {
 }
 
-static jobject VMRuntime_newNonMovableArray(JNIEnv* env,
-                                            jobject,
-                                            jclass javaElementClass,
+static jobject VMRuntime_newNonMovableArray(JNIEnv* env, jobject, jclass javaElementClass,
                                             jint length) {
   ScopedFastNativeObjectAccess soa(env);
-#ifdef MOVING_GARBAGE_COLLECTOR
-  // TODO: right now, we don't have a copying collector, so there's no need
-  // to do anything special here, but we ought to pass the non-movability
-  // through to the allocator.
-  UNIMPLEMENTED(FATAL);
-#endif
-
   mirror::Class* element_class = soa.Decode<mirror::Class*>(javaElementClass);
   if (element_class == NULL) {
     ThrowNullPointerException(NULL, "element class == null");
@@ -74,13 +65,14 @@
     ThrowNegativeArraySizeException(length);
     return NULL;
   }
-
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   std::string descriptor;
   descriptor += "[";
   descriptor += ClassHelper(element_class).GetDescriptor();
-  mirror::Class* array_class = class_linker->FindClass(descriptor.c_str(), NULL);
-  mirror::Array* result = mirror::Array::Alloc(soa.Self(), array_class, length);
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), nullptr);
+  mirror::Class* array_class = class_linker->FindClass(descriptor.c_str(), class_loader);
+  mirror::Array* result = mirror::Array::Alloc<true>(soa.Self(), array_class, length,
+                                                     Runtime::Current()->GetHeap()->GetCurrentNonMovingAllocator());
   return soa.AddLocalReference<jobject>(result);
 }
 
@@ -94,7 +86,10 @@
     ThrowIllegalArgumentException(NULL, "not an array");
     return 0;
   }
-  // TODO: we should also check that this is a non-movable array.
+  if (Runtime::Current()->GetHeap()->IsMovableObject(array)) {
+    ThrowRuntimeException("Trying to get address of movable array object");
+    return 0;
+  }
   return reinterpret_cast<uintptr_t>(array->GetRawData(array->GetClass()->GetComponentSize()));
 }
 
@@ -172,28 +167,7 @@
 }
 
 static void VMRuntime_trimHeap(JNIEnv*, jobject) {
-  uint64_t start_ns = NanoTime();
-
-  // Trim the managed heap.
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  float managed_utilization = (static_cast<float>(heap->GetBytesAllocated()) /
-                               heap->GetTotalMemory());
-  size_t managed_reclaimed = heap->Trim();
-
-  uint64_t gc_heap_end_ns = NanoTime();
-
-  // Trim the native heap.
-  dlmalloc_trim(0);
-  size_t native_reclaimed = 0;
-  dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
-
-  uint64_t end_ns = NanoTime();
-
-  LOG(INFO) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
-      << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
-      << PrettyDuration(end_ns - gc_heap_end_ns) << ", advised=" << PrettySize(native_reclaimed)
-      << ") heaps. Managed heap utilization of " << static_cast<int>(100 * managed_utilization)
-      << "%.";
+  Runtime::Current()->GetHeap()->Trim();
 }
 
 static void VMRuntime_concurrentGC(JNIEnv* env, jobject) {
@@ -212,7 +186,7 @@
 }
 
 // Based on ClassLinker::ResolveString.
-static void PreloadDexCachesResolveString(mirror::DexCache* dex_cache,
+static void PreloadDexCachesResolveString(SirtRef<mirror::DexCache>& dex_cache,
                                           uint32_t string_idx,
                                           StringTable& strings)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -260,7 +234,7 @@
 }
 
 // Based on ClassLinker::ResolveField.
-static void PreloadDexCachesResolveField(mirror::DexCache* dex_cache,
+static void PreloadDexCachesResolveField(SirtRef<mirror::DexCache>& dex_cache,
                                          uint32_t field_idx,
                                          bool is_static)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -275,9 +249,9 @@
     return;
   }
   if (is_static) {
-    field = klass->FindStaticField(dex_cache, field_idx);
+    field = klass->FindStaticField(dex_cache.get(), field_idx);
   } else {
-    field = klass->FindInstanceField(dex_cache, field_idx);
+    field = klass->FindInstanceField(dex_cache.get(), field_idx);
   }
   if (field == NULL) {
     return;
@@ -287,7 +261,7 @@
 }
 
 // Based on ClassLinker::ResolveMethod.
-static void PreloadDexCachesResolveMethod(mirror::DexCache* dex_cache,
+static void PreloadDexCachesResolveMethod(SirtRef<mirror::DexCache>& dex_cache,
                                           uint32_t method_idx,
                                           InvokeType invoke_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -304,14 +278,14 @@
   switch (invoke_type) {
     case kDirect:
     case kStatic:
-      method = klass->FindDirectMethod(dex_cache, method_idx);
+      method = klass->FindDirectMethod(dex_cache.get(), method_idx);
       break;
     case kInterface:
-      method = klass->FindInterfaceMethod(dex_cache, method_idx);
+      method = klass->FindInterfaceMethod(dex_cache.get(), method_idx);
       break;
     case kSuper:
     case kVirtual:
-      method = klass->FindVirtualMethod(dex_cache, method_idx);
+      method = klass->FindVirtualMethod(dex_cache.get(), method_idx);
       break;
     default:
       LOG(FATAL) << "Unreachable - invocation type: " << invoke_type;
@@ -430,6 +404,7 @@
 
   Runtime* runtime = Runtime::Current();
   ClassLinker* linker = runtime->GetClassLinker();
+  Thread* self = ThreadForEnv(env);
 
   // We use a std::map to avoid heap allocating StringObjects to lookup in gDvm.literalStrings
   StringTable strings;
@@ -441,7 +416,7 @@
   for (size_t i = 0; i< boot_class_path.size(); i++) {
     const DexFile* dex_file = boot_class_path[i];
     CHECK(dex_file != NULL);
-    mirror::DexCache* dex_cache = linker->FindDexCache(*dex_file);
+    SirtRef<mirror::DexCache> dex_cache(self, linker->FindDexCache(*dex_file));
 
     if (kPreloadDexCachesStrings) {
       for (size_t i = 0; i < dex_cache->NumStrings(); i++) {
@@ -451,7 +426,7 @@
 
     if (kPreloadDexCachesTypes) {
       for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
-        PreloadDexCachesResolveType(dex_cache, i);
+        PreloadDexCachesResolveType(dex_cache.get(), i);
       }
     }
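
VMRuntime_newNonMovableArray() and VMRuntime_addressOf() above form a contract: arrays handed to native code are allocated with the non-moving allocator, and addressOf() refuses to expose a raw address for anything the heap is still allowed to move. The sketch below mirrors that contract with invented names (Buffer, NewNonMovableBuffer, AddressOf); it is not the real VMRuntime implementation.

    #include <cstdint>
    #include <cstdio>
    #include <memory>
    #include <stdexcept>
    #include <vector>

    // Hypothetical buffer type: a "pinned" buffer promises a stable address,
    // the analogue of an array allocated with the non-moving allocator.
    struct Buffer {
      std::vector<uint8_t> storage;
      bool pinned;
    };

    std::unique_ptr<Buffer> NewNonMovableBuffer(std::size_t length) {
      return std::unique_ptr<Buffer>(new Buffer{std::vector<uint8_t>(length), /*pinned=*/true});
    }

    // Only pinned buffers may expose a raw address; everything else is rejected,
    // mirroring the "Trying to get address of movable array object" check.
    uintptr_t AddressOf(const Buffer& buffer) {
      if (!buffer.pinned) {
        throw std::runtime_error("Trying to get address of movable buffer");
      }
      return reinterpret_cast<uintptr_t>(buffer.storage.data());
    }

    int main() {
      std::unique_ptr<Buffer> pinned = NewNonMovableBuffer(64);
      std::printf("pinned buffer at %#llx\n",
                  static_cast<unsigned long long>(AddressOf(*pinned)));

      Buffer movable{std::vector<uint8_t>(64), /*pinned=*/false};
      try {
        AddressOf(movable);
      } catch (const std::runtime_error& e) {
        std::printf("rejected: %s\n", e.what());
      }
      return 0;
    }
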
 
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 3591611..3389107 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -61,7 +61,8 @@
   }
 
   std::string descriptor(DotToDescriptor(name.c_str()));
-  mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(javaLoader);
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
+                                            soa.Decode<mirror::ClassLoader*>(javaLoader));
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   mirror::Class* c = class_linker->FindClass(descriptor.c_str(), class_loader);
   if (c == NULL) {
diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc
index a2d6b18..52cdb59 100644
--- a/runtime/native/java_lang_reflect_Array.cc
+++ b/runtime/native/java_lang_reflect_Array.cc
@@ -52,13 +52,14 @@
   descriptor += ClassHelper(element_class).GetDescriptor();
 
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  mirror::Class* array_class = class_linker->FindClass(descriptor.c_str(), element_class->GetClassLoader());
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(), element_class->GetClassLoader());
+  mirror::Class* array_class = class_linker->FindClass(descriptor.c_str(), class_loader);
   if (UNLIKELY(array_class == NULL)) {
     CHECK(soa.Self()->IsExceptionPending());
     return NULL;
   }
   DCHECK(array_class->IsArrayClass());
-  mirror::Array* new_array = mirror::Array::Alloc(soa.Self(), array_class, length);
+  mirror::Array* new_array = mirror::Array::Alloc<true>(soa.Self(), array_class, length);
   return soa.AddLocalReference<jobject>(new_array);
 }
 
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index aa72755..04dfcb5 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -56,7 +56,7 @@
     return NULL;
   }
 
-  mirror::Object* receiver = c->AllocObject(soa.Self());
+  mirror::Object* receiver = c->AllocNonMovableObject(soa.Self());
   if (receiver == NULL) {
     return NULL;
   }
diff --git a/runtime/native/java_lang_reflect_Proxy.cc b/runtime/native/java_lang_reflect_Proxy.cc
index a92823a..809369a 100644
--- a/runtime/native/java_lang_reflect_Proxy.cc
+++ b/runtime/native/java_lang_reflect_Proxy.cc
@@ -23,20 +23,12 @@
 
 namespace art {
 
-static jclass Proxy_generateProxy(JNIEnv* env, jclass, jstring javaName,
-                                  jobjectArray javaInterfaces, jobject javaLoader,
-                                  jobjectArray javaMethods, jobjectArray javaThrows) {
+static jclass Proxy_generateProxy(JNIEnv* env, jclass, jstring name, jobjectArray interfaces,
+                                  jobject loader, jobjectArray methods, jobjectArray throws) {
   ScopedObjectAccess soa(env);
-  mirror::String* name = soa.Decode<mirror::String*>(javaName);
-  mirror::ObjectArray<mirror::Class>* interfaces =
-      soa.Decode<mirror::ObjectArray<mirror::Class>*>(javaInterfaces);
-  mirror::ClassLoader* loader = soa.Decode<mirror::ClassLoader*>(javaLoader);
-  mirror::ObjectArray<mirror::ArtMethod>* methods =
-      soa.Decode<mirror::ObjectArray<mirror::ArtMethod>*>(javaMethods);
-  mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >* throws =
-      soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >*>(javaThrows);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  mirror::Class* result = class_linker->CreateProxyClass(name, interfaces, loader, methods, throws);
+  mirror::Class* result = class_linker->CreateProxyClass(soa, name, interfaces, loader, methods,
+                                                         throws);
   return soa.AddLocalReference<jclass>(result);
 }
 
diff --git a/runtime/native/scoped_fast_native_object_access.h b/runtime/native/scoped_fast_native_object_access.h
index d941ec3..1658d96 100644
--- a/runtime/native/scoped_fast_native_object_access.h
+++ b/runtime/native/scoped_fast_native_object_access.h
@@ -63,10 +63,6 @@
     Locks::mutator_lock_->AssertSharedHeld(Self());
     // Don't work with raw objects in non-runnable states.
     DCHECK_EQ(Self()->GetState(), kRunnable);
-#ifdef MOVING_GARBAGE_COLLECTOR
-    // TODO: we should make these unique weak globals if Field instances can ever move.
-    UNIMPLEMENTED(WARNING);
-#endif
     return reinterpret_cast<mirror::ArtField*>(fid);
   }
 
@@ -83,6 +79,10 @@
       return NULL;
     }
 
+    if (kIsDebugBuild) {
+      Runtime::Current()->GetHeap()->VerifyObject(obj);
+    }
+
     DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000);
 
     IndirectReferenceTable& locals = Env()->locals;
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index f724776..e37510c 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -67,12 +67,9 @@
 
 class ClassHelper {
  public:
-  ClassHelper(const mirror::Class* c = NULL, ClassLinker* l = NULL)
+  explicit ClassHelper(const mirror::Class* c)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : class_linker_(l),
-        dex_cache_(NULL),
-        dex_file_(NULL),
-        interface_type_list_(NULL),
+      : interface_type_list_(NULL),
         klass_(NULL) {
     if (c != NULL) {
       ChangeClass(c);
@@ -82,13 +79,9 @@
   void ChangeClass(const mirror::Class* new_c)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK(new_c != NULL) << "klass_=" << klass_;  // Log what we were changing from if any
-    CHECK(new_c->IsClass()) << "new_c=" << new_c;
-    if (dex_cache_ != NULL) {
-      mirror::DexCache* new_c_dex_cache = new_c->GetDexCache();
-      if (new_c_dex_cache != dex_cache_) {
-        dex_cache_ = new_c_dex_cache;
-        dex_file_ = NULL;
-      }
+    if (!new_c->IsClass()) {
+      LOG(FATAL) << "new_c=" << new_c << " cc " << new_c->GetClass() << " ccc "
+          << ((new_c->GetClass() != nullptr) ? new_c->GetClass()->GetClass() : NULL);
     }
     klass_ = new_c;
     interface_type_list_ = NULL;
@@ -201,20 +194,11 @@
   }
 
   const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (dex_file_ == NULL) {
-      dex_file_ = GetDexCache()->GetDexFile();
-    }
-    return *dex_file_;
+    return *GetDexCache()->GetDexFile();
   }
 
   mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::DexCache* result = dex_cache_;
-    if (result == NULL) {
-      DCHECK(klass_ != NULL);
-      result = klass_->GetDexCache();
-      dex_cache_ = result;
-    }
-    return result;
+    return klass_->GetDexCache();
   }
 
  private:
@@ -231,18 +215,10 @@
     return result;
   }
 
-  ClassLinker* GetClassLinker() {
-    ClassLinker* result = class_linker_;
-    if (result == NULL) {
-      result = Runtime::Current()->GetClassLinker();
-      class_linker_ = result;
-    }
-    return result;
+  ClassLinker* GetClassLinker() ALWAYS_INLINE {
+    return Runtime::Current()->GetClassLinker();
   }
 
-  ClassLinker* class_linker_;
-  mirror::DexCache* dex_cache_;
-  const DexFile* dex_file_;
   const DexFile::TypeList* interface_type_list_;
   const mirror::Class* klass_;
   std::string descriptor_;
@@ -252,20 +228,11 @@
 
 class FieldHelper {
  public:
-  FieldHelper() : class_linker_(NULL), dex_cache_(NULL), dex_file_(NULL), field_(NULL) {}
-  explicit FieldHelper(const mirror::ArtField* f) : class_linker_(NULL), dex_cache_(NULL), dex_file_(NULL), field_(f) {}
-  FieldHelper(const mirror::ArtField* f, ClassLinker* l)
-      : class_linker_(l), dex_cache_(NULL), dex_file_(NULL), field_(f) {}
+  FieldHelper() : field_(NULL) {}
+  explicit FieldHelper(const mirror::ArtField* f) : field_(f) {}
 
   void ChangeField(const mirror::ArtField* new_f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(new_f != NULL);
-    if (dex_cache_ != NULL) {
-      mirror::DexCache* new_f_dex_cache = new_f->GetDeclaringClass()->GetDexCache();
-      if (new_f_dex_cache != dex_cache_) {
-        dex_cache_ = new_f_dex_cache;
-        dex_file_ = NULL;
-      }
-    }
     field_ = new_f;
   }
 
@@ -343,31 +310,14 @@
 
  private:
   mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::DexCache* result = dex_cache_;
-    if (result == NULL) {
-      result = field_->GetDeclaringClass()->GetDexCache();
-      dex_cache_ = result;
-    }
-    return result;
+    return field_->GetDeclaringClass()->GetDexCache();
   }
-  ClassLinker* GetClassLinker() {
-    ClassLinker* result = class_linker_;
-    if (result == NULL) {
-      result = Runtime::Current()->GetClassLinker();
-      class_linker_ = result;
-    }
-    return result;
+  ClassLinker* GetClassLinker() ALWAYS_INLINE {
+    return Runtime::Current()->GetClassLinker();
   }
   const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (dex_file_ == NULL) {
-      dex_file_ = GetDexCache()->GetDexFile();
-    }
-    return *dex_file_;
+    return *GetDexCache()->GetDexFile();
   }
-
-  ClassLinker* class_linker_;
-  mirror::DexCache* dex_cache_;
-  const DexFile* dex_file_;
   const mirror::ArtField* field_;
   std::string declaring_class_descriptor_;
 
@@ -377,38 +327,17 @@
 class MethodHelper {
  public:
   MethodHelper()
-     : class_linker_(NULL), dex_cache_(NULL), dex_file_(NULL), method_(NULL), shorty_(NULL),
+     : method_(NULL), shorty_(NULL),
        shorty_len_(0) {}
 
   explicit MethodHelper(const mirror::ArtMethod* m)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : class_linker_(NULL), dex_cache_(NULL), dex_file_(NULL), method_(NULL), shorty_(NULL),
-        shorty_len_(0) {
-    SetMethod(m);
-  }
-
-  MethodHelper(const mirror::ArtMethod* m, ClassLinker* l)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : class_linker_(l), dex_cache_(NULL), dex_file_(NULL), method_(NULL), shorty_(NULL),
-        shorty_len_(0) {
+      : method_(NULL), shorty_(NULL), shorty_len_(0) {
     SetMethod(m);
   }
 
   void ChangeMethod(mirror::ArtMethod* new_m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(new_m != NULL);
-    if (dex_cache_ != NULL) {
-      mirror::Class* klass = new_m->GetDeclaringClass();
-      if (klass->IsProxyClass()) {
-        dex_cache_ = NULL;
-        dex_file_ = NULL;
-      } else {
-        mirror::DexCache* new_m_dex_cache = klass->GetDexCache();
-        if (new_m_dex_cache != dex_cache_) {
-          dex_cache_ = new_m_dex_cache;
-          dex_file_ = NULL;
-        }
-      }
-    }
     SetMethod(new_m);
     shorty_ = NULL;
   }
@@ -444,7 +373,8 @@
     const DexFile& dex_file = GetDexFile();
     uint32_t dex_method_idx = method_->GetDexMethodIndex();
     const DexFile::MethodId& method_id = dex_file.GetMethodId(dex_method_idx);
-    return GetClassLinker()->ResolveString(dex_file, method_id.name_idx_, GetDexCache());
+    SirtRef<mirror::DexCache> dex_cache(Thread::Current(), GetDexCache());
+    return GetClassLinker()->ResolveString(dex_file, method_id.name_idx_, dex_cache);
   }
 
   const char* GetShorty() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -622,28 +552,18 @@
   }
 
   const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile* result = dex_file_;
-    if (result == NULL) {
-      const mirror::DexCache* dex_cache = GetDexCache();
-      result = dex_file_ = dex_cache->GetDexFile();
-    }
-    return *result;
+    return *GetDexCache()->GetDexFile();
   }
 
   mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::DexCache* result = dex_cache_;
-    if (result == NULL) {
-      mirror::Class* klass = method_->GetDeclaringClass();
-      result = klass->GetDexCache();
-      dex_cache_ = result;
-    }
-    return result;
+    return method_->GetDeclaringClass()->GetDexCache();
   }
 
   mirror::String* ResolveString(uint32_t string_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::String* s = method_->GetDexCacheStrings()->Get(string_idx);
     if (UNLIKELY(s == NULL)) {
-      s = GetClassLinker()->ResolveString(GetDexFile(), string_idx, GetDexCache());
+      SirtRef<mirror::DexCache> dex_cache(Thread::Current(), GetDexCache());
+      s = GetClassLinker()->ResolveString(GetDexFile(), string_idx, dex_cache);
     }
     return s;
   }
@@ -705,18 +625,10 @@
     method_ = method;
   }
 
-  ClassLinker* GetClassLinker() {
-    ClassLinker* result = class_linker_;
-    if (result == NULL) {
-      result = Runtime::Current()->GetClassLinker();
-      class_linker_ = result;
-    }
-    return result;
+  ClassLinker* GetClassLinker() ALWAYS_INLINE {
+    return Runtime::Current()->GetClassLinker();
   }
 
-  ClassLinker* class_linker_;
-  mirror::DexCache* dex_cache_;
-  const DexFile* dex_file_;
   const mirror::ArtMethod* method_;
   const char* shorty_;
   uint32_t shorty_len_;
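
The helper classes in object_utils.h above drop their cached ClassLinker*, DexCache*, and DexFile* members and re-derive each value from the wrapped class, field, or method on every call. One plausible reading (an inference, not stated in the diff) is that cached raw pointers into the heap become a hazard once objects can move, and the values are cheap to recompute anyway. The toy example below shows the resulting shape with made-up types.

    #include <cstdio>
    #include <string>

    struct DexCache { std::string location; };

    struct Klass {
      DexCache* dex_cache;
      DexCache* GetDexCache() const { return dex_cache; }
    };

    // No cached DexCache* member: every call re-reads it through the class, so
    // an updated or relocated DexCache is always observed at its current address.
    class ClassHelperSketch {
     public:
      explicit ClassHelperSketch(const Klass* klass) : klass_(klass) {}
      DexCache* GetDexCache() const { return klass_->GetDexCache(); }
     private:
      const Klass* klass_;
    };

    int main() {
      DexCache old_cache{"core.dex"};
      DexCache new_cache{"core.dex"};
      Klass klass{&old_cache};
      ClassHelperSketch helper(&klass);
      klass.dex_cache = &new_cache;  // Simulate the object graph being updated.
      std::printf("%s at %p\n", helper.GetDexCache()->location.c_str(),
                  static_cast<void*>(helper.GetDexCache()));
      return 0;
    }
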
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index e95fdb9..6f65bff 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -233,7 +233,7 @@
 
 void ReferenceTable::VisitRoots(RootVisitor* visitor, void* arg) {
   for (auto& ref : entries_) {
-    ref = visitor(const_cast<mirror::Object*>(ref), arg);
+    ref = visitor(ref, arg);
   }
 }
 
diff --git a/runtime/root_visitor.h b/runtime/root_visitor.h
index a2d898b..d52f351 100644
--- a/runtime/root_visitor.h
+++ b/runtime/root_visitor.h
@@ -23,11 +23,13 @@
 }  // namespace mirror
 class StackVisitor;
 
+// Returns the new address of the object; returns root unchanged if it has not moved.
 typedef mirror::Object* (RootVisitor)(mirror::Object* root, void* arg)
     __attribute__((warn_unused_result));
 typedef void (VerifyRootVisitor)(const mirror::Object* root, void* arg, size_t vreg,
                                  const StackVisitor* visitor);
 typedef bool (IsMarkedTester)(const mirror::Object* object, void* arg);
+typedef void (ObjectVisitorCallback)(mirror::Object* obj, void* arg);
 
 }  // namespace art
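
root_visitor.h above changes the RootVisitor contract: a visitor now returns the (possibly new) address of the root, and callers such as ReferenceTable::VisitRoots above and the Runtime root visiting below store that value back into the slot they visited. The snippet below is a minimal caller honoring that contract, with a toy forwarding-table visitor standing in for the GC's real one.

    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    struct Object { int id; };

    // Matches the shape of the new contract: return the new address of the root,
    // or the root itself if the object has not moved.
    using RootVisitor = Object* (*)(Object* root, void* arg);

    // The caller must store the returned pointer back; otherwise moved objects
    // would still be referenced through their old addresses.
    void VisitRoots(std::vector<Object*>& roots, RootVisitor visitor, void* arg) {
      for (Object*& root : roots) {
        root = visitor(root, arg);
      }
    }

    // Toy "forwarding table" visitor: objects with an entry have been moved.
    Object* ForwardingVisitor(Object* root, void* arg) {
      auto* forwarding = static_cast<std::unordered_map<Object*, Object*>*>(arg);
      auto it = forwarding->find(root);
      return it != forwarding->end() ? it->second : root;
    }

    int main() {
      Object a{1}, b{2}, b_moved{2};
      std::vector<Object*> roots = {&a, &b};
      std::unordered_map<Object*, Object*> forwarding = {{&b, &b_moved}};
      VisitRoots(roots, ForwardingVisitor, &forwarding);
      std::printf("%p %p\n", static_cast<void*>(roots[0]), static_cast<void*>(roots[1]));
      return 0;
    }
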
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 34cf45b..6bd2560 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -102,14 +102,20 @@
       use_compile_time_class_path_(false),
       main_thread_group_(NULL),
       system_thread_group_(NULL),
-      system_class_loader_(NULL),
-      quick_alloc_entry_points_instrumentation_counter_(0) {
+      system_class_loader_(NULL) {
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
     callee_save_methods_[i] = NULL;
   }
 }
 
 Runtime::~Runtime() {
+  if (dump_gc_performance_on_shutdown_) {
+    // This can't be called from the Heap destructor below because it
+    // could call RosAlloc::InspectAll(), which needs the thread_list
+    // to still be alive.
+    heap_->DumpGcPerformanceInfo(LOG(INFO));
+  }
+
   Thread* self = Thread::Current();
   {
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
@@ -122,9 +128,14 @@
   Trace::Shutdown();
 
   // Make sure to let the GC complete if it is running.
-  heap_->WaitForConcurrentGcToComplete(self);
+  heap_->WaitForGcToComplete(self);
   heap_->DeleteThreadPool();
 
+  // For RosAlloc, revoke thread-local runs. Note that in tests
+  // (common_test.h) we repeatedly allocate and delete Runtime
+  // objects.
+  heap_->RevokeAllThreadLocalBuffers();
+
   // Make sure our internal threads are dead before we start tearing down things they're using.
   Dbg::StopJdwp();
   delete signal_catcher_;
@@ -352,6 +363,8 @@
   parsed->parallel_gc_threads_ = sysconf(_SC_NPROCESSORS_CONF) - 1;
   // Only the main GC thread, no workers.
   parsed->conc_gc_threads_ = 0;
+  // Default is CMS which is Sticky + Partial + Full CMS GC.
+  parsed->collector_type_ = gc::kCollectorTypeCMS;
   parsed->stack_size_ = 0;  // 0 means default.
   parsed->max_spins_before_thin_lock_inflation_ = Monitor::kDefaultMaxSpinsBeforeThinLockInflation;
   parsed->low_memory_mode_ = false;
@@ -359,11 +372,11 @@
   parsed->is_compiler_ = false;
   parsed->is_zygote_ = false;
   parsed->interpreter_only_ = false;
-  parsed->is_concurrent_gc_enabled_ = true;
   parsed->is_explicit_gc_disabled_ = false;
 
   parsed->long_pause_log_threshold_ = gc::Heap::kDefaultLongPauseLogThreshold;
   parsed->long_gc_log_threshold_ = gc::Heap::kDefaultLongGCLogThreshold;
+  parsed->dump_gc_performance_on_shutdown_ = false;
   parsed->ignore_max_footprint_ = false;
 
   parsed->lock_profiling_threshold_ = 0;
@@ -524,6 +537,8 @@
     } else if (option == "-XX:LongGCLogThreshold") {
           parsed->long_gc_log_threshold_ =
               ParseMemoryOption(option.substr(strlen("-XX:LongGCLogThreshold")).c_str(), 1024);
+    } else if (option == "-XX:DumpGCPerformanceOnShutdown") {
+      parsed->dump_gc_performance_on_shutdown_ = true;
     } else if (option == "-XX:IgnoreMaxFootprint") {
       parsed->ignore_max_footprint_ = true;
     } else if (option == "-XX:LowMemoryMode") {
@@ -542,10 +557,12 @@
       std::vector<std::string> gc_options;
       Split(option.substr(strlen("-Xgc:")), ',', gc_options);
       for (size_t i = 0; i < gc_options.size(); ++i) {
-        if (gc_options[i] == "noconcurrent") {
-          parsed->is_concurrent_gc_enabled_ = false;
-        } else if (gc_options[i] == "concurrent") {
-          parsed->is_concurrent_gc_enabled_ = true;
+        if (gc_options[i] == "MS" || gc_options[i] == "nonconcurrent") {
+          parsed->collector_type_ = gc::kCollectorTypeMS;
+        } else if (gc_options[i] == "CMS" || gc_options[i] == "concurrent") {
+          parsed->collector_type_ = gc::kCollectorTypeCMS;
+        } else if (gc_options[i] == "SS") {
+          parsed->collector_type_ = gc::kCollectorTypeSS;
         } else {
           LOG(WARNING) << "Ignoring unknown -Xgc option: " << gc_options[i];
         }
@@ -822,6 +839,11 @@
   }
 }
 
+bool Runtime::IsShuttingDown(Thread* self) {
+  MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+  return IsShuttingDownLocked();
+}
+
 void Runtime::StartDaemonThreads() {
   VLOG(startup) << "Runtime::StartDaemonThreads entering";
 
@@ -862,7 +884,6 @@
 
   is_compiler_ = options->is_compiler_;
   is_zygote_ = options->is_zygote_;
-  is_concurrent_gc_enabled_ = options->is_concurrent_gc_enabled_;
   is_explicit_gc_disabled_ = options->is_explicit_gc_disabled_;
 
   compiler_filter_ = options->compiler_filter_;
@@ -898,7 +919,7 @@
                        options->heap_target_utilization_,
                        options->heap_maximum_size_,
                        options->image_,
-                       options->is_concurrent_gc_enabled_,
+                       options->collector_type_,
                        options->parallel_gc_threads_,
                        options->conc_gc_threads_,
                        options->low_memory_mode_,
@@ -906,6 +927,8 @@
                        options->long_gc_log_threshold_,
                        options->ignore_max_footprint_);
 
+  dump_gc_performance_on_shutdown_ = options->dump_gc_performance_on_shutdown_;
+
   BlockSignals();
   InitPlatformSignalHandlers();
 
@@ -927,12 +950,13 @@
   GetHeap()->EnableObjectValidation();
 
   CHECK_GE(GetHeap()->GetContinuousSpaces().size(), 1U);
-  if (GetHeap()->GetContinuousSpaces()[0]->IsImageSpace()) {
-    class_linker_ = ClassLinker::CreateFromImage(intern_table_);
+  class_linker_ = new ClassLinker(intern_table_);
+  if (GetHeap()->HasImageSpace()) {
+    class_linker_->InitFromImage();
   } else {
     CHECK(options->boot_class_path_ != NULL);
     CHECK_NE(options->boot_class_path_->size(), 0U);
-    class_linker_ = ClassLinker::CreateFromCompiler(*options->boot_class_path_, intern_table_);
+    class_linker_->InitFromCompiler(*options->boot_class_path_);
   }
   CHECK(class_linker_ != NULL);
   verifier::MethodVerifier::Init();
@@ -1077,9 +1101,9 @@
     GetStats()->Clear(~0);
     // TODO: wouldn't it make more sense to clear _all_ threads' stats?
     Thread::Current()->GetStats()->Clear(~0);
-    InstrumentQuickAllocEntryPoints();
+    GetInstrumentation()->InstrumentQuickAllocEntryPoints();
   } else {
-    UninstrumentQuickAllocEntryPoints();
+    GetInstrumentation()->UninstrumentQuickAllocEntryPoints();
   }
   stats_enabled_ = new_state;
 }
@@ -1175,16 +1199,20 @@
         visitor(pre_allocated_OutOfMemoryError_, arg));
     DCHECK(pre_allocated_OutOfMemoryError_ != nullptr);
   }
-  resolution_method_ = reinterpret_cast<mirror::ArtMethod*>(visitor(resolution_method_, arg));
+  resolution_method_ = down_cast<mirror::ArtMethod*>(visitor(resolution_method_, arg));
   DCHECK(resolution_method_ != nullptr);
-  imt_conflict_method_ = reinterpret_cast<mirror::ArtMethod*>(visitor(imt_conflict_method_, arg));
-  DCHECK(imt_conflict_method_ != nullptr);
-  default_imt_ = reinterpret_cast<mirror::ObjectArray<mirror::ArtMethod>*>(visitor(default_imt_, arg));
-  DCHECK(default_imt_ != nullptr);
+  if (HasImtConflictMethod()) {
+    imt_conflict_method_ = down_cast<mirror::ArtMethod*>(visitor(imt_conflict_method_, arg));
+  }
+  if (HasDefaultImt()) {
+    default_imt_ = down_cast<mirror::ObjectArray<mirror::ArtMethod>*>(visitor(default_imt_, arg));
+  }
+
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-    callee_save_methods_[i] = reinterpret_cast<mirror::ArtMethod*>(
-        visitor(callee_save_methods_[i], arg));
-    DCHECK(callee_save_methods_[i] != nullptr);
+    if (callee_save_methods_[i] != nullptr) {
+      callee_save_methods_[i] = down_cast<mirror::ArtMethod*>(
+          visitor(callee_save_methods_[i], arg));
+    }
   }
 }
 
@@ -1202,49 +1230,45 @@
   Thread* self = Thread::Current();
   SirtRef<mirror::ObjectArray<mirror::ArtMethod> > imtable(self, cl->AllocArtMethodArray(self, 64));
   mirror::ArtMethod* imt_conflict_method = Runtime::Current()->GetImtConflictMethod();
-  for (size_t i = 0; i < 64; i++) {
+  for (size_t i = 0; i < static_cast<size_t>(imtable->GetLength()); i++) {
     imtable->Set(i, imt_conflict_method);
   }
   return imtable.get();
 }
 
 mirror::ArtMethod* Runtime::CreateImtConflictMethod() {
-  mirror::Class* method_class = mirror::ArtMethod::GetJavaLangReflectArtMethod();
   Thread* self = Thread::Current();
-  SirtRef<mirror::ArtMethod>
-      method(self, down_cast<mirror::ArtMethod*>(method_class->AllocObject(self)));
-  method->SetDeclaringClass(method_class);
+  Runtime* r = Runtime::Current();
+  ClassLinker* cl = r->GetClassLinker();
+  SirtRef<mirror::ArtMethod> method(self, cl->AllocArtMethod(self));
+  method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
   // TODO: use a special method for imt conflict method saves
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
   // When compiling, the code pointer will get set later when the image is loaded.
-  Runtime* r = Runtime::Current();
-  ClassLinker* cl = r->GetClassLinker();
   method->SetEntryPointFromCompiledCode(r->IsCompiler() ? NULL : GetImtConflictTrampoline(cl));
   return method.get();
 }
 
 mirror::ArtMethod* Runtime::CreateResolutionMethod() {
-  mirror::Class* method_class = mirror::ArtMethod::GetJavaLangReflectArtMethod();
   Thread* self = Thread::Current();
-  SirtRef<mirror::ArtMethod>
-      method(self, down_cast<mirror::ArtMethod*>(method_class->AllocObject(self)));
-  method->SetDeclaringClass(method_class);
+  Runtime* r = Runtime::Current();
+  ClassLinker* cl = r->GetClassLinker();
+  SirtRef<mirror::ArtMethod> method(self, cl->AllocArtMethod(self));
+  method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
   // TODO: use a special method for resolution method saves
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
   // When compiling, the code pointer will get set later when the image is loaded.
-  Runtime* r = Runtime::Current();
-  ClassLinker* cl = r->GetClassLinker();
   method->SetEntryPointFromCompiledCode(r->IsCompiler() ? NULL : GetResolutionTrampoline(cl));
   return method.get();
 }
 
 mirror::ArtMethod* Runtime::CreateCalleeSaveMethod(InstructionSet instruction_set,
-                                                        CalleeSaveType type) {
-  mirror::Class* method_class = mirror::ArtMethod::GetJavaLangReflectArtMethod();
+                                                   CalleeSaveType type) {
   Thread* self = Thread::Current();
-  SirtRef<mirror::ArtMethod>
-      method(self, down_cast<mirror::ArtMethod*>(method_class->AllocObject(self)));
-  method->SetDeclaringClass(method_class);
+  Runtime* r = Runtime::Current();
+  ClassLinker* cl = r->GetClassLinker();
+  SirtRef<mirror::ArtMethod> method(self, cl->AllocArtMethod(self));
+  method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
   // TODO: use a special method for callee saves
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
   method->SetEntryPointFromCompiledCode(NULL);
@@ -1336,46 +1360,4 @@
   compile_time_class_paths_.Put(class_loader, class_path);
 }
 
-static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg) {
-  thread->ResetQuickAllocEntryPointsForThread();
-}
-
-void SetQuickAllocEntryPointsInstrumented(bool instrumented);
-
-void Runtime::InstrumentQuickAllocEntryPoints() {
-  ThreadList* tl = thread_list_;
-  Thread* self = Thread::Current();
-  tl->SuspendAll();
-  {
-    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-    MutexLock mu2(self, *Locks::thread_list_lock_);
-    DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_, 0);
-    int old_counter = quick_alloc_entry_points_instrumentation_counter_++;
-    if (old_counter == 0) {
-      // If it was disabled, enable it.
-      SetQuickAllocEntryPointsInstrumented(true);
-      tl->ForEach(ResetQuickAllocEntryPointsForThread, NULL);
-    }
-  }
-  tl->ResumeAll();
-}
-
-void Runtime::UninstrumentQuickAllocEntryPoints() {
-  ThreadList* tl = thread_list_;
-  Thread* self = Thread::Current();
-  tl->SuspendAll();
-  {
-    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-    MutexLock mu2(self, *Locks::thread_list_lock_);
-    DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_, 0);
-    int new_counter = --quick_alloc_entry_points_instrumentation_counter_;
-    if (new_counter == 0) {
-      // Disable it if the counter becomes zero.
-      SetQuickAllocEntryPointsInstrumented(false);
-      tl->ForEach(ResetQuickAllocEntryPointsForThread, NULL);
-    }
-  }
-  tl->ResumeAll();
-}
-
 }  // namespace art
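As a reading aid for the IsShuttingDown() split above, a minimal sketch of the two call patterns. Runtime, Thread, MutexLock and Locks::runtime_shutdown_lock_ are the names used in this change; ExampleShutdownCheck itself is hypothetical and not part of the patch:

    #include "base/mutex.h"  // MutexLock
    #include "locks.h"       // Locks::runtime_shutdown_lock_
    #include "runtime.h"
    #include "thread.h"

    namespace art {

    // Hypothetical caller, for illustration only.
    static bool ExampleShutdownCheck(Thread* self) {
      Runtime* runtime = Runtime::Current();
      // Unlocked variant: IsShuttingDown() acquires Locks::runtime_shutdown_lock_ itself.
      if (runtime->IsShuttingDown(self)) {
        return true;
      }
      // Locked variant: for callers that already need the lock, e.g. around thread birth.
      MutexLock mu(self, *Locks::runtime_shutdown_lock_);
      return runtime->IsShuttingDownLocked();
    }

    }  // namespace art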
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 0ce2642..e6951d9 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -27,6 +27,7 @@
 
 #include "base/macros.h"
 #include "base/stringpiece.h"
+#include "gc/collector_type.h"
 #include "gc/heap.h"
 #include "globals.h"
 #include "instruction_set.h"
@@ -99,10 +100,10 @@
     bool is_compiler_;
     bool is_zygote_;
     bool interpreter_only_;
-    bool is_concurrent_gc_enabled_;
     bool is_explicit_gc_disabled_;
     size_t long_pause_log_threshold_;
     size_t long_gc_log_threshold_;
+    bool dump_gc_performance_on_shutdown_;
     bool ignore_max_footprint_;
     size_t heap_initial_size_;
     size_t heap_maximum_size_;
@@ -112,6 +113,7 @@
     double heap_target_utilization_;
     size_t parallel_gc_threads_;
     size_t conc_gc_threads_;
+    gc::CollectorType collector_type_;
     size_t stack_size_;
     size_t max_spins_before_thin_lock_inflation_;
     bool low_memory_mode_;
@@ -149,10 +151,6 @@
     return is_zygote_;
   }
 
-  bool IsConcurrentGcEnabled() const {
-    return is_concurrent_gc_enabled_;
-  }
-
   bool IsExplicitGcDisabled() const {
     return is_explicit_gc_disabled_;
   }
@@ -203,7 +201,8 @@
   // Starts a runtime, which may cause threads to be started and code to run.
   bool Start() UNLOCK_FUNCTION(Locks::mutator_lock_);
 
-  bool IsShuttingDown() const EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_) {
+  bool IsShuttingDown(Thread* self);
+  bool IsShuttingDownLocked() const EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_) {
     return shutting_down_;
   }
 
@@ -442,9 +441,6 @@
   const std::vector<const DexFile*>& GetCompileTimeClassPath(jobject class_loader);
   void SetCompileTimeClassPath(jobject class_loader, std::vector<const DexFile*>& class_path);
 
-  void InstrumentQuickAllocEntryPoints();
-  void UninstrumentQuickAllocEntryPoints();
-
  private:
   static void InitPlatformSignalHandlers();
 
@@ -567,7 +563,8 @@
   // As returned by ClassLoader.getSystemClassLoader().
   jobject system_class_loader_;
 
-  int quick_alloc_entry_points_instrumentation_counter_;
+  // If true, then we dump the GC cumulative timings on shutdown.
+  bool dump_gc_performance_on_shutdown_;
 
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index c39cdb2..1ca6c4e 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -34,9 +34,8 @@
     if (UNLIKELY(self_ == NULL)) {
       // Value chosen arbitrarily and won't be used in the destructor since thread_ == NULL.
       old_thread_state_ = kTerminated;
-      MutexLock mu(NULL, *Locks::runtime_shutdown_lock_);
       Runtime* runtime = Runtime::Current();
-      CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDown());
+      CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDown(self_));
     } else {
       bool runnable_transition;
       DCHECK_EQ(self, Thread::Current());
@@ -63,9 +62,8 @@
   ~ScopedThreadStateChange() LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) ALWAYS_INLINE {
     if (UNLIKELY(self_ == NULL)) {
       if (!expected_has_no_thread_) {
-        MutexLock mu(NULL, *Locks::runtime_shutdown_lock_);
         Runtime* runtime = Runtime::Current();
-        bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown();
+        bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown(nullptr);
         CHECK(shutting_down);
       }
     } else {
@@ -167,6 +165,10 @@
       return NULL;
     }
 
+    if (kIsDebugBuild) {
+      Runtime::Current()->GetHeap()->VerifyObject(obj);
+    }
+
     DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000);
 
     IndirectReferenceTable& locals = Env()->locals;
@@ -185,7 +187,6 @@
       }
     }
 #endif
-
     if (Vm()->work_around_app_jni_bugs) {
       // Hand out direct pointers to support broken old apps.
       return reinterpret_cast<T>(obj);
@@ -206,10 +207,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
-#ifdef MOVING_GARBAGE_COLLECTOR
-    // TODO: we should make these unique weak globals if Field instances can ever move.
-    UNIMPLEMENTED(WARNING);
-#endif
+    CHECK(!kMovingFields);
     return reinterpret_cast<mirror::ArtField*>(fid);
   }
 
@@ -217,9 +215,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
-#ifdef MOVING_GARBAGE_COLLECTOR
-    UNIMPLEMENTED(WARNING);
-#endif
+    CHECK(!kMovingFields);
     return reinterpret_cast<jfieldID>(field);
   }
 
@@ -227,10 +223,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
-#ifdef MOVING_GARBAGE_COLLECTOR
-    // TODO: we should make these unique weak globals if Method instances can ever move.
-    UNIMPLEMENTED(WARNING);
-#endif
+    CHECK(!kMovingMethods);
     return reinterpret_cast<mirror::ArtMethod*>(mid);
   }
 
@@ -238,9 +231,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
-#ifdef MOVING_GARBAGE_COLLECTOR
-    UNIMPLEMENTED(WARNING);
-#endif
+    CHECK(!kMovingMethods);
     return reinterpret_cast<jmethodID>(method);
   }
 
diff --git a/runtime/sirt_ref.h b/runtime/sirt_ref.h
index a1f8a66..56d81ec 100644
--- a/runtime/sirt_ref.h
+++ b/runtime/sirt_ref.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_SIRT_REF_H_
 #define ART_RUNTIME_SIRT_REF_H_
 
+#include "base/casts.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "thread.h"
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 5d3a9a5..a505383 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -22,12 +22,17 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "object_utils.h"
+#include "runtime.h"
 #include "thread_list.h"
 #include "throw_location.h"
 #include "vmap_table.h"
 
 namespace art {
 
+bool ShadowFrame::VerifyReference(const mirror::Object* val) const {
+  return !Runtime::Current()->GetHeap()->IsInTempSpace(val);
+}
+
 mirror::Object* ShadowFrame::GetThisObject() const {
   mirror::ArtMethod* m = GetMethod();
   if (m->IsStatic()) {
diff --git a/runtime/stack.h b/runtime/stack.h
index a4b93bc..3d6b06a 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -150,10 +150,15 @@
     return *reinterpret_cast<unaligned_double*>(vreg);
   }
 
+  template <bool kChecked = false>
   mirror::Object* GetVRegReference(size_t i) const {
     DCHECK_LT(i, NumberOfVRegs());
     if (HasReferenceArray()) {
       mirror::Object* ref = References()[i];
+      if (kChecked) {
+        CHECK(VerifyReference(ref)) << "VReg " << i << "(" << ref
+                                    << ") is in protected space, reference array " << true;
+      }
       // If the vreg reference is not equal to the vreg then the vreg reference is stale.
       if (reinterpret_cast<uint32_t>(ref) != vregs_[i]) {
         return nullptr;
@@ -161,7 +166,12 @@
       return ref;
     } else {
       const uint32_t* vreg = &vregs_[i];
-      return *reinterpret_cast<mirror::Object* const*>(vreg);
+      mirror::Object* ref = *reinterpret_cast<mirror::Object* const*>(vreg);
+      if (kChecked) {
+        CHECK(VerifyReference(ref)) << "VReg " << i
+            << "(" << ref << ") is in protected space, reference array " << false;
+      }
+      return ref;
     }
   }
 
@@ -174,12 +184,22 @@
     DCHECK_LT(i, NumberOfVRegs());
     uint32_t* vreg = &vregs_[i];
     *reinterpret_cast<int32_t*>(vreg) = val;
+    // This is needed for moving collectors since these can update the vreg references if they
+    // happen to agree with references in the reference array.
+    if (kMovingCollector && HasReferenceArray()) {
+      References()[i] = nullptr;
+    }
   }
 
   void SetVRegFloat(size_t i, float val) {
     DCHECK_LT(i, NumberOfVRegs());
     uint32_t* vreg = &vregs_[i];
     *reinterpret_cast<float*>(vreg) = val;
+    // This is needed for moving collectors since these can update the vreg references if they
+    // happen to agree with references in the reference array.
+    if (kMovingCollector && HasReferenceArray()) {
+      References()[i] = nullptr;
+    }
   }
 
   void SetVRegLong(size_t i, int64_t val) {
@@ -188,6 +208,12 @@
     // Alignment attribute required for GCC 4.8
     typedef int64_t unaligned_int64 __attribute__ ((aligned (4)));
     *reinterpret_cast<unaligned_int64*>(vreg) = val;
+    // This is needed for moving collectors since these can update the vreg references if they
+    // happen to agree with references in the reference array.
+    if (kMovingCollector && HasReferenceArray()) {
+      References()[i] = nullptr;
+      References()[i + 1] = nullptr;
+    }
   }
 
   void SetVRegDouble(size_t i, double val) {
@@ -196,10 +222,18 @@
     // Alignment attribute required for GCC 4.8
     typedef double unaligned_double __attribute__ ((aligned (4)));
     *reinterpret_cast<unaligned_double*>(vreg) = val;
+    // This is needed for moving collectors since these can update the vreg references if they
+    // happen to agree with references in the reference array.
+    if (kMovingCollector && HasReferenceArray()) {
+      References()[i] = nullptr;
+      References()[i + 1] = nullptr;
+    }
   }
 
   void SetVRegReference(size_t i, mirror::Object* val) {
     DCHECK_LT(i, NumberOfVRegs());
+    DCHECK(!kMovingCollector || VerifyReference(val))
+        << "VReg " << i << "(" << val << ") is in protected space";
     uint32_t* vreg = &vregs_[i];
     *reinterpret_cast<mirror::Object**>(vreg) = val;
     if (HasReferenceArray()) {
@@ -280,6 +314,8 @@
     return reinterpret_cast<mirror::Object* const*>(vreg_end);
   }
 
+  bool VerifyReference(const mirror::Object* val) const;
+
   mirror::Object** References() {
     return const_cast<mirror::Object**>(const_cast<const ShadowFrame*>(this)->References());
   }
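A hedged illustration of the checked accessor added to ShadowFrame above. GetVRegReference<kChecked>(), SetVRegReference() and VerifyReference() are from this change; the wrapper function is hypothetical:

    #include "stack.h"

    namespace art {

    // Hypothetical helper, for illustration only.
    static void ExampleVRegRead(ShadowFrame* frame, size_t vreg) {
      // Default behaviour, same as before this change (kChecked = false).
      mirror::Object* ref = frame->GetVRegReference(vreg);
      // Checked read: CHECKs via VerifyReference() that the value does not point
      // into the moving collector's protected temp space.
      ref = frame->GetVRegReference<true>(vreg);
      // Reference writes keep vregs_[i] and References()[i] in sync; that is why
      // the primitive setters above clear References()[i] instead.
      frame->SetVRegReference(vreg, ref);
    }

    }  // namespace art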
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 9751076..1add507 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -152,7 +152,7 @@
     MutexLock mu(NULL, *Locks::runtime_shutdown_lock_);
     // Check that if we got here we cannot be shutting down (as shutdown should never have started
     // while threads are being born).
-    CHECK(!runtime->IsShuttingDown());
+    CHECK(!runtime->IsShuttingDownLocked());
     self->Init(runtime->GetThreadList(), runtime->GetJavaVM());
     Runtime::Current()->EndThreadBirth();
   }
@@ -241,7 +241,7 @@
   bool thread_start_during_shutdown = false;
   {
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-    if (runtime->IsShuttingDown()) {
+    if (runtime->IsShuttingDownLocked()) {
       thread_start_during_shutdown = true;
     } else {
       runtime->StartThreadBirth();
@@ -328,7 +328,7 @@
   }
   {
     MutexLock mu(NULL, *Locks::runtime_shutdown_lock_);
-    if (runtime->IsShuttingDown()) {
+    if (runtime->IsShuttingDownLocked()) {
       LOG(ERROR) << "Thread attaching while runtime is shutting down: " << thread_name;
       return NULL;
     } else {
@@ -917,6 +917,7 @@
       throwing_OutOfMemoryError_(false),
       debug_suspend_count_(0),
       debug_invoke_req_(new DebugInvokeReq),
+      single_step_control_(new SingleStepControl),
       deoptimization_shadow_frame_(NULL),
       instrumentation_stack_(new std::deque<instrumentation::InstrumentationStackFrame>),
       name_(new std::string(kThreadNameDuringStartup)),
@@ -930,6 +931,7 @@
   state_and_flags_.as_struct.flags = 0;
   state_and_flags_.as_struct.state = kNative;
   memset(&held_mutexes_[0], 0, sizeof(held_mutexes_));
+  memset(rosalloc_runs_, 0, sizeof(rosalloc_runs_));
 }
 
 bool Thread::IsStillStarting() const {
@@ -1018,10 +1020,13 @@
   }
 
   delete debug_invoke_req_;
+  delete single_step_control_;
   delete instrumentation_stack_;
   delete name_;
   delete stack_trace_sample_;
 
+  Runtime::Current()->GetHeap()->RevokeThreadLocalBuffers(this);
+
   TearDownAlternateSignalStack();
 }
 
@@ -1352,13 +1357,12 @@
     *stack_depth = depth;
   }
 
-  MethodHelper mh;
   for (int32_t i = 0; i < depth; ++i) {
     mirror::ObjectArray<mirror::Object>* method_trace =
           soa.Decode<mirror::ObjectArray<mirror::Object>*>(internal);
     // Prepare parameters for StackTraceElement(String cls, String method, String file, int line)
     mirror::ArtMethod* method = down_cast<mirror::ArtMethod*>(method_trace->Get(i));
-    mh.ChangeMethod(method);
+    MethodHelper mh(method);
     mirror::IntArray* pc_trace = down_cast<mirror::IntArray*>(method_trace->Get(depth));
     uint32_t dex_pc = pc_trace->Get(i);
     int32_t line_number = mh.GetLineNumFromDexPC(dex_pc);
@@ -1385,11 +1389,8 @@
     SirtRef<mirror::String> source_name_object(soa.Self(),
                                                mirror::String::AllocFromModifiedUtf8(soa.Self(),
                                                                                      source_file));
-    mirror::StackTraceElement* obj = mirror::StackTraceElement::Alloc(soa.Self(),
-                                                                      class_name_object.get(),
-                                                                      method_name_object.get(),
-                                                                      source_name_object.get(),
-                                                                      line_number);
+    mirror::StackTraceElement* obj = mirror::StackTraceElement::Alloc(
+        soa.Self(), class_name_object, method_name_object, source_name_object, line_number);
     if (obj == NULL) {
       return NULL;
     }
@@ -1437,8 +1438,10 @@
   if (throw_location.GetMethod() != NULL) {
     cl = throw_location.GetMethod()->GetDeclaringClass()->GetClassLoader();
   }
+  SirtRef<mirror::ClassLoader> class_loader(this, cl);
   SirtRef<mirror::Class>
-      exception_class(this, runtime->GetClassLinker()->FindClass(exception_class_descriptor, cl));
+      exception_class(this, runtime->GetClassLinker()->FindClass(exception_class_descriptor,
+                                                                 class_loader));
   if (UNLIKELY(exception_class.get() == NULL)) {
     CHECK(IsExceptionPending());
     LOG(ERROR) << "No exception class " << PrettyDescriptor(exception_class_descriptor);
@@ -1453,6 +1456,12 @@
   SirtRef<mirror::Throwable> exception(this,
                                 down_cast<mirror::Throwable*>(exception_class->AllocObject(this)));
 
+  // If we couldn't allocate the exception, throw the pre-allocated out of memory exception.
+  if (exception.get() == nullptr) {
+    SetException(throw_location, Runtime::Current()->GetPreAllocatedOutOfMemoryError());
+    return;
+  }
+
   // Choose an appropriate constructor and set up the arguments.
   const char* signature;
   SirtRef<mirror::String> msg_string(this, NULL);
@@ -1741,18 +1750,21 @@
     return true;  // Continue stack walk.
   }
 
-  bool HandleDeoptimization(mirror::ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool HandleDeoptimization(mirror::ArtMethod* m)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     MethodHelper mh(m);
     const DexFile::CodeItem* code_item = mh.GetCodeItem();
     CHECK(code_item != NULL);
-    uint16_t num_regs =  code_item->registers_size_;
+    uint16_t num_regs = code_item->registers_size_;
     uint32_t dex_pc = GetDexPc();
     const Instruction* inst = Instruction::At(code_item->insns_ + dex_pc);
     uint32_t new_dex_pc = dex_pc + inst->SizeInCodeUnits();
     ShadowFrame* new_frame = ShadowFrame::Create(num_regs, NULL, m, new_dex_pc);
-    verifier::MethodVerifier verifier(&mh.GetDexFile(), mh.GetDexCache(), mh.GetClassLoader(),
-                                      &mh.GetClassDef(), code_item,
-                                      m->GetDexMethodIndex(), m, m->GetAccessFlags(), false, true);
+    SirtRef<mirror::DexCache> dex_cache(self_, mh.GetDexCache());
+    SirtRef<mirror::ClassLoader> class_loader(self_, mh.GetClassLoader());
+    verifier::MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader,
+                                      &mh.GetClassDef(), code_item, m->GetDexMethodIndex(), m,
+                                      m->GetAccessFlags(), false, true);
     verifier.Verify();
     std::vector<int32_t> kinds = verifier.DescribeVRegs(dex_pc);
     for (uint16_t reg = 0; reg < num_regs; reg++) {
@@ -2088,6 +2100,13 @@
   void* const arg_;
 };
 
+void Thread::SetClassLoaderOverride(mirror::ClassLoader* class_loader_override) {
+  if (kIsDebugBuild) {
+    Runtime::Current()->GetHeap()->VerifyObject(class_loader_override);
+  }
+  class_loader_override_ = class_loader_override;
+}
+
 void Thread::VisitRoots(RootVisitor* visitor, void* arg) {
   if (opeer_ != nullptr) {
     opeer_ = visitor(opeer_, arg);
@@ -2115,10 +2134,9 @@
   for (instrumentation::InstrumentationStackFrame& frame : *GetInstrumentationStack()) {
     if (frame.this_object_ != nullptr) {
       frame.this_object_ = visitor(frame.this_object_, arg);
-      DCHECK(frame.this_object_ != nullptr);
     }
-    frame.method_ = reinterpret_cast<mirror::ArtMethod*>(visitor(frame.method_, arg));
     DCHECK(frame.method_ != nullptr);
+    frame.method_ = reinterpret_cast<mirror::ArtMethod*>(visitor(frame.method_, arg));
   }
 }
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 3aa1373..db2f7b4 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -68,6 +68,7 @@
 class ScopedObjectAccess;
 class ScopedObjectAccessUnchecked;
 class ShadowFrame;
+struct SingleStepControl;
 class Thread;
 class ThreadList;
 
@@ -177,34 +178,27 @@
       ALWAYS_INLINE;
 
   // Once called thread suspension will cause an assertion failure.
-#ifndef NDEBUG
   const char* StartAssertNoThreadSuspension(const char* cause) {
-    CHECK(cause != NULL);
-    const char* previous_cause = last_no_thread_suspension_cause_;
-    no_thread_suspension_++;
-    last_no_thread_suspension_cause_ = cause;
-    return previous_cause;
+    if (kIsDebugBuild) {
+      CHECK(cause != NULL);
+      const char* previous_cause = last_no_thread_suspension_cause_;
+      no_thread_suspension_++;
+      last_no_thread_suspension_cause_ = cause;
+      return previous_cause;
+    } else {
+      return nullptr;
+    }
   }
-#else
-  const char* StartAssertNoThreadSuspension(const char* cause) {
-    CHECK(cause != NULL);
-    return NULL;
-  }
-#endif
 
   // End region where no thread suspension is expected.
-#ifndef NDEBUG
   void EndAssertNoThreadSuspension(const char* old_cause) {
-    CHECK(old_cause != NULL || no_thread_suspension_ == 1);
-    CHECK_GT(no_thread_suspension_, 0U);
-    no_thread_suspension_--;
-    last_no_thread_suspension_cause_ = old_cause;
+    if (kIsDebugBuild) {
+      CHECK(old_cause != NULL || no_thread_suspension_ == 1);
+      CHECK_GT(no_thread_suspension_, 0U);
+      no_thread_suspension_--;
+      last_no_thread_suspension_cause_ = old_cause;
+    }
   }
-#else
-  void EndAssertNoThreadSuspension(const char*) {
-  }
-#endif
-
 
   void AssertThreadSuspensionIsAllowable(bool check_locks = true) const;
 
@@ -370,9 +364,7 @@
     return class_loader_override_;
   }
 
-  void SetClassLoaderOverride(mirror::ClassLoader* class_loader_override) {
-    class_loader_override_ = class_loader_override;
-  }
+  void SetClassLoaderOverride(mirror::ClassLoader* class_loader_override);
 
   // Create the internal representation of a stack trace, that is more time
   // and space efficient to compute than the StackTraceElement[]
@@ -522,6 +514,10 @@
     return debug_invoke_req_;
   }
 
+  SingleStepControl* GetSingleStepControl() const {
+    return single_step_control_;
+  }
+
   void SetDeoptimizationShadowFrame(ShadowFrame* sf);
   void SetDeoptimizationReturnValue(const JValue& ret_val);
 
@@ -755,6 +751,9 @@
   // JDWP invoke-during-breakpoint support.
   DebugInvokeReq* debug_invoke_req_;
 
+  // JDWP single-stepping support.
+  SingleStepControl* single_step_control_;
+
   // Shadow frame that is used temporarily during the deoptimization of a method.
   ShadowFrame* deoptimization_shadow_frame_;
   JValue deoptimization_return_value_;
@@ -799,6 +798,15 @@
 
   friend class ScopedThreadStateChange;
 
+ public:
+  // Thread-local rosalloc runs. There are 34 size brackets in rosalloc
+  // runs (RosAlloc::kNumOfSizeBrackets). We can't refer to the
+  // RosAlloc class due to a header file circular dependency issue.
+  // To compensate, we check that the two values match at RosAlloc
+  // initialization time.
+  static const size_t kRosAllocNumOfSizeBrackets = 34;
+  void* rosalloc_runs_[kRosAllocNumOfSizeBrackets];
+
   DISALLOW_COPY_AND_ASSIGN(Thread);
 };
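The no-thread-suspension assertions above are now guarded by kIsDebugBuild at runtime rather than split across #ifndef NDEBUG variants; the bracketing pattern is unchanged. A short hedged sketch (the function body is hypothetical):

    // Hypothetical use of the debug-only suspension assertions.
    static void ExampleNoSuspensionRegion(Thread* self) {
      const char* old_cause = self->StartAssertNoThreadSuspension("example cause");
      // ... work that must not allow thread suspension ...
      self->EndAssertNoThreadSuspension(old_cause);
    }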
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index ff1ed2a..dd3f11c 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -74,6 +74,15 @@
   return Locks::thread_list_lock_->GetExclusiveOwnerTid();
 }
 
+void ThreadList::DumpNativeStacks(std::ostream& os) {
+  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+  for (const auto& thread : list_) {
+    os << "DUMPING THREAD " << thread->tid_ << "\n";
+    DumpNativeStack(os, thread->tid_, "\t", true);
+    os << "\n";
+  }
+}
+
 void ThreadList::DumpForSigQuit(std::ostream& os) {
   {
     MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
@@ -413,7 +422,7 @@
           return thread;
         }
         if (total_delay_us >= kTimeoutUs) {
-          ThreadSuspendByPeerWarning(self, ERROR, "Thread suspension timed out", peer);
+          ThreadSuspendByPeerWarning(self, FATAL, "Thread suspension timed out", peer);
           if (did_suspend_request) {
             thread->ModifySuspendCount(soa.Self(), -1, debug_suspension);
           }
@@ -477,7 +486,7 @@
           return thread;
         }
         if (total_delay_us >= kTimeoutUs) {
-          ThreadSuspendByThreadIdWarning(ERROR, "Thread suspension timed out", thread_id);
+          ThreadSuspendByThreadIdWarning(WARNING, "Thread suspension timed out", thread_id);
           if (did_suspend_request) {
             thread->ModifySuspendCount(soa.Self(), -1, debug_suspension);
           }
@@ -626,7 +635,7 @@
     {
       // No more threads can be born after we start to shutdown.
       MutexLock mu(self, *Locks::runtime_shutdown_lock_);
-      CHECK(Runtime::Current()->IsShuttingDown());
+      CHECK(Runtime::Current()->IsShuttingDownLocked());
       CHECK_EQ(Runtime::Current()->NumberOfThreadsBeingBorn(), 0U);
     }
     all_threads_are_daemons = true;
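DumpNativeStacks() above writes to any std::ostream. A hedged example call; GetThreadList() and passing LOG(INFO) as a stream both appear elsewhere in this change, but this call site is hypothetical:

    // Hypothetical call site for the new per-thread native stack dump.
    Runtime::Current()->GetThreadList()->DumpNativeStacks(LOG(INFO));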
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index b1b3e88..45994ae 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -124,6 +124,9 @@
     return list_;
   }
 
+  void DumpNativeStacks(std::ostream& os)
+      LOCKS_EXCLUDED(Locks::thread_list_lock_);
+
  private:
   uint32_t AllocThreadId(Thread* self);
   void ReleaseThreadId(Thread* self, uint32_t id) LOCKS_EXCLUDED(allocated_ids_lock_);
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index bb6c475..aca0561 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -28,12 +28,15 @@
 ThreadPoolWorker::ThreadPoolWorker(ThreadPool* thread_pool, const std::string& name,
                                    size_t stack_size)
     : thread_pool_(thread_pool),
-      name_(name),
-      stack_size_(stack_size) {
+      name_(name) {
+  std::string error_msg;
+  stack_.reset(MemMap::MapAnonymous(name.c_str(), nullptr, stack_size, PROT_READ | PROT_WRITE,
+                                    &error_msg));
+  CHECK(stack_.get() != nullptr) << error_msg;
   const char* reason = "new thread pool worker thread";
   pthread_attr_t attr;
   CHECK_PTHREAD_CALL(pthread_attr_init, (&attr), reason);
-  CHECK_PTHREAD_CALL(pthread_attr_setstacksize, (&attr, stack_size), reason);
+  CHECK_PTHREAD_CALL(pthread_attr_setstack, (&attr, stack_->Begin(), stack_->Size()), reason);
   CHECK_PTHREAD_CALL(pthread_create, (&pthread_, &attr, &Callback, this), reason);
   CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attr), reason);
 }
@@ -71,8 +74,9 @@
   }
 }
 
-ThreadPool::ThreadPool(size_t num_threads)
-  : task_queue_lock_("task queue lock"),
+ThreadPool::ThreadPool(const char* name, size_t num_threads)
+  : name_(name),
+    task_queue_lock_("task queue lock"),
     task_queue_condition_("task queue condition", task_queue_lock_),
     completion_condition_("task completion condition", task_queue_lock_),
     started_(false),
@@ -85,7 +89,7 @@
     max_active_workers_(num_threads) {
   Thread* self = Thread::Current();
   while (GetThreadCount() < num_threads) {
-    const std::string name = StringPrintf("Thread pool worker %zu", GetThreadCount());
+    const std::string name = StringPrintf("%s worker thread %zu", name_.c_str(), GetThreadCount());
     threads_.push_back(new ThreadPoolWorker(this, name, ThreadPoolWorker::kDefaultStackSize));
   }
   // Wait for all of the threads to attach.
@@ -270,8 +274,8 @@
 
 WorkStealingWorker::~WorkStealingWorker() {}
 
-WorkStealingThreadPool::WorkStealingThreadPool(size_t num_threads)
-    : ThreadPool(0),
+WorkStealingThreadPool::WorkStealingThreadPool(const char* name, size_t num_threads)
+    : ThreadPool(name, 0),
       work_steal_lock_("work stealing lock"),
       steal_index_(0) {
   while (GetThreadCount() < num_threads) {
diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h
index b9a97a1..e8f9afe 100644
--- a/runtime/thread_pool.h
+++ b/runtime/thread_pool.h
@@ -24,6 +24,7 @@
 #include "base/mutex.h"
 #include "closure.h"
 #include "locks.h"
+#include "mem_map.h"
 
 namespace art {
 
@@ -40,7 +41,8 @@
   static const size_t kDefaultStackSize = 1 * MB;
 
   size_t GetStackSize() const {
-    return stack_size_;
+    DCHECK(stack_.get() != nullptr);
+    return stack_->Size();
   }
 
   virtual ~ThreadPoolWorker();
@@ -52,7 +54,7 @@
 
   ThreadPool* const thread_pool_;
   const std::string name_;
-  const size_t stack_size_;
+  UniquePtr<MemMap> stack_;
   pthread_t pthread_;
 
  private:
@@ -77,7 +79,7 @@
   // after running it, it is the caller's responsibility.
   void AddTask(Thread* self, Task* task);
 
-  explicit ThreadPool(size_t num_threads);
+  explicit ThreadPool(const char* name, size_t num_threads);
   virtual ~ThreadPool();
 
   // Wait for all tasks currently on queue to get completed.
@@ -107,6 +109,7 @@
     return shutting_down_;
   }
 
+  const std::string name_;
   Mutex task_queue_lock_;
   ConditionVariable task_queue_condition_ GUARDED_BY(task_queue_lock_);
   ConditionVariable completion_condition_ GUARDED_BY(task_queue_lock_);
@@ -167,7 +170,7 @@
 
 class WorkStealingThreadPool : public ThreadPool {
  public:
-  explicit WorkStealingThreadPool(size_t num_threads);
+  explicit WorkStealingThreadPool(const char* name, size_t num_threads);
   virtual ~WorkStealingThreadPool();
 
  private:
diff --git a/runtime/thread_pool_test.cc b/runtime/thread_pool_test.cc
index 9b789d2..1b22361 100644
--- a/runtime/thread_pool_test.cc
+++ b/runtime/thread_pool_test.cc
@@ -59,7 +59,7 @@
 // Check that the thread pool actually runs tasks that you assign it.
 TEST_F(ThreadPoolTest, CheckRun) {
   Thread* self = Thread::Current();
-  ThreadPool thread_pool(num_threads);
+  ThreadPool thread_pool("Thread pool test thread pool", num_threads);
   AtomicInteger count(0);
   static const int32_t num_tasks = num_threads * 4;
   for (int32_t i = 0; i < num_tasks; ++i) {
@@ -74,7 +74,7 @@
 
 TEST_F(ThreadPoolTest, StopStart) {
   Thread* self = Thread::Current();
-  ThreadPool thread_pool(num_threads);
+  ThreadPool thread_pool("Thread pool test thread pool", num_threads);
   AtomicInteger count(0);
   static const int32_t num_tasks = num_threads * 4;
   for (int32_t i = 0; i < num_tasks; ++i) {
@@ -129,7 +129,7 @@
 // Test that adding new tasks from within a task works.
 TEST_F(ThreadPoolTest, RecursiveTest) {
   Thread* self = Thread::Current();
-  ThreadPool thread_pool(num_threads);
+  ThreadPool thread_pool("Thread pool test thread pool", num_threads);
   AtomicInteger count(0);
   static const int depth = 8;
   thread_pool.AddTask(self, new TreeTask(&thread_pool, &count, depth));
diff --git a/runtime/trace.cc b/runtime/trace.cc
index ec95a87..da2c80a 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -570,7 +570,8 @@
                       thread_clock_diff, wall_clock_diff);
 }
 
-void Trace::MethodUnwind(Thread* thread, const mirror::ArtMethod* method, uint32_t dex_pc) {
+void Trace::MethodUnwind(Thread* thread, mirror::Object* this_object,
+                         const mirror::ArtMethod* method, uint32_t dex_pc) {
   uint32_t thread_clock_diff = 0;
   uint32_t wall_clock_diff = 0;
   ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
diff --git a/runtime/trace.h b/runtime/trace.h
index ffcb36d..9be015a 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -79,7 +79,8 @@
                             const mirror::ArtMethod* method, uint32_t dex_pc,
                             const JValue& return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void MethodUnwind(Thread* thread, const mirror::ArtMethod* method, uint32_t dex_pc)
+  virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
+                            const mirror::ArtMethod* method, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
                           const mirror::ArtMethod* method, uint32_t new_dex_pc)
diff --git a/runtime/utils.h b/runtime/utils.h
index 6850e8b..4b39acd 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -122,7 +122,7 @@
 // For rounding integers.
 template<typename T>
 static inline T RoundDown(T x, int n) {
-  CHECK(IsPowerOfTwo(n));
+  DCHECK(IsPowerOfTwo(n));
   return (x & -n);
 }
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 9f98061..1e45c60 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -39,6 +39,7 @@
 #include "object_utils.h"
 #include "register_line-inl.h"
 #include "runtime.h"
+#include "scoped_thread_state_change.h"
 #include "verifier/dex_gc_map.h"
 
 namespace art {
@@ -113,17 +114,15 @@
     *error += dex_file.GetLocation();
     return kHardFailure;
   }
-  return VerifyClass(&dex_file,
-                     kh.GetDexCache(),
-                     klass->GetClassLoader(),
-                     class_def,
-                     allow_soft_failures,
-                     error);
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, kh.GetDexCache());
+  SirtRef<mirror::ClassLoader> class_loader(self, klass->GetClassLoader());
+  return VerifyClass(&dex_file, dex_cache, class_loader, class_def, allow_soft_failures, error);
 }
 
 MethodVerifier::FailureKind MethodVerifier::VerifyClass(const DexFile* dex_file,
-                                                        mirror::DexCache* dex_cache,
-                                                        mirror::ClassLoader* class_loader,
+                                                        SirtRef<mirror::DexCache>& dex_cache,
+                                                        SirtRef<mirror::ClassLoader>& class_loader,
                                                         const DexFile::ClassDef* class_def,
                                                         bool allow_soft_failures,
                                                         std::string* error) {
@@ -233,8 +232,8 @@
 
 MethodVerifier::FailureKind MethodVerifier::VerifyMethod(uint32_t method_idx,
                                                          const DexFile* dex_file,
-                                                         mirror::DexCache* dex_cache,
-                                                         mirror::ClassLoader* class_loader,
+                                                         SirtRef<mirror::DexCache>& dex_cache,
+                                                         SirtRef<mirror::ClassLoader>& class_loader,
                                                          const DexFile::ClassDef* class_def,
                                                          const DexFile::CodeItem* code_item,
                                                          mirror::ArtMethod* method,
@@ -243,8 +242,8 @@
   MethodVerifier::FailureKind result = kNoFailure;
   uint64_t start_ns = NanoTime();
 
-  MethodVerifier verifier_(dex_file, dex_cache, class_loader, class_def, code_item, method_idx,
-                           method, method_access_flags, true, allow_soft_failures);
+  MethodVerifier verifier_(dex_file, &dex_cache, &class_loader, class_def, code_item,
+                           method_idx, method, method_access_flags, true, allow_soft_failures);
   if (verifier_.Verify()) {
     // Verification completed, however failures may be pending that didn't cause the verification
     // to hard fail.
@@ -277,13 +276,14 @@
 }
 
 void MethodVerifier::VerifyMethodAndDump(std::ostream& os, uint32_t dex_method_idx,
-                                         const DexFile* dex_file, mirror::DexCache* dex_cache,
-                                         mirror::ClassLoader* class_loader,
+                                         const DexFile* dex_file,
+                                         SirtRef<mirror::DexCache>& dex_cache,
+                                         SirtRef<mirror::ClassLoader>& class_loader,
                                          const DexFile::ClassDef* class_def,
                                          const DexFile::CodeItem* code_item,
                                          mirror::ArtMethod* method,
                                          uint32_t method_access_flags) {
-  MethodVerifier verifier(dex_file, dex_cache, class_loader, class_def, code_item,
+  MethodVerifier verifier(dex_file, &dex_cache, &class_loader, class_def, code_item,
                           dex_method_idx, method, method_access_flags, true, true);
   verifier.Verify();
   verifier.DumpFailures(os);
@@ -291,13 +291,12 @@
   verifier.Dump(os);
 }
 
-MethodVerifier::MethodVerifier(const DexFile* dex_file, mirror::DexCache* dex_cache,
-                               mirror::ClassLoader* class_loader,
+MethodVerifier::MethodVerifier(const DexFile* dex_file, SirtRef<mirror::DexCache>* dex_cache,
+                               SirtRef<mirror::ClassLoader>* class_loader,
                                const DexFile::ClassDef* class_def,
-                               const DexFile::CodeItem* code_item,
-                               uint32_t dex_method_idx, mirror::ArtMethod* method,
-                               uint32_t method_access_flags, bool can_load_classes,
-                               bool allow_soft_failures)
+                               const DexFile::CodeItem* code_item, uint32_t dex_method_idx,
+                               mirror::ArtMethod* method, uint32_t method_access_flags,
+                               bool can_load_classes, bool allow_soft_failures)
     : reg_types_(can_load_classes),
       work_insn_idx_(-1),
       dex_method_idx_(dex_method_idx),
@@ -323,12 +322,19 @@
   DCHECK(class_def != nullptr);
 }
 
+MethodVerifier::~MethodVerifier() {
+  STLDeleteElements(&failure_messages_);
+}
+
 void MethodVerifier::FindLocksAtDexPc(mirror::ArtMethod* m, uint32_t dex_pc,
                                       std::vector<uint32_t>& monitor_enter_dex_pcs) {
   MethodHelper mh(m);
-  MethodVerifier verifier(&mh.GetDexFile(), mh.GetDexCache(), mh.GetClassLoader(),
-                          &mh.GetClassDef(), mh.GetCodeItem(), m->GetDexMethodIndex(),
-                          m, m->GetAccessFlags(), false, true);
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, mh.GetDexCache());
+  SirtRef<mirror::ClassLoader> class_loader(self, mh.GetClassLoader());
+  MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader, &mh.GetClassDef(),
+                          mh.GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), false,
+                          true);
   verifier.interesting_dex_pc_ = dex_pc;
   verifier.monitor_enter_dex_pcs_ = &monitor_enter_dex_pcs;
   verifier.FindLocksAtDexPc();
@@ -348,9 +354,12 @@
 mirror::ArtField* MethodVerifier::FindAccessedFieldAtDexPc(mirror::ArtMethod* m,
                                                         uint32_t dex_pc) {
   MethodHelper mh(m);
-  MethodVerifier verifier(&mh.GetDexFile(), mh.GetDexCache(), mh.GetClassLoader(),
-                          &mh.GetClassDef(), mh.GetCodeItem(), m->GetDexMethodIndex(),
-                          m, m->GetAccessFlags(), false, true);
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, mh.GetDexCache());
+  SirtRef<mirror::ClassLoader> class_loader(self, mh.GetClassLoader());
+  MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader, &mh.GetClassDef(),
+                          mh.GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), false,
+                          true);
   return verifier.FindAccessedFieldAtDexPc(dex_pc);
 }
 
@@ -374,11 +383,14 @@
 }
 
 mirror::ArtMethod* MethodVerifier::FindInvokedMethodAtDexPc(mirror::ArtMethod* m,
-                                                                 uint32_t dex_pc) {
+                                                            uint32_t dex_pc) {
   MethodHelper mh(m);
-  MethodVerifier verifier(&mh.GetDexFile(), mh.GetDexCache(), mh.GetClassLoader(),
-                          &mh.GetClassDef(), mh.GetCodeItem(), m->GetDexMethodIndex(),
-                          m, m->GetAccessFlags(), false, true);
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, mh.GetDexCache());
+  SirtRef<mirror::ClassLoader> class_loader(self, mh.GetClassLoader());
+  MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader, &mh.GetClassDef(),
+                          mh.GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), false,
+                          true);
   return verifier.FindInvokedMethodAtDexPc(dex_pc);
 }
 
@@ -589,7 +601,7 @@
       if (iterator.GetHandlerTypeIndex() != DexFile::kDexNoIndex16) {
         mirror::Class* exception_type = linker->ResolveType(*dex_file_,
                                                             iterator.GetHandlerTypeIndex(),
-                                                            dex_cache_, class_loader_);
+                                                            *dex_cache_, *class_loader_);
         if (exception_type == NULL) {
           DCHECK(Thread::Current()->IsExceptionPending());
           Thread::Current()->ClearException();
@@ -1017,26 +1029,6 @@
   return true;
 }
 
-static const std::vector<uint8_t>* CreateLengthPrefixedDexGcMap(
-    const std::vector<uint8_t>& gc_map) {
-  std::vector<uint8_t>* length_prefixed_gc_map = new std::vector<uint8_t>;
-  length_prefixed_gc_map->reserve(gc_map.size() + 4);
-  length_prefixed_gc_map->push_back((gc_map.size() & 0xff000000) >> 24);
-  length_prefixed_gc_map->push_back((gc_map.size() & 0x00ff0000) >> 16);
-  length_prefixed_gc_map->push_back((gc_map.size() & 0x0000ff00) >> 8);
-  length_prefixed_gc_map->push_back((gc_map.size() & 0x000000ff) >> 0);
-  length_prefixed_gc_map->insert(length_prefixed_gc_map->end(),
-                                 gc_map.begin(),
-                                 gc_map.end());
-  DCHECK_EQ(gc_map.size() + 4, length_prefixed_gc_map->size());
-  DCHECK_EQ(gc_map.size(),
-            static_cast<size_t>((length_prefixed_gc_map->at(0) << 24) |
-                                (length_prefixed_gc_map->at(1) << 16) |
-                                (length_prefixed_gc_map->at(2) << 8) |
-                                (length_prefixed_gc_map->at(3) << 0)));
-  return length_prefixed_gc_map;
-}
-
 bool MethodVerifier::VerifyCodeFlow() {
   uint16_t registers_size = code_item_->registers_size_;
   uint32_t insns_size = code_item_->insns_size_in_code_units_;
@@ -1076,16 +1068,15 @@
     bool compile = IsCandidateForCompilation(ref, method_access_flags_);
     if (compile) {
       /* Generate a register map and add it to the method. */
-      UniquePtr<const std::vector<uint8_t> > map(GenerateGcMap());
-      if (map.get() == NULL) {
+      const std::vector<uint8_t>* dex_gc_map = GenerateLengthPrefixedGcMap();
+      if (dex_gc_map == NULL) {
         DCHECK_NE(failures_.size(), 0U);
         return false;  // Not a real failure, but a failure to encode
       }
       if (kIsDebugBuild) {
-        VerifyGcMap(*map);
+        VerifyLengthPrefixedGcMap(*dex_gc_map);
       }
-      const std::vector<uint8_t>* dex_gc_map = CreateLengthPrefixedDexGcMap(*(map.get()));
-      verifier::MethodVerifier::SetDexGcMap(ref, *dex_gc_map);
+      verifier::MethodVerifier::SetDexGcMap(ref, dex_gc_map);
     }
 
     if (has_check_casts_) {
@@ -1107,10 +1098,8 @@
 
 std::ostream& MethodVerifier::DumpFailures(std::ostream& os) {
   DCHECK_EQ(failures_.size(), failure_messages_.size());
-  if (VLOG_IS_ON(verifier)) {
-      for (size_t i = 0; i < failures_.size(); ++i) {
-          os << failure_messages_[i]->str() << "\n";
-      }
+  for (size_t i = 0; i < failures_.size(); ++i) {
+      os << failure_messages_[i]->str() << "\n";
   }
   return os;
 }
@@ -1211,7 +1200,8 @@
         // it's effectively considered initialized the instant we reach here (in the sense that we
         // can return without doing anything or call virtual methods).
         {
-          const RegType& reg_type = reg_types_.FromDescriptor(class_loader_, descriptor, false);
+          const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor,
+                                                              false);
           reg_line->SetRegisterType(arg_start + cur_arg, reg_type);
         }
         break;
@@ -1853,7 +1843,8 @@
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid fill-array-data with array type "
                                             << array_type;
         } else {
-          const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_);
+          const RegType& component_type = reg_types_.GetComponentType(array_type,
+                                                                      class_loader_->get());
           DCHECK(!component_type.IsConflict());
           if (component_type.IsNonZeroReferenceTypes()) {
             Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid fill-array-data with component type "
@@ -2168,7 +2159,7 @@
         const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
         uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
         const char* descriptor = dex_file_->StringByTypeIdx(return_type_idx);
-        return_type = &reg_types_.FromDescriptor(class_loader_, descriptor, false);
+        return_type = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
       }
       if (!return_type->IsLowHalf()) {
         work_line_->SetResultRegisterType(*return_type);
@@ -2235,8 +2226,8 @@
          */
         work_line_->MarkRefsAsInitialized(this_type);
       }
-      const RegType& return_type = reg_types_.FromDescriptor(class_loader_, return_type_descriptor,
-                                                             false);
+      const RegType& return_type = reg_types_.FromDescriptor(class_loader_->get(),
+                                                             return_type_descriptor, false);
       if (!return_type.IsLowHalf()) {
         work_line_->SetResultRegisterType(return_type);
       } else {
@@ -2257,11 +2248,12 @@
           uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
           const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
           uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
-          descriptor =  dex_file_->StringByTypeIdx(return_type_idx);
+          descriptor = dex_file_->StringByTypeIdx(return_type_idx);
         } else {
           descriptor = MethodHelper(called_method).GetReturnTypeDescriptor();
         }
-        const RegType& return_type =  reg_types_.FromDescriptor(class_loader_, descriptor, false);
+        const RegType& return_type =  reg_types_.FromDescriptor(class_loader_->get(), descriptor,
+                                                                false);
         if (!return_type.IsLowHalf()) {
           work_line_->SetResultRegisterType(return_type);
         } else {
@@ -2318,7 +2310,8 @@
       } else {
         descriptor = MethodHelper(abs_method).GetReturnTypeDescriptor();
       }
-      const RegType& return_type = reg_types_.FromDescriptor(class_loader_, descriptor, false);
+      const RegType& return_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor,
+                                                             false);
       if (!return_type.IsLowHalf()) {
         work_line_->SetResultRegisterType(return_type);
       } else {
@@ -2584,7 +2577,8 @@
       mirror::ArtMethod* called_method = VerifyInvokeVirtualQuickArgs(inst, is_range);
       if (called_method != NULL) {
         const char* descriptor = MethodHelper(called_method).GetReturnTypeDescriptor();
-        const RegType& return_type = reg_types_.FromDescriptor(class_loader_, descriptor, false);
+        const RegType& return_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor,
+                                                               false);
         if (!return_type.IsLowHalf()) {
           work_line_->SetResultRegisterType(return_type);
         } else {
@@ -2850,18 +2844,18 @@
 const RegType& MethodVerifier::ResolveClassAndCheckAccess(uint32_t class_idx) {
   const char* descriptor = dex_file_->StringByTypeIdx(class_idx);
   const RegType& referrer = GetDeclaringClass();
-  mirror::Class* klass = dex_cache_->GetResolvedType(class_idx);
+  mirror::Class* klass = (*dex_cache_)->GetResolvedType(class_idx);
   const RegType& result =
       klass != NULL ? reg_types_.FromClass(descriptor, klass,
                                            klass->CannotBeAssignedFromOtherTypes())
-                    : reg_types_.FromDescriptor(class_loader_, descriptor, false);
+                    : reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
   if (result.IsConflict()) {
     Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "accessing broken descriptor '" << descriptor
         << "' in " << referrer;
     return result;
   }
   if (klass == NULL && !result.IsUnresolvedTypes()) {
-    dex_cache_->SetResolvedType(class_idx, result.GetClass());
+    (*dex_cache_)->SetResolvedType(class_idx, result.GetClass());
   }
   // Check if access is allowed. Unresolved types use xxxWithAccessCheck to
   // check at runtime if access is allowed and so pass here. If result is
@@ -2935,7 +2929,7 @@
   }
   mirror::Class* klass = klass_type.GetClass();
   const RegType& referrer = GetDeclaringClass();
-  mirror::ArtMethod* res_method = dex_cache_->GetResolvedMethod(dex_method_idx);
+  mirror::ArtMethod* res_method = (*dex_cache_)->GetResolvedMethod(dex_method_idx);
   if (res_method == NULL) {
     const char* name = dex_file_->GetMethodName(method_id);
     const Signature signature = dex_file_->GetMethodSignature(method_id);
@@ -2948,7 +2942,7 @@
       res_method = klass->FindVirtualMethod(name, signature);
     }
     if (res_method != NULL) {
-      dex_cache_->SetResolvedMethod(dex_method_idx, res_method);
+      (*dex_cache_)->SetResolvedMethod(dex_method_idx, res_method);
     } else {
       // If a virtual or interface method wasn't found with the expected type, look in
       // the direct methods. This can happen when the wrong invoke type is used or when
@@ -3112,7 +3106,7 @@
           << " missing signature component";
       return NULL;
     }
-    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_, descriptor, false);
+    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
     uint32_t get_reg = is_range ? inst->VRegC_3rc() + actual_args : arg[actual_args];
     if (reg_type.IsIntegralTypes()) {
       const RegType& src_type = work_line_->GetRegisterType(get_reg);
@@ -3136,8 +3130,7 @@
 }
 
 mirror::ArtMethod* MethodVerifier::GetQuickInvokedMethod(const Instruction* inst,
-                                                              RegisterLine* reg_line,
-                                                              bool is_range) {
+                                                         RegisterLine* reg_line, bool is_range) {
   DCHECK(inst->Opcode() == Instruction::INVOKE_VIRTUAL_QUICK ||
          inst->Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK);
   const RegType& actual_arg_type = reg_line->GetInvocationThis(inst, is_range);
@@ -3152,11 +3145,13 @@
   } else {
     const std::string& descriptor(actual_arg_type.GetDescriptor());
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    this_class = class_linker->FindClass(descriptor.c_str(), class_loader_);
+    this_class = class_linker->FindClass(descriptor.c_str(), *class_loader_);
     if (this_class == NULL) {
-      Thread::Current()->ClearException();
+      Thread* self = Thread::Current();
+      self->ClearException();
       // Look for a system class
-      this_class = class_linker->FindClass(descriptor.c_str(), NULL);
+      SirtRef<mirror::ClassLoader> null_class_loader(self, nullptr);
+      this_class = class_linker->FindClass(descriptor.c_str(), null_class_loader);
     }
   }
   if (this_class == NULL) {
@@ -3246,7 +3241,7 @@
                                         << " missing signature component";
       return NULL;
     }
-    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_, descriptor, false);
+    const RegType& reg_type = reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
     uint32_t get_reg = is_range ? inst->VRegC_3rc() + actual_args : arg[actual_args];
     if (!work_line_->VerifyRegisterType(get_reg, reg_type)) {
       return res_method;
@@ -3290,7 +3285,7 @@
     } else {
       // Verify each register. If "arg_count" is bad, VerifyRegisterType() will run off the end of
       // the list and fail. It's legal, if silly, for arg_count to be zero.
-      const RegType& expected_type = reg_types_.GetComponentType(res_type, class_loader_);
+      const RegType& expected_type = reg_types_.GetComponentType(res_type, class_loader_->get());
       uint32_t arg_count = (is_range) ? inst->VRegA_3rc() : inst->VRegA_35c();
       uint32_t arg[5];
       if (!is_range) {
@@ -3332,7 +3327,7 @@
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "not array type " << array_type << " with aget";
     } else {
       /* verify the class */
-      const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_);
+      const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_->get());
       if (!component_type.IsReferenceTypes() && !is_primitive) {
         Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "primitive array type " << array_type
             << " source for aget-object";
@@ -3409,7 +3404,7 @@
     } else if (!array_type.IsArrayTypes()) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "not array type " << array_type << " with aput";
     } else {
-      const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_);
+      const RegType& component_type = reg_types_.GetComponentType(array_type, class_loader_->get());
       const uint32_t vregA = inst->VRegA_23x();
       if (is_primitive) {
         VerifyPrimitivePut(component_type, insn_type, vregA);
@@ -3441,10 +3436,9 @@
   if (klass_type.IsUnresolvedTypes()) {
     return NULL;  // Can't resolve Class so no more to do here, will do checking at runtime.
   }
-  mirror::ArtField* field = Runtime::Current()->GetClassLinker()->ResolveFieldJLS(*dex_file_,
-                                                                               field_idx,
-                                                                               dex_cache_,
-                                                                               class_loader_);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  mirror::ArtField* field = class_linker->ResolveFieldJLS(*dex_file_, field_idx, *dex_cache_,
+                                                          *class_loader_);
   if (field == NULL) {
     VLOG(verifier) << "Unable to resolve static field " << field_idx << " ("
               << dex_file_->GetFieldName(field_id) << ") in "
@@ -3460,9 +3454,8 @@
   } else if (!field->IsStatic()) {
     Fail(VERIFY_ERROR_CLASS_CHANGE) << "expected field " << PrettyField(field) << " to be static";
     return NULL;
-  } else {
-    return field;
   }
+  return field;
 }
 
 mirror::ArtField* MethodVerifier::GetInstanceField(const RegType& obj_type, int field_idx) {
@@ -3478,10 +3471,9 @@
   if (klass_type.IsUnresolvedTypes()) {
     return NULL;  // Can't resolve Class so no more to do here
   }
-  mirror::ArtField* field = Runtime::Current()->GetClassLinker()->ResolveFieldJLS(*dex_file_,
-                                                                               field_idx,
-                                                                               dex_cache_,
-                                                                               class_loader_);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  mirror::ArtField* field = class_linker->ResolveFieldJLS(*dex_file_, field_idx, *dex_cache_,
+                                                          *class_loader_);
   if (field == NULL) {
     VLOG(verifier) << "Unable to resolve instance field " << field_idx << " ("
               << dex_file_->GetFieldName(field_id) << ") in "
@@ -3550,8 +3542,7 @@
   if (field_type == nullptr) {
     const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
     const char* descriptor = dex_file_->GetFieldTypeDescriptor(field_id);
-    mirror::ClassLoader* loader = class_loader_;
-    field_type = &reg_types_.FromDescriptor(loader, descriptor, false);
+    field_type = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
   }
   const uint32_t vregA = (is_static) ? inst->VRegA_21c() : inst->VRegA_22c();
   if (is_primitive) {
@@ -3613,8 +3604,7 @@
   if (field_type == nullptr) {
     const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
     const char* descriptor = dex_file_->GetFieldTypeDescriptor(field_id);
-    mirror::ClassLoader* loader = class_loader_;
-    field_type = &reg_types_.FromDescriptor(loader, descriptor, false);
+    field_type = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
   }
   const uint32_t vregA = (is_static) ? inst->VRegA_21c() : inst->VRegA_22c();
   if (is_primitive) {
@@ -3671,11 +3661,13 @@
     // We need to resolve the class from its descriptor.
     const std::string& descriptor(object_type.GetDescriptor());
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    object_class = class_linker->FindClass(descriptor.c_str(), class_loader_);
+    Thread* self = Thread::Current();
+    object_class = class_linker->FindClass(descriptor.c_str(), *class_loader_);
     if (object_class == NULL) {
-      Thread::Current()->ClearException();
+      self->ClearException();
       // Look for a system class
-      object_class = class_linker->FindClass(descriptor.c_str(), NULL);
+      SirtRef<mirror::ClassLoader> null_class_loader(self, nullptr);
+      object_class = class_linker->FindClass(descriptor.c_str(), null_class_loader);
     }
   }
   if (object_class == NULL) {
@@ -3881,8 +3873,8 @@
       MethodHelper mh(mirror_method_);
       mirror::Class* return_type_class = mh.GetReturnType();
       if (return_type_class != nullptr) {
-        return_type_ =&reg_types_.FromClass(mh.GetReturnTypeDescriptor(), return_type_class,
-                                            return_type_class->CannotBeAssignedFromOtherTypes());
+        return_type_ = &reg_types_.FromClass(mh.GetReturnTypeDescriptor(), return_type_class,
+                                             return_type_class->CannotBeAssignedFromOtherTypes());
       } else {
         Thread* self = Thread::Current();
         DCHECK(self->IsExceptionPending());
@@ -3894,7 +3886,7 @@
       const DexFile::ProtoId& proto_id = dex_file_->GetMethodPrototype(method_id);
       uint16_t return_type_idx = proto_id.return_type_idx_;
       const char* descriptor = dex_file_->GetTypeDescriptor(dex_file_->GetTypeId(return_type_idx));
-      return_type_ = &reg_types_.FromDescriptor(class_loader_, descriptor, false);
+      return_type_ = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
     }
   }
   return *return_type_;
@@ -3910,7 +3902,7 @@
       declaring_class_ = &reg_types_.FromClass(descriptor, klass,
                                                klass->CannotBeAssignedFromOtherTypes());
     } else {
-      declaring_class_ = &reg_types_.FromDescriptor(class_loader_, descriptor, false);
+      declaring_class_ = &reg_types_.FromDescriptor(class_loader_->get(), descriptor, false);
     }
   }
   return *declaring_class_;
@@ -3969,7 +3961,8 @@
         // String[] in which case the stores need to be of Strings.
         if (array_type.IsPreciseReference()) {
           const RegType& value_type(line->GetRegisterType(inst->VRegA_23x()));
-          const RegType& component_type(reg_types_.GetComponentType(array_type, class_loader_));
+          const RegType& component_type(reg_types_.GetComponentType(array_type,
+                                                                    class_loader_->get()));
           is_safe_cast = component_type.IsStrictlyAssignableFrom(value_type);
         }
       }
@@ -4026,8 +4019,8 @@
       // We can't devirtualize abstract classes except on arrays of abstract classes.
       continue;
     }
-    mirror::ArtMethod* abstract_method =
-        dex_cache_->GetResolvedMethod(is_range ? inst->VRegB_3rc() : inst->VRegB_35c());
+    mirror::ArtMethod* abstract_method = (*dex_cache_)->GetResolvedMethod(
+        is_range ? inst->VRegB_3rc() : inst->VRegB_35c());
     if (abstract_method == NULL) {
       // If the method is not found in the cache this means that it was never found
       // by ResolveMethodAndCheckAccess() called when verifying invoke_*.
@@ -4061,7 +4054,7 @@
   return pc_to_concrete_method_map.release();
 }
 
-const std::vector<uint8_t>* MethodVerifier::GenerateGcMap() {
+const std::vector<uint8_t>* MethodVerifier::GenerateLengthPrefixedGcMap() {
   size_t num_entries, ref_bitmap_bits, pc_bits;
   ComputeGcMapSizes(&num_entries, &ref_bitmap_bits, &pc_bits);
   // There's a single byte to encode the size of each bitmap
@@ -4099,7 +4092,12 @@
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Failed to encode GC map (size=" << table_size << ")";
     return NULL;
   }
-  table->reserve(table_size);
+  table->reserve(table_size + 4);  // table_size plus the length prefix
+  // Write table size
+  table->push_back((table_size & 0xff000000) >> 24);
+  table->push_back((table_size & 0x00ff0000) >> 16);
+  table->push_back((table_size & 0x0000ff00) >> 8);
+  table->push_back((table_size & 0x000000ff) >> 0);
   // Write table header
   table->push_back(format | ((ref_bitmap_bytes >> DexPcToReferenceMap::kRegMapFormatShift) &
                              ~DexPcToReferenceMap::kRegMapFormatMask));
@@ -4117,14 +4115,18 @@
       line->WriteReferenceBitMap(*table, ref_bitmap_bytes);
     }
   }
-  DCHECK_EQ(table->size(), table_size);
+  DCHECK_EQ(table->size(), table_size + 4);  // table_size plus the length prefix
   return table;
 }
 
-void MethodVerifier::VerifyGcMap(const std::vector<uint8_t>& data) {
+void MethodVerifier::VerifyLengthPrefixedGcMap(const std::vector<uint8_t>& data) {
   // Check that for every GC point there is a map entry, there aren't entries for non-GC points,
   // that the table data is well formed and all references are marked (or not) in the bitmap
-  DexPcToReferenceMap map(&data[0], data.size());
+  DCHECK_GE(data.size(), 4u);
+  size_t table_size = data.size() - 4u;
+  DCHECK_EQ(table_size, static_cast<size_t>((data[0] << 24) | (data[1] << 16) |
+                                            (data[2] << 8) | (data[3] << 0)));
+  DexPcToReferenceMap map(&data[4], table_size);
   size_t map_index = 0;
   for (size_t i = 0; i < code_item_->insns_size_in_code_units_; i++) {
     const uint8_t* reg_bitmap = map.FindBitMap(i, false);
@@ -4150,7 +4152,7 @@
   }
 }
 
-void MethodVerifier::SetDexGcMap(MethodReference ref, const std::vector<uint8_t>& gc_map) {
+void MethodVerifier::SetDexGcMap(MethodReference ref, const std::vector<uint8_t>* gc_map) {
   DCHECK(Runtime::Current()->IsCompiler());
   {
     WriterMutexLock mu(Thread::Current(), *dex_gc_maps_lock_);
@@ -4159,7 +4161,7 @@
       delete it->second;
       dex_gc_maps_->erase(it);
     }
-    dex_gc_maps_->Put(ref, &gc_map);
+    dex_gc_maps_->Put(ref, gc_map);
   }
   DCHECK(GetDexGcMap(ref) != NULL);
 }
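
Note on the hunk above: GenerateLengthPrefixedGcMap()/VerifyLengthPrefixedGcMap() now frame the GC map with a 4-byte big-endian size prefix ahead of the existing format byte and entries. A minimal, self-contained sketch of just that framing follows; it is illustrative only, and PrefixWithLength/UnwrapLengthPrefix are invented names, not verifier API.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Prepend a 4-byte big-endian length before the raw map bytes, mirroring the
// push_back sequence added to GenerateLengthPrefixedGcMap().
static std::vector<uint8_t> PrefixWithLength(const std::vector<uint8_t>& raw_map) {
  const uint32_t table_size = static_cast<uint32_t>(raw_map.size());
  std::vector<uint8_t> out;
  out.reserve(table_size + 4);  // table_size plus the length prefix
  out.push_back((table_size & 0xff000000) >> 24);
  out.push_back((table_size & 0x00ff0000) >> 16);
  out.push_back((table_size & 0x0000ff00) >> 8);
  out.push_back((table_size & 0x000000ff) >> 0);
  out.insert(out.end(), raw_map.begin(), raw_map.end());
  return out;
}

// Recover the payload, checking the prefix the same way VerifyLengthPrefixedGcMap() does.
static const uint8_t* UnwrapLengthPrefix(const std::vector<uint8_t>& data, size_t* table_size) {
  assert(data.size() >= 4u);
  *table_size = data.size() - 4u;
  const size_t encoded = static_cast<size_t>((data[0] << 24) | (data[1] << 16) |
                                             (data[2] << 8) | (data[3] << 0));
  assert(encoded == *table_size);
  return &data[4];
}

int main() {
  std::vector<uint8_t> raw = {0x01, 0x02, 0x03};
  std::vector<uint8_t> framed = PrefixWithLength(raw);
  size_t size = 0;
  const uint8_t* payload = UnwrapLengthPrefix(framed, &size);
  printf("payload size = %zu, first byte = 0x%02x\n", size, static_cast<unsigned>(payload[0]));
  return 0;
}
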
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 57fde1d..f72898e 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -33,6 +33,7 @@
 #include "reg_type_cache-inl.h"
 #include "register_line.h"
 #include "safe_map.h"
+#include "sirt_ref.h"
 #include "UniquePtr.h"
 
 namespace art {
@@ -142,14 +143,15 @@
   static FailureKind VerifyClass(const mirror::Class* klass, bool allow_soft_failures,
                                  std::string* error)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static FailureKind VerifyClass(const DexFile* dex_file, mirror::DexCache* dex_cache,
-                                 mirror::ClassLoader* class_loader,
+  static FailureKind VerifyClass(const DexFile* dex_file, SirtRef<mirror::DexCache>& dex_cache,
+                                 SirtRef<mirror::ClassLoader>& class_loader,
                                  const DexFile::ClassDef* class_def,
                                  bool allow_soft_failures, std::string* error)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void VerifyMethodAndDump(std::ostream& os, uint32_t method_idx, const DexFile* dex_file,
-                                  mirror::DexCache* dex_cache, mirror::ClassLoader* class_loader,
+                                  SirtRef<mirror::DexCache>& dex_cache,
+                                  SirtRef<mirror::ClassLoader>& class_loader,
                                   const DexFile::ClassDef* class_def,
                                   const DexFile::CodeItem* code_item,
                                   mirror::ArtMethod* method, uint32_t method_access_flags)
@@ -217,16 +219,13 @@
     return can_load_classes_;
   }
 
-  MethodVerifier(const DexFile* dex_file, mirror::DexCache* dex_cache,
-                 mirror::ClassLoader* class_loader, const DexFile::ClassDef* class_def,
-                 const DexFile::CodeItem* code_item,
-                 uint32_t method_idx, mirror::ArtMethod* method,
+  MethodVerifier(const DexFile* dex_file, SirtRef<mirror::DexCache>* dex_cache,
+                 SirtRef<mirror::ClassLoader>* class_loader, const DexFile::ClassDef* class_def,
+                 const DexFile::CodeItem* code_item, uint32_t method_idx, mirror::ArtMethod* method,
                  uint32_t access_flags, bool can_load_classes, bool allow_soft_failures)
           SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ~MethodVerifier() {
-    STLDeleteElements(&failure_messages_);
-  }
+  ~MethodVerifier();
 
   // Run verification on the method. Returns true if verification completes and false if the input
   // has an irrecoverable corruption.
@@ -257,8 +256,8 @@
    *      for code flow problems.
    */
   static FailureKind VerifyMethod(uint32_t method_idx, const DexFile* dex_file,
-                                  mirror::DexCache* dex_cache,
-                                  mirror::ClassLoader* class_loader,
+                                  SirtRef<mirror::DexCache>& dex_cache,
+                                  SirtRef<mirror::ClassLoader>& class_loader,
                                   const DexFile::ClassDef* class_def_idx,
                                   const DexFile::CodeItem* code_item,
                                   mirror::ArtMethod* method, uint32_t method_access_flags,
@@ -615,10 +614,10 @@
    * encode it in some clever fashion.
    * Returns a pointer to a newly-allocated RegisterMap, or NULL on failure.
    */
-  const std::vector<uint8_t>* GenerateGcMap();
+  const std::vector<uint8_t>* GenerateLengthPrefixedGcMap();
 
   // Verify that the GC map associated with method_ is well formed
-  void VerifyGcMap(const std::vector<uint8_t>& data);
+  void VerifyLengthPrefixedGcMap(const std::vector<uint8_t>& data);
 
   // Compute sizes for GC map data
   void ComputeGcMapSizes(size_t* gc_points, size_t* ref_bitmap_bits, size_t* log2_max_gc_pc);
@@ -630,7 +629,7 @@
       MethodReferenceComparator> DexGcMapTable;
   static ReaderWriterMutex* dex_gc_maps_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   static DexGcMapTable* dex_gc_maps_ GUARDED_BY(dex_gc_maps_lock_);
-  static void SetDexGcMap(MethodReference ref, const std::vector<uint8_t>& dex_gc_map)
+  static void SetDexGcMap(MethodReference ref, const std::vector<uint8_t>* dex_gc_map)
       LOCKS_EXCLUDED(dex_gc_maps_lock_);
 
 
@@ -685,9 +684,9 @@
   const RegType* return_type_;  // Lazily computed return type of the method.
   const DexFile* const dex_file_;  // The dex file containing the method.
   // The dex_cache for the declaring class of the method.
-  mirror::DexCache* dex_cache_ GUARDED_BY(Locks::mutator_lock_);
+  SirtRef<mirror::DexCache>* dex_cache_ GUARDED_BY(Locks::mutator_lock_);
   // The class loader for the declaring class of the method.
-  mirror::ClassLoader* class_loader_ GUARDED_BY(Locks::mutator_lock_);
+  SirtRef<mirror::ClassLoader>* class_loader_ GUARDED_BY(Locks::mutator_lock_);
   const DexFile::ClassDef* const class_def_;  // The class def of the declaring class of the method.
   const DexFile::CodeItem* const code_item_;  // The code item containing the code for the method.
   const RegType* declaring_class_;  // Lazily computed reg type of the method's declaring class.
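
Note on the header changes above: the verifier now takes SirtRef<mirror::DexCache> and SirtRef<mirror::ClassLoader> handles instead of raw mirror pointers, presumably so these references stay registered with the runtime's stack indirect reference table (and can be updated if the objects move) while verification runs. The sketch below is a rough, self-contained model of that scoped-handle pattern; RootTable and ScopedRootRef are invented names and simplify away almost everything about ART's real SirtRef.

#include <cstddef>
#include <cstdio>
#include <vector>

struct Object { int payload; };  // stand-in for a mirror:: object

// Toy root table: slots registered here are what a hypothetical moving collector
// would scan and update; raw Object* held outside the table would be left stale.
class RootTable {
 public:
  size_t Push(Object* obj) { slots_.push_back(obj); return slots_.size() - 1; }
  void Pop() { slots_.pop_back(); }
  Object*& Slot(size_t index) { return slots_[index]; }
 private:
  std::vector<Object*> slots_;
};

// Scoped handle in the spirit of SirtRef: registers the object on construction,
// unregisters on destruction (strictly LIFO in this toy version), and always
// reads through the table slot so it observes any update the collector made.
class ScopedRootRef {
 public:
  ScopedRootRef(RootTable* table, Object* obj) : table_(table), index_(table->Push(obj)) {}
  ~ScopedRootRef() { table_->Pop(); }
  Object* get() const { return table_->Slot(index_); }
  Object* operator->() const { return get(); }
 private:
  RootTable* const table_;
  const size_t index_;
};

int main() {
  RootTable table;
  Object a{41};
  Object b{42};
  ScopedRootRef ref(&table, &a);
  table.Slot(0) = &b;  // simulate the collector moving the object and fixing up the slot
  printf("payload after 'move': %d\n", ref->payload);  // 42: the handle followed the update
  return 0;
}
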
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 50d1583..d82e75d 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -928,7 +928,8 @@
     }
     mirror::Class* common_elem = ClassJoin(s_ct, t_ct);
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    mirror::ClassLoader* class_loader = s->GetClassLoader();
+    Thread* self = Thread::Current();
+    SirtRef<mirror::ClassLoader> class_loader(self, s->GetClassLoader());
     std::string descriptor("[");
     descriptor += ClassHelper(common_elem).GetDescriptor();
     mirror::Class* array_class = class_linker->FindClass(descriptor.c_str(), class_loader);
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index 446dd00..9c9673a 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -140,9 +140,10 @@
   // Class was not found, must create new type.
   // Try resolving class
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  SirtRef<mirror::ClassLoader> class_loader(Thread::Current(), loader);
   mirror::Class* klass = NULL;
   if (can_load_classes_) {
-    klass = class_linker->FindClass(descriptor, loader);
+    klass = class_linker->FindClass(descriptor, class_loader);
   } else {
     klass = class_linker->LookupClass(descriptor, loader);
     if (klass != NULL && !klass->IsLoaded()) {
@@ -261,11 +262,11 @@
     FloatType::Destroy();
     DoubleLoType::Destroy();
     DoubleHiType::Destroy();
-    for (uint16_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
+    for (int32_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
       PreciseConstType* type = small_precise_constants_[value - kMinSmallConstant];
       delete type;
+      small_precise_constants_[value - kMinSmallConstant] = nullptr;
     }
-
     RegTypeCache::primitive_initialized_ = false;
     RegTypeCache::primitive_count_ = 0;
   }
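
Note on the Destroy() hunk above: switching the loop counter from uint16_t to int32_t matters if the small-constant range starts below zero, since a negative lower bound assigned to an unsigned 16-bit counter wraps to a large value and the cleanup loop never runs, leaking the cached types. A minimal reproduction of that pitfall follows; kMin and kMax are illustrative values, not ART's actual kMinSmallConstant/kMaxSmallConstant.

#include <cstdint>
#include <cstdio>

static const int32_t kMin = -1;  // illustrative lower bound below zero
static const int32_t kMax = 4;

int main() {
  // Buggy form: -1 converted to uint16_t becomes 65535, so 65535 <= 4 is false
  // on the first check and the body never executes.
  int buggy_iterations = 0;
  for (uint16_t value = kMin; value <= kMax; ++value) {
    ++buggy_iterations;
  }

  // Fixed form: a signed counter walks the whole range, so every slot gets
  // visited (and, as in the patched Destroy(), could be deleted and cleared).
  int fixed_iterations = 0;
  for (int32_t value = kMin; value <= kMax; ++value) {
    ++fixed_iterations;
  }

  printf("buggy: %d iterations, fixed: %d iterations\n", buggy_iterations, fixed_iterations);
  return 0;
}
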
diff --git a/test/040-miranda/expected.txt b/test/040-miranda/expected.txt
index e22bbd9..011be2a 100644
--- a/test/040-miranda/expected.txt
+++ b/test/040-miranda/expected.txt
@@ -10,3 +10,5 @@
   inInterface:  true
   inInterface2: 28
   inAbstract:   true
+Test getting miranda method via reflection:
+  caught expected NoSuchMethodException
diff --git a/test/040-miranda/src/Main.java b/test/040-miranda/src/Main.java
index 1fd8287..ff5eba0 100644
--- a/test/040-miranda/src/Main.java
+++ b/test/040-miranda/src/Main.java
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+import java.lang.reflect.Method;
+
 /**
  * Miranda testing.
  */
@@ -37,5 +39,16 @@
         System.out.println("  inInterface:  " + mira2.inInterface());
         System.out.println("  inInterface2: " + mira2.inInterface2());
         System.out.println("  inAbstract:   " + mira2.inAbstract());
+
+        System.out.println("Test getting miranda method via reflection:");
+        try {
+          Class mirandaClass = Class.forName("MirandaAbstract");
+          Method mirandaMethod = mirandaClass.getDeclaredMethod("inInterface", (Class[]) null);
+          System.out.println("  did not expect to find miranda method");
+        } catch (NoSuchMethodException nsme) {
+          System.out.println("  caught expected NoSuchMethodException");
+        } catch (Exception e) {
+          System.out.println("  caught unexpected exception " + e);
+        }
     }
 }
diff --git a/test/JniTest/JniTest.java b/test/JniTest/JniTest.java
index 7014ef9..a1b1f0c 100644
--- a/test/JniTest/JniTest.java
+++ b/test/JniTest/JniTest.java
@@ -14,11 +14,14 @@
  * limitations under the License.
  */
 
+import java.lang.reflect.Method;
+
 class JniTest {
     public static void main(String[] args) {
         System.loadLibrary("arttest");
         testFindClassOnAttachedNativeThread();
         testCallStaticVoidMethodOnSubClass();
+        testGetMirandaMethod();
     }
 
     private static native void testFindClassOnAttachedNativeThread();
@@ -42,4 +45,23 @@
     private static class testCallStaticVoidMethodOnSubClass_SubClass
         extends testCallStaticVoidMethodOnSubClass_SuperClass {
     }
+
+    private static native Method testGetMirandaMethodNative();
+
+    private static void testGetMirandaMethod() {
+        Method m = testGetMirandaMethodNative();
+        if (m.getDeclaringClass() != testGetMirandaMethod_MirandaInterface.class) {
+            throw new AssertionError();
+        }
+    }
+
+    private static abstract class testGetMirandaMethod_MirandaAbstract implements testGetMirandaMethod_MirandaInterface {
+        public boolean inAbstract() {
+            return true;
+        }
+    }
+
+    private static interface testGetMirandaMethod_MirandaInterface {
+        public boolean inInterface();
+    }
 }
diff --git a/test/JniTest/jni_test.cc b/test/JniTest/jni_test.cc
index 72a3309..cfcbb64 100644
--- a/test/JniTest/jni_test.cc
+++ b/test/JniTest/jni_test.cc
@@ -81,3 +81,11 @@
 
   env->CallStaticVoidMethod(sub_class, execute);
 }
+
+extern "C" JNIEXPORT jobject JNICALL Java_JniTest_testGetMirandaMethodNative(JNIEnv* env, jclass) {
+  jclass abstract_class = env->FindClass("JniTest$testGetMirandaMethod_MirandaAbstract");
+  assert(abstract_class != NULL);
+  jmethodID miranda_method = env->GetMethodID(abstract_class, "inInterface", "()Z");
+  assert(miranda_method != NULL);
+  return env->ToReflectedMethod(abstract_class, miranda_method, JNI_FALSE);
+}
diff --git a/test/run-test b/test/run-test
index f706110..c3943e7 100755
--- a/test/run-test
+++ b/test/run-test
@@ -65,7 +65,7 @@
 dev_mode="no"
 update_mode="no"
 debug_mode="no"
-dalvik_mode="no"
+runtime="art"
 usage="no"
 build_only="no"
 
@@ -77,6 +77,7 @@
         shift
     elif [ "x$1" = "x--jvm" ]; then
         target_mode="no"
+        runtime="jvm"
         RUN="${progdir}/etc/reference-run-test-classes"
         NEED_DEX="false"
         shift
@@ -85,7 +86,7 @@
         shift
     elif [ "x$1" = "x--dalvik" ]; then
         lib="libdvm.so"
-        dalvik_mode="yes"
+        runtime="dalvik"
         shift
     elif [ "x$1" = "x--image" ]; then
         shift
@@ -155,15 +156,11 @@
     fi
 done
 
-run_args="${run_args} --lib $lib"
+if [ ! "$runtime" = "jvm" ]; then
+  run_args="${run_args} --lib $lib"
+fi
 
-if [ "$dalvik_mode" = "no" ]; then
-    if [ "$target_mode" = "no" ]; then
-        run_args="${run_args} --boot -Ximage:${ANDROID_HOST_OUT}/framework/core.art"
-    else
-        run_args="${run_args} --boot -Ximage:/data/art-test/core.art"
-    fi
-else
+if [ "$runtime" = "dalvik" ]; then
     if [ "$target_mode" = "no" ]; then
         framework="${OUT}/system/framework"
         bpath="${framework}/core.jar:${framework}/conscrypt.jar:${framework}/okhttp.jar:${framework}/core-junit.jar:${framework}/bouncycastle.jar:${framework}/ext.jar"
@@ -171,6 +168,12 @@
     else
         true # defaults to using target BOOTCLASSPATH
     fi
+elif [ "$runtime" = "art" ]; then
+    if [ "$target_mode" = "no" ]; then
+        run_args="${run_args} --boot -Ximage:${ANDROID_HOST_OUT}/framework/core.art"
+    else
+        run_args="${run_args} --boot -Ximage:/data/art-test/core.art"
+    fi
 fi
 
 if [ "$dev_mode" = "yes" -a "$update_mode" = "yes" ]; then